HDFS Operations

HDFS Web Console Operations

Web Console: port 50070 (NameNode web UI), port 50090 (SecondaryNameNode web UI)
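
For example, with the NameNode at 192.168.157.111 (the address used in the Java examples below), the two consoles would be reachable at:

http://192.168.157.111:50070     NameNode web UI
http://192.168.157.111:50090     SecondaryNameNode web UI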

HDFS Command-Line Operations

HDFS operation commands (hdfs dfs)

-mkdir            create a directory
-ls               list a directory
-ls -R            list a directory recursively
-put              upload a local file to HDFS
-moveFromLocal    upload a local file and delete the local copy
-copyFromLocal    upload a local file (similar to -put)
-copyToLocal      download a file (similar to -get)
-get              download a file from HDFS
-rm               delete a file or directory
-getmerge         concatenate the files in a directory and download the result as one local file
-cp               copy within HDFS
-mv               move/rename within HDFS
-count            count directories, files, and bytes
-du               show the size of each file/directory
-text, -cat       print the contents of a file
balancer          rebalance data blocks across DataNodes (run as: hdfs balancer)
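
A few typical invocations of the commands above (the paths here are only examples):

hdfs dfs -mkdir /input
hdfs dfs -put data.txt /input
hdfs dfs -ls -R /input
hdfs dfs -get /input/data.txt d:\temp
hdfs dfs -rm -r /input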

HDFS administration commands (hdfs dfsadmin)

-report      report basic file system information and statistics
-safemode    get/enter/leave safe mode (see section 4 below)
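
Typical invocations (the output depends on the cluster):

hdfs dfsadmin -report            print cluster capacity and DataNode status
hdfs dfsadmin -safemode get      check whether the NameNode is in safe mode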

Java API

Creating a directory

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/*
 * Cause of the error:
 * Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
 * Permission denied: user=lenovo, access=WRITE, inode="/folder1":root:supergroup:drwxr-xr-x
 *
 * The current local user (lenovo) needs WRITE access,
 * but the HDFS root directory is owned by root:supergroup with mode drwxr-xr-x.
 *
 * Four ways to make the program run:
 * 1. Set a system property that declares the user identity (see test1)
 * 2. Pass the user with a -D JVM argument when running the program (see test2)
 * 3. Change the permissions of the target directory: hdfs dfs -chmod 777 /folder2 (see test3)
 * 4. Set dfs.permissions to false to disable HDFS permission checking (see test4)
 */
public class TestMkDir {
    @Test
    public void test1() throws Exception{
        // Way 1: set a system property that declares the user identity
        System.setProperty("HADOOP_USER_NAME", "root");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder1"));
        // Close the client
        client.close();
    }

    @Test
    public void test2() throws Exception{
        // Way 2: supply the user from outside the code, e.g. run the JVM with -DHADOOP_USER_NAME=root
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder2"));
        // Close the client
        client.close();
    }

    @Test
    public void test3() throws Exception{
        // Way 3: relax the permissions of the parent directory first:
        //   hdfs dfs -chmod 777 /folder2
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder2/folder3"));
        // Close the client
        client.close();
    }

    @Test
    public void test4() throws Exception{
        // Way 4: disable HDFS permission checking on the cluster (dfs.permissions = false)
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder4"));
        // Close the client
        client.close();
    }
}
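
For way 4 in the comment above, permission checking is switched off on the cluster side rather than in the client code. A minimal hdfs-site.xml snippet for that (set on the NameNode and requiring a restart; on current Hadoop 2.x releases the property is named dfs.permissions.enabled) would look like this:

<property>
   <name>dfs.permissions</name>
   <value>false</value>
</property>

For way 2, the user identity is supplied from outside the code instead of via System.setProperty, e.g. by running the JVM with -DHADOOP_USER_NAME=root.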

Uploading a file

package demo;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestUpload {
    @Test
    public void test1() throws Exception{
        // Open an input stream on the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Get an output stream that writes into HDFS
        OutputStream out = client.create(new Path("/tools/a.zip"));
        // Copy the data through a buffer
        byte[] buffer = new byte[1024];
        int len = 0;
        while((len = in.read(buffer)) > 0) {
            // Data was read; write it out
            out.write(buffer, 0, len);
        }
        out.flush();
        out.close();
        in.close();
    }

    @Test
    public void test2() throws Exception{
        // Open an input stream on the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Get an output stream that writes into HDFS
        OutputStream out = client.create(new Path("/tools/b.zip"));
        // Use the utility class to simplify the copy;
        // the last argument tells IOUtils to close both streams when done
        IOUtils.copyBytes(in, out, 1024, true);
    }
}
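
The same upload can also be done without handling the streams yourself, via FileSystem.copyFromLocalFile. A minimal sketch under the same assumptions as above (the class name and the HDFS target path are only examples):

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestUploadSimple {
    @Test
    public void test1() throws Exception {
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Copy the local file into HDFS in one call
        client.copyFromLocalFile(new Path("d:\\dowload\\hadoop-2.4.1.zip"), new Path("/tools/c.zip"));
        client.close();
    }
}

Note that this path goes through Hadoop's local file system wrapper, which on Windows may additionally require winutils.exe to be configured.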

Downloading a file

package demo;

import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestDownload {
    @Test
    public void test1() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Open an input stream  <------ HDFS
        InputStream in = client.open(new Path("/tools/a.zip"));
        // Open an output stream to the local file  ----> d:\temp\bb.zip
        OutputStream out = new FileOutputStream("d:\\temp\\bb.zip");
        // Use the utility class to simplify the copy; close both streams when done
        IOUtils.copyBytes(in, out, 1024, true);

        // Manual alternative: copy through a buffer
//      byte[] buffer = new byte[1024];
//      int len = 0;
//      while((len = in.read(buffer)) > 0) {
//          // Data was read; write it out
//          out.write(buffer, 0, len);
//      }
//      out.flush();
//      out.close();
//      in.close();
    }
}
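
As with the upload, the download can also be done without copying streams by hand, via FileSystem.copyToLocalFile. A minimal sketch under the same assumptions (the class name and the local target path are only examples):

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestDownloadSimple {
    @Test
    public void test1() throws Exception {
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Copy the HDFS file to the local disk in one call.
        // Arguments: delSrc = false (keep the HDFS copy),
        // useRawLocalFileSystem = true (write a plain local file, no .crc sidecar)
        client.copyToLocalFile(false, new Path("/tools/a.zip"), new Path("d:\\temp\\cc.zip"), true);
        client.close();
    }
}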

Viewing file information

package demo;

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestMetaData {
    @Test
    public void testCheckFileInfo() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // List the status of everything under the directory
        FileStatus[] filesStatus = client.listStatus(new Path("/tools"));
        for(FileStatus f : filesStatus){
            System.out.println(f.isDirectory() ? "directory" : "file");
            System.out.println(f.getPath().getName());
            System.out.println(f.getBlockSize());
            System.out.println("*************************");
        }
        client.close();
    }

    @Test
    public void testCheckFileBlock() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Get the status of the file
        FileStatus fs = client.getFileStatus(new Path("/tools/a.zip"));
        // Get the locations of the file's data blocks
        BlockLocation[] location = client.getFileBlockLocations(fs, 0, fs.getLen());
        for(BlockLocation block : location){
            // getHosts() returns a String[] because each block is replicated
            // on several DataNodes, so one block can have several hosts
            System.out.println(Arrays.toString(block.getHosts()) + "\t" + Arrays.toString(block.getNames()));
        }
        client.close();
    }
}

Advanced HDFS features

1. Recycle bin (trash)

The HDFS recycle bin is essentially a cut-and-paste (ctrl+x): deleted data is moved into a hidden directory.
Enable it by editing core-site.xml:

<!-- Disabled by default (interval = 0); 1440 keeps deleted files for 1440 minutes (24 hours) -->
<property>
   <name>fs.trash.interval</name>
   <value>1440</value>
</property>

Without the recycle bin

Log:
18/04/09 21:35:40 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted /tools    ---> the data is gone for good (compare with the recycle bin case below)

With the recycle bin enabled

Moved: 'hdfs://bigdata111:9000/tools/a.zip' to trash at: hdfs://bigdata111:9000/user/root/.Trash/Current

View the recycle bin

hdfs dfs -lsr /user/root/.Trash/Current

Restore from the recycle bin

hdfs dfs -cp /user/root/.Trash/Current/tools/a.zip /tools   
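
Two related options: -skipTrash deletes immediately even when the trash is enabled, and -expunge removes trash checkpoints older than the retention interval:

hdfs dfs -rm -r -skipTrash /tools     delete permanently, bypassing the recycle bin
hdfs dfs -expunge                     purge expired checkpoints from the recycle bin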

2. Snapshots

In essence: like a cp command (a point-in-time backup copy of a directory).
Administration commands

[-allowSnapshot <snapshotDir>]
[-disallowSnapshot <snapshotDir>]

Operation commands

[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]

Enable snapshots on a directory

hdfs dfsadmin -allowSnapshot /students

Create a backup (snapshot)

hdfs dfs -createSnapshot /students backup_student_0411_01
Log: Created snapshot /students/.snapshot/backup_student_0411_01
hdfs dfs -put student03.txt /students
hdfs dfs -createSnapshot /students backup_student_0411_02
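
The snapshots just created can be inspected directly. The .snapshot directory is hidden from normal listings but can be addressed explicitly:

hdfs lsSnapshottableDir                                     list all directories on which snapshots are allowed
hdfs dfs -ls /students/.snapshot                            list the snapshots of /students
hdfs dfs -ls /students/.snapshot/backup_student_0411_01     browse the contents of one snapshot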

Restore from a snapshot

hdfs dfs -cp /input/.snapshot/backup_input_01/data.txt /input

3. Quotas

Name quota: limits the number of files (and directories) that can be created under a directory

[-setQuota <quota> <dirname>...<dirname>]
[-clrQuota <dirname>...<dirname>]   
hdfs dfs -mkdir /folder1
hdfs dfsadmin -setQuota 3 /folder1
In practice only N-1 files can be created, because the directory itself counts toward the quota.
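
Whether a quota is set, and how much of it has been used, can be checked with the -count -q option listed among the operation commands above:

hdfs dfs -count -q /folder1
(columns: name quota, remaining name quota, space quota, remaining space quota, dir count, file count, content size, path)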

Space quota: limits the total size of the files under a directory

[-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...<dirname>]
[-clrSpaceQuota [-storageType <storagetype>] <dirname>...<dirname>] 

Set a space quota of 1 MB:

hdfs dfs -mkdir /folder2
hdfs dfsadmin -setSpaceQuota 1M /folder2

Error:

The DiskSpace quota of /folder2 is exceeded: quota = 1048576 B = 1 MB but diskspace consumed = 134217728 B = 128 MB

Note: the quota must not be set smaller than 128 MB, the default HDFS block size, because space quota is charged in whole blocks as soon as a file is written.

4. Safe mode

While the NameNode is in safe mode, HDFS is read-only: files cannot be created, deleted, or modified.

hdfs dfsadmin -safemode get      check the safe mode status
hdfs dfsadmin -safemode enter    enter safe mode
hdfs dfsadmin -safemode leave    leave safe mode
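
In scripts it is often useful to block until the NameNode has left safe mode on its own:

hdfs dfsadmin -safemode wait     wait until safe mode is exited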