欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

java实现对hadoop hdfs的基本目录和文件操作

程序员文章站 2022-05-14 23:05:17
...

java实现对hadoop hdfs的基本目录和文件操作


1、首先在 Eclipse 或者 IntelliJ IDEA 中创建一个 maven 工程。
2、在pom.xml文件中增加hadoop hdfs的操作依赖,如下:

<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.wongoing</groupId>
	<artifactId>hadoop</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>hadoophdfs01</name>

	<properties>
		<java.version>1.8</java.version>
		<hadoop.version>3.3.0</hadoop.version>
	</properties>
	
	<dependencies>
		<!-- NOTE(review): system-scoped tools.jar only exists on JDK 8 and earlier;
		     tools.jar was removed in JDK 9+, so this breaks on newer JDKs — confirm
		     it is actually needed by the build before keeping it. -->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
		<!-- JUnit 4 — required for the @BeforeClass/@Test annotations used below. -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.13.1</version>
			<scope>test</scope>
		</dependency>
		<!-- NOTE(review): hadoop-client normally pulls in hadoop-common and
		     hadoop-hdfs transitively; the two explicit entries below are
		     presumably redundant — verify with `mvn dependency:tree`. -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>${hadoop.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>${hadoop.version}</version>
		</dependency>		
	</dependencies>
</project>

3、编写一个单元测试类HadoopHdfsTest.java,实现创建目录、上传文件、列表文件、删除文件、删除目录等操作,如下:

package com.wongoing.hadoop.test;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Basic HDFS directory/file operations driven through the Java API:
 * create directory, upload, list, rename, delete file, delete directory.
 *
 * <p>NOTE(review): the tests assume the execution order mkdir → upload →
 * rename → delete → removeDir, but JUnit 4 does not guarantee method
 * ordering — each test should ideally set up its own fixture. Left as-is
 * to preserve the article's structure.
 */
public class HadoopHdfsTest {
	/** NameNode RPC endpoint; must match fs.defaultFS in the cluster's core-site.xml. */
	private static String hdfsUrl = "hdfs://172.16.1.153:9000";
	private static Configuration conf;
	private static FileSystem fs;

	/**
	 * Opens one shared FileSystem handle before any test runs.
	 * The third argument of FileSystem.get is the remote user the
	 * operations are performed as ("root").
	 *
	 * @throws IOException          if the connection cannot be established
	 * @throws InterruptedException if the connecting thread is interrupted
	 * @throws URISyntaxException   if hdfsUrl is malformed
	 */
	@BeforeClass
	public static void init() throws IOException, InterruptedException, URISyntaxException {
		conf = new Configuration();
		conf.set("fs.defaultFS", hdfsUrl);
		fs = FileSystem.get(new URI(hdfsUrl), conf, "root");
	}

	/**
	 * Releases the shared FileSystem after all tests have run.
	 * Fixes a resource leak: the original never closed the connection.
	 *
	 * @throws IOException if closing the filesystem fails
	 */
	@AfterClass
	public static void shutdown() throws IOException {
		if (fs != null) {
			fs.close();
		}
	}

	/**
	 * Creates the /javaTest directory; asserts on the boolean result,
	 * which the original silently discarded.
	 *
	 * @throws IOException on communication failure with the NameNode
	 */
	@Test
	public void testMkdir() throws IllegalArgumentException, IOException {
		String dir = "/javaTest";
		Assert.assertTrue("mkdirs(" + dir + ") returned false", fs.mkdirs(new Path(dir)));
	}

	/**
	 * Uploads a local file to HDFS.
	 *
	 * <p>Fixes two defects in the original: the local input is now opened
	 * BEFORE fs.create(), so a missing local file no longer leaks the HDFS
	 * output stream (nor leaves behind an empty HDFS file); and
	 * try-with-resources guarantees both streams close on any failure.
	 *
	 * @throws IOException if the local file is missing or the copy fails
	 */
	@Test
	public void testUploadFile() throws IllegalArgumentException, IOException {
		String localFilePath = "E:/test.png";			// local source path
		String hdfsFilePath = "/javaTest/test.png";		// HDFS destination path

		try (InputStream in = new FileInputStream(localFilePath);
				// second argument true = overwrite an existing file
				FSDataOutputStream fout = fs.create(new Path(hdfsFilePath), true)) {
			// false: streams are closed by try-with-resources, not by IOUtils
			IOUtils.copyBytes(in, fout, 1024, false);
		}
		System.out.println("文件上传完毕..");
	}

	/**
	 * Lists the entries directly under /javaTest.
	 *
	 * @throws FileNotFoundException if the directory does not exist
	 * @throws IOException           on communication failure
	 */
	@Test
	public void testViewFileList() throws FileNotFoundException, IllegalArgumentException, IOException {
		String dir = "/javaTest";
		// listStatus enumerates the direct children of the directory
		FileStatus[] status = fs.listStatus(new Path(dir));
		// FileUtil converts the FileStatus[] into a plain Path[]
		Path[] listPaths = FileUtil.stat2Paths(status);
		for (Path p : listPaths) {
			System.out.println(p);
		}
	}

	/**
	 * Renames an uploaded file. rename() signals failure through its
	 * return value rather than an exception, so the result is asserted
	 * (the original ignored it).
	 *
	 * @throws IOException on communication failure
	 */
	@Test
	public void testRenameFile() throws IllegalArgumentException, IOException {
		String oldFileName = "/javaTest/test.png";
		String newFileName = "/javaTest/abc.png";
		boolean renamed = fs.rename(new Path(oldFileName), new Path(newFileName));
		Assert.assertTrue("rename " + oldFileName + " -> " + newFileName + " failed", renamed);
	}

	/**
	 * Deletes a single file; the recursive flag is irrelevant for files
	 * but required by the API. Asserts the boolean result.
	 *
	 * @throws IOException on communication failure
	 */
	@Test
	public void testDeleteFile() throws IllegalArgumentException, IOException {
		String hdfsFilePath = "/javaTest/abc.png";
		// true = recursive (deletes children when the path is a directory)
		Assert.assertTrue("delete " + hdfsFilePath + " failed",
				fs.delete(new Path(hdfsFilePath), true));
	}

	/**
	 * Recursively deletes the /javaTest directory and everything in it.
	 * Asserts the boolean result the original discarded.
	 *
	 * @throws IOException on communication failure
	 */
	@Test
	public void testRemoveDir() throws IllegalArgumentException, IOException {
		String dir = "/javaTest";
		Assert.assertTrue("delete " + dir + " failed", fs.delete(new Path(dir), true));
	}
}