HDFS Java API Operations
Goal: learn how to use the Java API to create, delete, rename, and query directories and files on HDFS.
1. Create a Maven project in IDEA.
2. Edit the pom.xml file as follows (Maven will download the required JARs, which may take a while):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com</groupId> <!-- your own group id -->
    <artifactId>aa</artifactId> <!-- your own project name -->
    <version>1.0-SNAPSHOT</version>
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-mr1-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/junit/junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.9</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <minimizeJar>true</minimizeJar>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
3. Once the dependencies have downloaded, write the Java class. You can try the operations one at a time by uncommenting the block you want to run.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HDFSDemo {
    public static void main(String[] args) throws Exception {
        /*
        // Create a directory
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration);
        boolean b = fs.mkdirs(new Path("/003"));
        if (b) {
            System.out.println("Success!");
        } else {
            System.out.println("Failed!");
        }
        */
        /*
        // Delete a directory (the second argument enables recursive deletion)
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://192.168.10.101:8020");
        FileSystem fs = FileSystem.newInstance(new URI("/"), configuration);
        boolean b = fs.delete(new Path("/003"), true);
        if (b) {
            System.out.println("Success!");
        } else {
            System.out.println("Failed!");
        }
        */
        /*
        // Rename a directory
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration);
        boolean b = fs.rename(new Path("/002"), new Path("/003"));
        if (b) {
            System.out.println("Success!");
        } else {
            System.out.println("Failed!");
        }
        */
        /*
        // List a directory
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration);
        FileStatus[] fi = fs.listStatus(new Path("/"));
        for (FileStatus f : fi) {
            System.out.println("path: " + f.getPath());
            System.out.println("name: " + f.getPath().getName());
        }
        */
        /*
        // Upload data (first argument: local source path, left blank here as a placeholder)
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration);
        fs.copyFromLocalFile(new Path(""), new Path("/"));
        fs.close();
        */
        // Download data (first argument: HDFS source path, left blank here as a placeholder)
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration);
        fs.copyToLocalFile(new Path(""), new Path("/"));
        fs.close();
    }
}
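The upload and download calls above use empty Path("") placeholders for the source. As a concrete illustration, here is a minimal self-contained sketch; the paths D:/data/a.txt and /003 are hypothetical and must be replaced with your own:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HDFSCopyDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), new Configuration());
        // Upload: hypothetical local file to an HDFS directory.
        fs.copyFromLocalFile(new Path("file:///D:/data/a.txt"), new Path("/003"));
        // Download: hypothetical HDFS file to a local directory.
        fs.copyToLocalFile(new Path("/003/a.txt"), new Path("file:///D:/data/"));
        fs.close();
    }
}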
If an error appears when you run the program (on Windows this is typically a warning that HADOOP_HOME is not set or that winutils.exe is missing), you can ignore it; it does not affect program execution. Configuring the Hadoop environment variables and then restarting the IDE makes it go away.
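If you would rather not configure a system-wide environment variable, a common workaround (an assumption added here, not from the original text) is to point the hadoop.home.dir system property at a local Hadoop installation at the very start of main, before any FileSystem call:

// Hypothetical local path to an unpacked Hadoop distribution
// (on Windows its bin directory must contain winutils.exe).
System.setProperty("hadoop.home.dir", "D:\\hadoop-2.6.0");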
Several ways to obtain a FileSystem
- Method 1: pass the HDFS URI directly to FileSystem.get():
@Test
public void getFileSystem() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), configuration);
    System.out.println(fileSystem.toString());
}
- Method 2: set fs.defaultFS in the Configuration:
@Test
public void getFileSystem2() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://192.168.52.100:8020");
    FileSystem fileSystem = FileSystem.get(new URI("/"), configuration);
    System.out.println(fileSystem.toString());
}
- Method 3: use FileSystem.newInstance() with the URI:
@Test
public void getFileSystem3() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = FileSystem.newInstance(new URI("hdfs://192.168.52.100:8020"), configuration);
    System.out.println(fileSystem.toString());
}
- Method 4: use FileSystem.newInstance() with fs.defaultFS set in the Configuration:
@Test
public void getFileSystem4() throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://192.168.52.100:8020");
    FileSystem fileSystem = FileSystem.newInstance(configuration);
    System.out.println(fileSystem.toString());
}
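A note on the difference between these approaches (standard Hadoop behavior, not spelled out in the original): FileSystem.get() may return a cached instance shared by all callers using the same URI, while FileSystem.newInstance() always creates a fresh instance, so it is safe to close without affecting other code. Since FileSystem implements Closeable, try-with-resources is a convenient pattern, sketched below:

// Minimal sketch: newInstance() returns a private instance that can safely be closed here.
try (FileSystem fileSystem = FileSystem.newInstance(new URI("hdfs://192.168.52.100:8020"), new Configuration())) {
    System.out.println(fileSystem.getUri());
}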
Recursively traversing all files in the file system
Traversing the HDFS file system with manual recursion:
@Test
public void listFile() throws Exception {
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), new Configuration());
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));
    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory()) {
            Path path = fileStatus.getPath();
            listAllFiles(fileSystem, path);
        } else {
            System.out.println("File path: " + fileStatus.getPath().toString());
        }
    }
}

public void listAllFiles(FileSystem fileSystem, Path path) throws Exception {
    FileStatus[] fileStatuses = fileSystem.listStatus(path);
    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory()) {
            listAllFiles(fileSystem, fileStatus.getPath());
        } else {
            Path path1 = fileStatus.getPath();
            System.out.println("File path: " + path1);
        }
    }
}
Traversing directly with the API Hadoop provides
/**
 * Recursive traversal using the API Hadoop provides
 * @throws Exception
 */
@Test
public void listMyFiles() throws Exception {
    // Obtain the FileSystem
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), new Configuration());
    // Get a RemoteIterator over all files; the first argument is the path to traverse,
    // the second indicates whether to recurse into subdirectories
    RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fileSystem.listFiles(new Path("/"), true);
    while (locatedFileStatusRemoteIterator.hasNext()) {
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        System.out.println(next.getPath().toString());
    }
    fileSystem.close();
}
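Worth noting about this API (standard behavior, added here as background): listFiles returns only files, never directories, and performs the recursion itself, which is why no helper method is needed. Each LocatedFileStatus also carries the file's block locations, which a plain listStatus does not fetch; for example, these two lines could go inside the while loop above (BlockLocation lives in org.apache.hadoop.fs):

// Each LocatedFileStatus exposes the block locations of its file.
BlockLocation[] blockLocations = next.getBlockLocations();
System.out.println("number of blocks: " + blockLocations.length);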