Hadoop HDFS Java API in Practice
Calling Hadoop remotely through the Java API
This article shows how to call a remote Hadoop cluster through the Java API and implement uploading, downloading, and deleting files on HDFS.
Step 1: Add the dependencies to pom.xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.6</version>
    <exclusions>
        <exclusion>
            <artifactId>slf4j-log4j12</artifactId>
            <groupId>org.slf4j</groupId>
        </exclusion>
    </exclusions>
    <!-- <scope>provided</scope> -->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-archives -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-archives</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-auth -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-auth</artifactId>
    <version>2.7.6</version>
    <exclusions>
        <exclusion>
            <artifactId>slf4j-log4j12</artifactId>
            <groupId>org.slf4j</groupId>
        </exclusion>
    </exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-datajoin -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-datajoin</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-distcp -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-distcp</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.6</version>
    <exclusions>
        <exclusion>
            <artifactId>netty-all</artifactId>
            <groupId>io.netty</groupId>
        </exclusion>
    </exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-annotations -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-annotations</artifactId>
    <version>2.7.6</version>
    <exclusions>
        <exclusion>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.6</version>
    <exclusions>
        <exclusion>
            <artifactId>slf4j-log4j12</artifactId>
            <groupId>org.slf4j</groupId>
        </exclusion>
    </exclusions>
</dependency>
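All of the artifacts above use version 2.7.6, which should match the version running on the cluster. Before writing the client class, a quick smoke test can confirm that the dependencies resolve and the NameNode is reachable. The sketch below is only illustrative: the class name HdfsSmokeTest is invented here, while the hdfs://hdn1:9000 address and the hadoop user are the same ones used in Step 2.

package com.example.wfhadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/** Smoke test: list the HDFS root to confirm the client can reach the NameNode. */
public class HdfsSmokeTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hdn1:9000");
        // Connect as the "hadoop" user, matching the class in Step 2.
        FileSystem fs = FileSystem.get(URI.create("hdfs://hdn1:9000"), conf, "hadoop");
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}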
Step 2: Create the test class FileSystemCat
package com.example.wfhadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.URI;

/**
 * @author wufei
 * @create 2018-10-30 11:05
 **/
public class FileSystemCat {

    static Configuration conf = new Configuration();
    static FileSystem hdfs;

    static {
        conf.set("fs.defaultFS", "hdfs://hdn1:9000");
        try {
            // Connect to the remote NameNode as user "hadoop".
            hdfs = FileSystem.get(URI.create("hdfs://hdn1:9000"), conf, "hadoop");
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            String dir = "/Hadoop/wftest";
            String src = "E:\\hadoofile\\ok1.txt";
            // List a directory
            //listFiles("hdfs://hdn1:9000/Hadoop/Input/");
            // Create a directory
            createDir(dir);
            // Upload a file
            //uploadFile(src, dir);
            // Download a file
            //down(src, "hdfs://hdn1:9000/Hadoop/Input/ok1.txt");
            // Delete a file
            //deleteFile("/Hadoop/Input/ok.txt");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Download a file from HDFS to the local filesystem.
     * @param localSrc local target, e.g. E:\hadoofile\ok1.txt
     * @param hdfsDst  HDFS source, e.g. hdfs://hdn1:9000/Hadoop/Input/ok1.txt
     */
    public static void down(String localSrc, String hdfsDst) {
        FSDataInputStream in = null;
        try {
            in = hdfs.open(new Path(hdfsDst));
            OutputStream out = new FileOutputStream(localSrc);
            // The last argument tells copyBytes to close both streams when done.
            IOUtils.copyBytes(in, out, 4096, true);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                IOUtils.closeStream(in);
            }
        }
    }

    /**
     * Create a directory on HDFS if it does not already exist.
     */
    public static void createDir(String dir) throws Exception {
        Path path = new Path(dir);
        if (hdfs.exists(path)) {
            return;
        }
        hdfs.mkdirs(path);
    }

    /**
     * List the files in a directory.
     * @param dirName the HDFS directory
     * @throws IOException
     */
    public static void listFiles(String dirName) throws IOException {
        Path path = new Path(dirName);
        FileStatus[] status = hdfs.listStatus(path);
        System.out.println(dirName + " contains the following files:");
        for (int i = 0; i < status.length; i++) {
            System.out.println(status[i].getPath().toString());
        }
    }

    /**
     * Upload a local file to an HDFS directory.
     * @param localSrc the local source file
     * @param hdfsDst  the HDFS target directory
     * @throws IOException
     */
    public static void uploadFile(String localSrc, String hdfsDst) throws IOException {
        Path src = new Path(localSrc);
        Path dst = new Path(hdfsDst);
        // The local file and the HDFS target directory must both exist.
        if (!(new File(localSrc)).exists()) {
            return;
        }
        if (!hdfs.exists(dst)) {
            return;
        }
        String dstPath = dst.toUri() + "/" + src.getName();
        if (hdfs.exists(new Path(dstPath))) {
            // The target already exists; copyFromLocalFile overwrites it by default.
            System.out.println(dstPath + " already exists and will be overwritten");
        }
        hdfs.copyFromLocalFile(src, dst);
        FileStatus[] files = hdfs.listStatus(dst);
        for (FileStatus file : files) {
            System.out.println(file.getPath());
        }
    }

    /**
     * Delete a file (or directory, recursively) on HDFS.
     * @param fileName the HDFS path
     * @throws IOException
     */
    public static void deleteFile(String fileName) throws IOException {
        Path f = new Path(fileName);
        boolean isExists = hdfs.exists(f);
        if (isExists) { // if it exists, delete it
            boolean isDel = hdfs.delete(f, true);
            System.out.println(fileName + " deleted? \t" + isDel);
        } else {
            System.out.println(fileName + " exists? \t" + isExists);
        }
    }
}
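Incidentally, the class is named FileSystemCat, but none of the methods above actually prints a file's contents. A minimal sketch of such a cat method, which could be added to the class and reuses its static hdfs field (the example path in the Javadoc is hypothetical), might look like this:

    /**
     * Print the contents of an HDFS file to standard output,
     * e.g. cat("/Hadoop/Input/ok1.txt").
     */
    public static void cat(String hdfsFile) throws IOException {
        FSDataInputStream in = null;
        try {
            in = hdfs.open(new Path(hdfsFile));
            // Copy to stdout; the final "false" leaves System.out open.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }

Calling it on an HDFS path then behaves much like hadoop fs -cat on the same file.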
Step 3: Test
1. Run the upload.
Check whether the file was uploaded successfully:
[hadoop@hdn1 ~]$ hadoop fs -ls /Hadoop/Input
Found 2 items
-rw-r--r-- 3 hadoop supergroup 17 2018-10-31 09:39 /Hadoop/Input/ok1.txt
-rw-r--r-- 2 hadoop supergroup 34 2018-10-30 09:39 /Hadoop/Input/wordcount.txt
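The remaining operations can be exercised by enabling the corresponding calls in main. As an illustration only, the small driver below (the class name FileSystemCatDemo and the local copy path are invented for this example) downloads the file that was just uploaded, deletes it on HDFS, and lists the directory again:

package com.example.wfhadoop.hdfs;

/** Example driver: download the uploaded file, then delete it on HDFS. */
public class FileSystemCatDemo {
    public static void main(String[] args) throws Exception {
        // Download hdfs://hdn1:9000/Hadoop/Input/ok1.txt to a local copy.
        FileSystemCat.down("E:\\hadoofile\\ok1_copy.txt",
                "hdfs://hdn1:9000/Hadoop/Input/ok1.txt");
        // Remove the file from HDFS and print what is left in the directory.
        FileSystemCat.deleteFile("/Hadoop/Input/ok1.txt");
        FileSystemCat.listFiles("hdfs://hdn1:9000/Hadoop/Input/");
    }
}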