Hadoop Example: WordCount (Code)
1. Unpack hadoop-2.7.3 and add it to the environment variables (HADOOP_HOME and PATH).
2. Download the hadoop.dll and winutils.exe files built for Hadoop 2.7.3.
3. Put hadoop.dll into C:\Windows\System32 and winutils.exe into E:\workspace\java\hadoop-2.7.3\bin, to prevent puzzling errors on Windows (a quick sanity check is sketched after this list).
4. Create a Maven project with the pom.xml below.
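Before building anything, it helps to confirm that steps 1-3 actually took. A minimal sanity-check sketch (the EnvCheck class name is made up for illustration; it assumes HADOOP_HOME was set in step 1):

import java.io.File;

// Illustrative helper only, not part of the WordCount job.
public class EnvCheck {
    public static void main(String[] args) {
        String home = System.getenv("HADOOP_HOME"); // set in step 1
        System.out.println("HADOOP_HOME = " + home);
        // On Windows, Hadoop looks for winutils.exe under %HADOOP_HOME%\bin (step 3).
        boolean ok = home != null && new File(home, "bin/winutils.exe").isFile();
        System.out.println("winutils.exe present: " + ok);
    }
}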
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.kay</groupId>
    <artifactId>hadoopDemo01</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>hadoopDemo01</name>
    <url>https://maven.apache.org</url>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>
    </dependencies>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
</project>
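With this pom.xml in place, mvn clean package builds the project and leaves hadoopDemo01-1.0-SNAPSHOT.jar under target/.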
WordCountMapper.java
package com.kay;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;

/**
 * Created by kay on 2017/12/12.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // One input line per call; split it into words on spaces.
        String[] words = StringUtils.split(value.toString(), ' ');
        for (String w : words) {
            // Emit (word, 1) for every occurrence.
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
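To see what the map step produces, here is a standalone sketch (not part of the job; the input line "hello world hello" is made up) that calls the same Hadoop StringUtils.split used above:

import org.apache.hadoop.util.StringUtils;

// Illustration only: simulate one map() call on a made-up input line.
public class MapperTrace {
    public static void main(String[] args) {
        for (String w : StringUtils.split("hello world hello", ' ')) {
            System.out.println("(" + w + ", 1)");
        }
        // Prints (hello, 1), (world, 1), (hello, 1): one pair per occurrence,
        // exactly what WordCountMapper writes to the context.
    }
}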
WordCountReducer.java
package com.kay;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Created by kay on 2017/12/12.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // The framework groups all the 1s emitted for the same word; sum them.
        int sum = 0;
        for (IntWritable i : values) {
            sum = sum + i.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
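Between map and reduce, the framework sorts and groups the pairs by key, so the reducer for "hello" from the sample line above receives the values [1, 1]. The summing loop then boils down to this (a standalone illustration, not job code):

import java.util.Arrays;

// Illustration only: the reduce-side sum for the made-up key "hello".
public class ReduceTrace {
    public static void main(String[] args) {
        int sum = 0;
        for (int i : Arrays.asList(1, 1)) { // the grouped 1s for "hello"
            sum += i;
        }
        System.out.println("(hello, " + sum + ")"); // prints (hello, 2)
    }
}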
App.java
package com.kay;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * WordCount driver: configures and submits the job.
 */
public class App {
    public static void main(String[] args) {
        // If Hadoop has not been added to the environment variables, uncomment this line:
        //System.setProperty("hadoop.home.dir", "E:\\workspace\\java\\hadoop-2.7.3");
        Configuration config = new Configuration();
        // Set the HDFS NameNode address
        config.set("fs.defaultFS", "hdfs://192.168.1.200:9000");
        // Set the ResourceManager host
        config.set("yarn.resourcemanager.hostname", "master");
        try {
            FileSystem fs = FileSystem.get(config);
            Job job = Job.getInstance(config);
            job.setJarByClass(App.class);
            job.setJobName("wc");
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Input path
            FileInputFormat.addInputPath(job, new Path("/user/test"));
            // Output path: MapReduce refuses to write into an existing directory,
            // so delete any leftover from an earlier run first.
            Path outpath = new Path("/user/out");
            if (fs.exists(outpath)) {
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            boolean f = job.waitForCompletion(true);
            if (f) {
                System.out.println("Job completed successfully");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
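After the job finishes, each reducer writes one part file into /user/out; with the single default reducer that is part-r-00000, holding one "word TAB count" line per word. Here is a sketch that reads the result back through the same FileSystem API (it assumes the job above has already run against the same NameNode address):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustration only: print the reducer output from HDFS.
public class PrintResult {
    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://192.168.1.200:9000");
        FileSystem fs = FileSystem.get(config);
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(fs.open(new Path("/user/out/part-r-00000"))))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line); // e.g. "hello\t2"
            }
        }
    }
}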