myeclipse运行mapreduce报错:Cannot run program "cygpath"
直接上代码:Mapper package com.winksi.dotstat;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.mapreduce.Mapper;public class DotProducedMapper extends MapperLongW
直接上代码:Mapper
package com.winksi.dotstat; import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; public class DotProducedMapper extends MapperReducer:{ @Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{ String line = value.toString(); String fileds[] = line.split(","); String imsi = fileds[0]; String adccompany = fileds[1]; String phone = fileds[2]; String cur_time = fileds[3]; String call_length = fileds[4]; String call_type = fileds[5]; String show_length = fileds[6]; String pkgname = fileds[7]; String ip = fileds[8]; String model = fileds[9]; String ossdk = fileds[10]; String av = fileds[11]; String cityId = fileds[12]; StringBuffer strKey = new StringBuffer(); strKey.append(model).append(",").append(adccompany).append(",") .append(ossdk.toUpperCase()).append(",").append(av).append(",") .append(phone); context.write(new Text(strKey.toString()), new Text(imsi)); } }
package com.winksi.dotstat; import java.io.IOException; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class DotProducedReducer extends ReducerSpring配置文件:{ public void reduce(Text key, Iterator values, Context context) throws IOException, InterruptedException { int count = 0; Set set = new HashSet (); while (values.hasNext()) { set.add(values.next().toString()); count++; } StringBuffer sb = new StringBuffer(); sb.append(count).append(",").append(set.size()); Date yesterdayDate = new Date(new Date().getTime() - 1*24*60*60*1000); String yesterday = DotStatPvUv.getCurrentDay(yesterdayDate); StringBuffer sbKey = new StringBuffer(); sbKey.append(key.toString()).append(",").append(yesterday); context.write(new Text(sbKey.toString()), new Text(sb.toString())); } }
Spring配置文件(hadoop-mr.xml):
<hdp:configuration>
    fs.default.name=${hd.fs}
</hdp:configuration>
maven的jar包pom.xml:
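<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-hadoop</artifactId>
    <version>1.0.2.RELEASE-cdh4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>${hadoop.core.version}</version>
</dependency>
java代码: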
/**
 * Launcher that loads the Spring context defined in {@code hadoop-mr.xml}
 * from the classpath.
 */
public class Test {

    /**
     * Creates the application context and arranges for it to be closed on JVM exit.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        ClassPathXmlApplicationContext ctx =
                new ClassPathXmlApplicationContext("hadoop-mr.xml");
        // Without this the context is never closed and bean destroy callbacks never run.
        ctx.registerShutdownHook();
    }
}
运行报错:
Exception in thread "main" java.io.IOException: Cannot run program "cygpath": CreateProcess error=2, 系统找不到指定的文件。
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:459)
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:201)
    at org.apache.hadoop.util.Shell.run(Shell.java:183)
    at org.apache.hadoop.fs.FileUtil$CygPathCommand.<init>(FileUtil.java:413)
    at org.apache.hadoop.fs.FileUtil.makeShellPath(FileUtil.java:439)
    at org.apache.hadoop.fs.FileUtil.makeShellPath(FileUtil.java:466)
    at org.apache.hadoop.fs.RawLocalFileSystem.execCommand(RawLocalFileSystem.java:559)
    at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:551)
    at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:355)
    at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:212)
原因是尽管连接的hadoop平台是在linux上面,但是我们的eclipse调试环境是在windows,我们需要安装一个linux模拟器“cygwin”来支持程序的运行。
在windows开发服务器上安装好cygwin,然后在环境变量中添加cygwin的bin目录,比如“D:\Program\cygwin\bin”,问题得以解决。
注意是在系统环境变量的Path上加入“D:\Program\cygwin\bin”,这样我们就能像在linux上运行命令一样,在windows的dos窗口下执行ls、cd等命令。刚开始我一直是在系统环境变量下的classpath上加的“D:\Program\cygwin\bin”,尝试了半天都是失败,后来才发现是加错了地方。加完之后在cmd窗口中试一下ls、cd等命令,成功后重启myeclipse运行代码,就没有问题了。
------------------------------------------------------------------------------
之前纠结继承的Mapper和Reducer的问题,有两个文件可以继承,具体问题见:
http://wuyanzan60688.blog.163.com/blog/static/127776163201310164244955/