Hadoop 实现 grep 示例分享
用 Hadoop 做的一个简单 grep 程序,可从文档中提取包含某些字符串的行
/*
* 一个简单grep程序,可从文档中提取包含某些字符串的行
*/
public class grep extends configured implements tool{
public static class grepmap extends mapper<longwritable, text, text,nullwritable>{
public void map(longwritable line,text value,context context) throws ioexception, interruptedexception{
//通过configuration获取参数
string str = context.getconfiguration().get("grep");
if(value.tostring().contains(str)){
context.write(value, nullwritable.get());
}
}
}
/**
 * Configures and runs the map-only grep job.
 *
 * <p>The output path is deleted recursively if it already exists, so a
 * previous run's results are overwritten.
 *
 * <p>NOTE(review): the status is returned rather than passed to
 * {@code System.exit} here; the caller (not visible in this block) is
 * expected to turn the return value into the process exit code — confirm.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path,
 *             {@code args[2]} the string to search for
 * @return 0 if the job completed successfully, 1 if it failed or the
 *         argument count was wrong
 * @throws Exception if job setup, file-system access or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("error");
        return 1;
    }

    Configuration configuration = getConf();
    // Hand the search string to the map tasks through the job configuration.
    configuration.set("grep", args[2]);

    Job job = new Job(configuration, "grep");
    job.setJarByClass(grep.class);
    job.setMapperClass(grepmap.class);
    // No reduce phase: mapper output is written directly as the job output.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileSystem fileSystem = out.getFileSystem(configuration);
    if (fileSystem.exists(out)) {
        fileSystem.delete(out, true);
    }

    FileInputFormat.addInputPath(job, in);
    FileOutputFormat.setOutputPath(job, out);

    return job.waitForCompletion(true) ? 0 : 1;
}