System memory 259522560 must be at least 4.718592E8
[Study Notes]
/* Without the line below, the following error is thrown: java.lang.IllegalArgumentException: System memory 259522560 must be at least 4.718592E8 (470 MB). Please use a larger heap size. This means there is not enough memory, so the SparkContext cannot be started. */
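/* For context: conf is not declared in this excerpt. A minimal sketch of how it is presumably created
   (the app name and the local master are assumptions, not part of the original note; requires
   org.apache.spark.SparkConf and org.apache.spark.api.java.JavaSparkContext): */
SparkConf conf = new SparkConf()
        .setAppName("FlatMapDemo")   // assumed application name
        .setMaster("local[*]");      // local mode, where the memory check quoted above applies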
conf.set("spark.testing.memory", "2000000000");
JavaSparkContext sc = new JavaSparkContext(conf);
/* The following way of loading the input also works: */
// javardd<string> text = sc.textfile("hdfs://localhost:9000/readme.txt");
/* The original input file is:
o1abc 45
o1abc 77
o1abc o1abc */
JavaRDD<String> text = sc.textFile("e://temp//input//friend.txt");
List<String> strList = text.collect();
/* Output:
str:o1abc 45
str:o1abc 77
str:o1abc o1abc */
for (String str : strList) {
    System.out.println("str:" + str);
}
/* interface FlatMapFunction<T, R>: Iterable<R> call(T t) (note: in later Spark versions the return type has changed) */
JavaRDD<String> words = text.flatMap(new FlatMapFunction<String, String>() {
    /* List's super-interface is java.lang.Iterable */
    public Iterable<String> call(String line) throws Exception {
        System.out.println("flatMap once, line is " + line);
        String[] wordsArray = line.split(" ");
        List<String> wordsList = Arrays.asList(wordsArray);
        return wordsList;
    }
});
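/* Note on the version change mentioned above: from Spark 2.0 on, the Java FlatMapFunction returns an
   Iterator<R> instead of an Iterable<R>. A sketch of the same flatMap written against the newer API
   (requires java.util.Iterator; words2 is just an illustrative name): */
JavaRDD<String> words2 = text.flatMap(new FlatMapFunction<String, String>() {
    public Iterator<String> call(String line) throws Exception {
        // Spark 2.x+: return an Iterator rather than an Iterable
        return Arrays.asList(line.split(" ")).iterator();
    }
});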
List<String> wordsList = words.collect();
/* Output:
flatMap once, line is o1abc 45
flatMap once, line is o1abc 77
flatMap once, line is o1abc o1abc
word:o1abc
word:45
word:o1abc
word:77
word:o1abc
word:o1abc */
for (String word : wordsList) {
    System.out.println("word:" + word);
}
/* http://spark.apache.org/docs/latest/
interface PairFunction<T, K, V>
A function that returns key-value pairs (Tuple2<K, V>), and can be used to construct PairRDDs.
scala.Tuple2<K, V> call(T t)
*/
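/* The mapToPair call itself is cut off in this repost. Based on the output quoted below, it presumably
   looks roughly like this sketch (the variable names and the (word, 1) pairing are assumptions; requires
   org.apache.spark.api.java.JavaPairRDD, org.apache.spark.api.java.function.PairFunction and scala.Tuple2): */
JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
    public Tuple2<String, Integer> call(String word) throws Exception {
        System.out.println("in tuple2 word: " + word);
        return new Tuple2<String, Integer>(word, 1);
    }
});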
/*
flatMap once, line is o1abc 45 (this line shows that the earlier flatMap statement is executed again)
in tuple2 word: o1abc
Reposted from the original article: