Learn Hadoop with Me, Part 2
// A PathFilter that accepts only paths whose yyyy/MM/dd component falls
// within a given date range.
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DateRangePathFilter implements PathFilter {

  private final Pattern PATTERN =
      Pattern.compile("^.*/(\\d\\d\\d\\d/\\d\\d/\\d\\d).*$");

  private final Date start, end;

  public DateRangePathFilter(Date start, Date end) {
    this.start = new Date(start.getTime());
    this.end = new Date(end.getTime());
  }

  public boolean accept(Path path) {
    Matcher matcher = PATTERN.matcher(path.toString());
    if (matcher.matches()) {
      DateFormat format = new SimpleDateFormat("yyyy/MM/dd");
      try {
        return inInterval(format.parse(matcher.group(1)));
      } catch (ParseException e) {
        return false;
      }
    }
    return false;
  }

  private boolean inInterval(Date date) {
    return !date.before(start) && !date.after(end);
  }
}

// cc FileCopyWithProgress Copies a local file to a Hadoop filesystem, and shows progress
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

// vv FileCopyWithProgress
public class FileCopyWithProgress {
  public static void main(String[] args) throws Exception {
    String localSrc = args[0];
    String dst = args[1];

    InputStream in = new BufferedInputStream(new FileInputStream(localSrc));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    OutputStream out = fs.create(new Path(dst), new Progressable() {
      public void progress() {
        System.out.print(".");
      }
    });

    IOUtils.copyBytes(in, out, 4096, true);
  }
}
// ^^ FileCopyWithProgress

// cc FileSystemCat Displays files from a Hadoop filesystem on standard output by using the FileSystem directly
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// vv FileSystemCat
public class FileSystemCat {
  public static void main(String[] args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    InputStream in = null;
    try {
      in = fs.open(new Path(uri));
      IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
      IOUtils.closeStream(in);
    }
  }
}
// ^^ FileSystemCat

// cc FileSystemDoubleCat Displays files from a Hadoop filesystem on standard output twice, by using seek
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// vv FileSystemDoubleCat
public class FileSystemDoubleCat {
  public static void main(String[] args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    FSDataInputStream in = null;
    try {
      in = fs.open(new Path(uri));
      IOUtils.copyBytes(in, System.out, 4096, false);
      in.seek(0); // go back to the start of the file
      IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
      IOUtils.closeStream(in);
    }
  }
}
// ^^ FileSystemDoubleCat
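The DateRangePathFilter at the top of this listing is meant to be plugged into FileSystem's glob methods. Here is a minimal sketch of one way to drive it; the hdfs://localhost namenode URI, the /logs/yyyy/MM/dd directory layout, and the January 2022 date range are assumptions for illustration, not part of the original listing.

import java.net.URI;
import java.text.SimpleDateFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class DateRangeGlobExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed namenode URI; replace with your cluster's.
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost/"), conf);
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy/MM/dd");
    // Expand the assumed /logs/yyyy/MM/dd layout, then let the filter keep
    // only paths whose date component falls inside the interval.
    FileStatus[] matches = fs.globStatus(
        new Path("/logs/*/*/*"),
        new DateRangePathFilter(fmt.parse("2022/01/01"), fmt.parse("2022/01/31")));
    if (matches != null) { // globStatus may return null for a nonexistent path
      for (Path p : FileUtil.stat2Paths(matches)) {
        System.out.println(p);
      }
    }
  }
}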
// cc ListStatus Shows the file statuses for a collection of paths in a Hadoop filesystem
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

// vv ListStatus
public class ListStatus {
  public static void main(String[] args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    Path[] paths = new Path[args.length];
    for (int i = 0; i < paths.length; i++) {
      paths[i] = new Path(args[i]);
    }

    FileStatus[] status = fs.listStatus(paths);
    Path[] listedPaths = FileUtil.stat2Paths(status);
    for (Path p : listedPaths) {
      System.out.println(p);
    }
  }
}
// ^^ ListStatus

// cc RegexExcludePathFilter A PathFilter for excluding paths that match a regular expression
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// vv RegexExcludePathFilter
public class RegexExcludePathFilter implements PathFilter {

  private final String regex;

  public RegexExcludePathFilter(String regex) {
    this.regex = regex;
  }

  public boolean accept(Path path) {
    return !path.toString().matches(regex);
  }
}
// ^^ RegexExcludePathFilter

// A generalization of RegexExcludePathFilter that can either include or
// exclude paths matching the regular expression.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class RegexPathFilter implements PathFilter {

  private final String regex;
  private final boolean include;

  public RegexPathFilter(String regex) {
    this(regex, true);
  }

  public RegexPathFilter(String regex, boolean include) {
    this.regex = regex;
    this.include = include;
  }

  public boolean accept(Path path) {
    return (path.toString().matches(regex)) ? include : !include;
  }
}

// cc URLCat Displays files from a Hadoop filesystem on standard output using a URLStreamHandler
import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

// vv URLCat
public class URLCat {

  static {
    URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
  }

  public static void main(String[] args) throws Exception {
    InputStream in = null;
    try {
      in = new URL(args[0]).openStream();
      IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
      IOUtils.closeStream(in);
    }
  }
}
// ^^ URLCat
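RegexExcludePathFilter is likewise designed to narrow a glob expansion. Below is a minimal usage sketch, assuming an hdfs://localhost namenode and a /2007/MM/dd directory layout (both hypothetical): it expands every day directory under /2007 but drops December 31st.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class RegexExcludeGlobExample {
  public static void main(String[] args) throws Exception {
    // Assumed namenode URI and /2007/MM/dd layout for illustration.
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost/"), new Configuration());
    // Expand /2007/*/*, then reject any path matching the regular
    // expression, i.e. the /2007/12/31 directory.
    FileStatus[] matches = fs.globStatus(
        new Path("/2007/*/*"),
        new RegexExcludePathFilter("^.*/2007/12/31$"));
    if (matches != null) { // globStatus may return null for a nonexistent path
      for (Path p : FileUtil.stat2Paths(matches)) {
        System.out.println(p);
      }
    }
  }
}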