java实现简单的网络爬虫
程序员文章站
2022-06-05 18:47:17
...
/**
*
*/
package com.sunlei;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal web crawler that downloads a page and prints the links found in it.
 *
 * @author sunlei
 * @version created 2018-03-06 10:50:47
 */
public class WebSpiderTest {

    /** URL crawled by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_URL = "http://www.163.com";

    /**
     * Matches a double-quoted {@code href} attribute; group 1 is the attribute value.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern HREF_PATTERN = Pattern.compile("href=\"(.+?)\"");

    /**
     * Downloads the resource at the given URL and returns its text content.
     *
     * <p>The content is read line by line and the lines are concatenated without
     * any separator, so the returned string contains no line terminators. The
     * bytes are decoded as UTF-8.
     *
     * <p>This method is best-effort: it never throws for a malformed URL or an
     * I/O failure. The problem is reported on {@code System.err} and whatever was
     * read before the failure (possibly the empty string) is returned.
     *
     * @param urlStr the URL to fetch, e.g. {@code "http://www.163.com"}
     * @return the concatenated lines of the response, never {@code null}
     */
    public static String getURLContent(String urlStr) {
        StringBuilder content = new StringBuilder();
        try {
            URL url = new URL(urlStr);
            // try-with-resources closes the reader (and the underlying URL stream)
            // even when reading fails part-way through.
            // NOTE(review): UTF-8 is assumed here; the server's declared charset is
            // not inspected - confirm for pages served in another encoding.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line);
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL '" + urlStr + "': " + e);
        } catch (IOException e) {
            System.err.println("Failed to read '" + urlStr + "': " + e);
        }
        return content.toString();
    }

    /**
     * Fetches a page and prints the value of every double-quoted {@code href}
     * attribute found in it, one per line.
     *
     * @param args optional; {@code args[0]} is the URL to crawl. When absent the
     *             default URL {@code http://www.163.com} is used.
     */
    public static void main(String[] args) {
        String target = args.length > 0 ? args[0] : DEFAULT_URL;
        String page = getURLContent(target);
        Matcher matcher = HREF_PATTERN.matcher(page);
        while (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }
}