Jsoup 与 httpClient 获取网络资源
程序员文章站
2022-05-05 20:39:27
...
背景：调用 Open edX API，并获取其返回的响应结果
在 http://mvnrepository.com/ 搜索并下载所需的 jar 包
//HTML解析器(edx项目使用)
compile group: 'org.jsoup', name: 'jsoup', version: '1.10.3'
//爬取网页所需部分内容
/**
 * Downloads the page at {@code url} with Jsoup and extracts the brace-delimited
 * text found inside the elements carrying the CSS class {@code response-info}.
 *
 * <p>The selected elements are rendered back to an HTML string, the span from the
 * first <code>{</code> to the last <code>}</code> is cut out, and every
 * {@code <a ...>label</a>} inside that span is replaced by its {@code label}.
 *
 * @param url address of the page to fetch
 * @return the extracted text, or {@code null} when the request fails with an
 *         {@link IOException} or the page contains no <code>{ ... }</code> span
 */
public static String getHTML(String url){
    String finalResult = null;
    try {
        Document document = Jsoup.connect(url)
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31")
                .timeout(60000)
                .get();
        Elements resultElement = document.getElementsByClass("response-info");
        String resultString = resultElement.toString();

        // Guard the indexes: without a "{ ... }" span the unguarded substring call
        // would throw StringIndexOutOfBoundsException, which is not caught below.
        int jsonStart = resultString.indexOf("{");
        int jsonEnd = resultString.lastIndexOf("}");
        if (jsonStart < 0 || jsonEnd < jsonStart) {
            return null;
        }
        finalResult = stripAnchorTags(resultString.substring(jsonStart, jsonEnd + 1));
    } catch (IOException e) {
        // Best effort: the failure is reported and null is returned to the caller.
        e.printStackTrace();
    }
    return finalResult;
}

/**
 * Replaces each {@code <a ...>label</a>} in {@code text} with {@code label}.
 * Stops at the first malformed anchor and leaves the remaining text untouched.
 */
private static String stripAnchorTags(String text) {
    String result = text;
    int closeIdx = result.indexOf("</a>");
    while (closeIdx >= 0) {
        int openIdx = result.indexOf("<a");
        if (openIdx < 0 || openIdx > closeIdx) {
            break; // stray closing tag with no opening tag before it
        }
        int openTagEnd = result.indexOf(">", openIdx);
        if (openTagEnd < 0 || openTagEnd > closeIdx) {
            break; // opening tag is never terminated before the closing tag
        }
        String anchor = result.substring(openIdx, closeIdx + 4);
        String label = result.substring(openTagEnd + 1, closeIdx);
        // Replaces every identical occurrence of this anchor; each pass removes at
        // least one "</a>", so the loop always terminates.
        result = result.replace(anchor, label);
        closeIdx = result.indexOf("</a>");
    }
    return result;
}
//httpClient请求响应 (edx项目使用)
compile group: 'commons-logging', name: 'commons-logging', version: '1.1.1'
compile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.2'
compile group: 'org.apache.httpcomponents', name: 'httpcore', version: '4.4.6'
compile group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.2'
/**
 * Issues an HTTP GET with Apache HttpClient and stores the first line of the
 * UTF-8 decoded response body in {@code courses}.
 *
 * <p>{@code httpClient}, {@code url}, {@code courses} and {@code logger} are
 * declared outside this method. The response and the reader are closed before the
 * method returns so the underlying connection is released.
 *
 * @return the value of {@code courses} after the request; it is left unchanged
 *         when the request fails or the response carries no entity
 */
public static String getCourseWithHttpClient() {
    // NOTE(review): a new client is created on every call and never closed here;
    // confirm whether other code relies on this field before closing or reusing it.
    httpClient = HttpClients.createDefault();
    HttpGet httpGet = new HttpGet(url);
    try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
        HttpEntity entity = response.getEntity();
        // getEntity() may return null when the response has no body.
        if (entity != null) {
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(entity.getContent(), "utf-8"))) {
                // Only the first line of the body is read, as before.
                courses = reader.readLine();
            }
        }
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.info(" courses :" + courses);
    return courses;
}
上一篇: 商业项目中代码质量是否重要?
下一篇: php curl 访问出错