java实现获取网站的keywords,description
程序员文章站
2024-03-02 10:42:16
获取网站的 keywords(关键字)和 description(描述)内容
package cn.evan.util;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small command-line sample that downloads a web page with jsoup and prints
 * the {@code content} of its {@code <meta name="keywords">} and
 * {@code <meta name="description">} tags, followed by the page title.
 *
 * <p>NOTE(review): the class name is kept exactly as published; Java
 * convention would be {@code SemanticCrawl}.
 */
public class semanticcrawl {

    /**
     * Fetches the page and prints its keywords, description and title to
     * standard output.
     *
     * @param args optional; when at least one argument is given, the first
     *             one is used as the URL to fetch. Without arguments the
     *             placeholder literal from the original sample is used, which
     *             must be replaced by a real URL for the program to be useful.
     */
    public static void main(String[] args) {
        // Allow the URL to be supplied on the command line; fall back to the
        // sample's placeholder so the no-argument behaviour is unchanged.
        String url = args.length > 0 ? args[0] : "网址";

        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Without a document there is nothing to print. Stop here rather
            // than falling through and dereferencing a null document.
            System.err.println("获取页面失败: " + url + " (" + e + ")");
            return;
        }

        // Only <meta> elements inside <head> are inspected.
        Elements metas = doc.head().select("meta");
        for (Element meta : metas) {
            String name = meta.attr("name");
            String content = meta.attr("content");
            if ("keywords".equalsIgnoreCase(name)) {
                System.out.println("关键字:" + content);
            }
            if ("description".equalsIgnoreCase(name)) {
                System.out.println("网站内容描述:" + content);
            }
        }

        // The title is printed last, matching the original output order.
        System.out.println("标题" + doc.title());
    }
}
获取网站的<meta name="keywords" content="" />和<meta name="description" content="" />中的关键字和描述内容
使用 HTML 解析器 jsoup 实现
下载jsoup的lib地址:https://jsoup.org/download
复制代码 代码如下:
package cn.evan.util;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small command-line sample that downloads a web page with jsoup and prints
 * the {@code content} of its {@code <meta name="keywords">} and
 * {@code <meta name="description">} tags, followed by the page title.
 *
 * <p>NOTE(review): the class name is kept exactly as published; Java
 * convention would be {@code SemanticCrawl}.
 */
public class semanticcrawl {

    /**
     * Fetches the page and prints its keywords, description and title to
     * standard output.
     *
     * @param args optional; when at least one argument is given, the first
     *             one is used as the URL to fetch. Without arguments the
     *             placeholder literal from the original sample is used, which
     *             must be replaced by a real URL for the program to be useful.
     */
    public static void main(String[] args) {
        // Allow the URL to be supplied on the command line; fall back to the
        // sample's placeholder so the no-argument behaviour is unchanged.
        String url = args.length > 0 ? args[0] : "网址";

        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Without a document there is nothing to print. Stop here rather
            // than falling through and dereferencing a null document.
            System.err.println("获取页面失败: " + url + " (" + e + ")");
            return;
        }

        // Only <meta> elements inside <head> are inspected.
        Elements metas = doc.head().select("meta");
        for (Element meta : metas) {
            String name = meta.attr("name");
            String content = meta.attr("content");
            if ("keywords".equalsIgnoreCase(name)) {
                System.out.println("关键字:" + content);
            }
            if ("description".equalsIgnoreCase(name)) {
                System.out.println("网站内容描述:" + content);
            }
        }

        // The title is printed last, matching the original output order.
        System.out.println("标题" + doc.title());
    }
}
以上所述就是本文分享的全部内容了,希望大家能够喜欢。