java爬虫查成语应用程序Swing编程
看了一些Java爬虫的书籍之后,我做了一个简单的Java爬虫应用小程序,以下是教程:
压缩包下载链接请点击:源代码
一、解析Html网页
首先要做一个查成语的应用,我们要选取一个查成语的网站
我选取的是 乐乐课堂查成语这个网站 (乐乐课堂)http://www.leleketang.com/chengyu
每个网站的解析方式都不一样,因为每个网站所使用的 class、id 各不相同;我们正是根据 class 和 id 等属性来定位词语所在的位置。
点开乐乐课堂这个网站
在搜索框中输入“天”,然后点击查询
http://www.leleketang.com/chengyu/results.php?k=天
可以发现它是一个get请求,点击下一页
http://www.leleketang.com/chengyu/results.php?k=天&p=2
由此可知,查询关键字通过参数 k 传递,页码通过参数 p 传递
然后按F12进入网页源代码查看
找到所查的成语的那一部分代码
二、新建工程项目
新建一个工程,可以新建动态web,maven ,java工程等,
然后导入所需的依赖包(下文代码用到了 HttpClient 和 Jsoup):
三、新建model对象
看了这个网站的页面结构后,我选取了三个字段:成语、成语拼音、成语解释。如果还想抓取其他标签的内容,也可以按同样的方式添加字段
新建一个Model对象、代码如下
package chengyu;
/**
 * Mutable data holder for a single idiom entry: the idiom itself, its pinyin
 * and its explanation.
 */
public class CyModel {

    /** The idiom text. */
    private String cyName;

    /** The pinyin transcription of the idiom. */
    private String cyPinYin;

    /** The explanation of the idiom (field name spelling kept as-is for the accessors). */
    private String cyDetal;

    /** Creates an entry whose three fields are all {@code null}. */
    public CyModel() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param cyName   the idiom text
     * @param cyPinYin the pinyin of the idiom
     * @param cyDetal  the explanation of the idiom
     */
    public CyModel(String cyName, String cyPinYin, String cyDetal) {
        this.cyName = cyName;
        this.cyPinYin = cyPinYin;
        this.cyDetal = cyDetal;
    }

    /** @return the idiom text */
    public String getCyName() {
        return this.cyName;
    }

    /** @param cyName the idiom text to store */
    public void setCyName(String cyName) {
        this.cyName = cyName;
    }

    /** @return the pinyin of the idiom */
    public String getCyPinYin() {
        return this.cyPinYin;
    }

    /** @param cyPinYin the pinyin to store */
    public void setCyPinYin(String cyPinYin) {
        this.cyPinYin = cyPinYin;
    }

    /** @return the explanation of the idiom */
    public String getCyDetal() {
        return this.cyDetal;
    }

    /** @param cyDetal the explanation to store */
    public void setCyDetal(String cyDetal) {
        this.cyDetal = cyDetal;
    }

    /** Renders all three fields in the form {@code CyModel [cyName=..., cyPinYin=..., cyDetal=...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CyModel [cyName=");
        text.append(cyName);
        text.append(", cyPinYin=").append(cyPinYin);
        text.append(", cyDetal=").append(cyDetal);
        text.append("]");
        return text.toString();
    }
}
四、新建解析URL
接下来处理前面分析出的URL:使用 HttpClient 发送请求并获取响应内容,再使用 Jsoup 解析返回的 HTML
废话就不多说了,直接看代码:
package chengyu;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads an idiom search-result page with HttpClient and extracts the
 * idiom entries from its HTML with Jsoup.
 */
public class CyUtil {

    /** Total number of idioms read from the heading of the most recently parsed page. */
    public Integer sumNums = 0;

    /** @return the total read from the most recently parsed page */
    public Integer getSumNums() {
        return sumNums;
    }

    /** @param sumNums the total to store */
    public void setSumNums(Integer sumNums) {
        this.sumNums = sumNums;
    }

    /**
     * Downloads the page at {@code url}, parses it, prints every parsed entry
     * to standard output and returns the entries.
     *
     * @param url the full URL of the result page to fetch
     * @return the parsed entries; an empty list (with the total reset to 0)
     *         when the page could not be downloaded
     */
    public List<CyModel> getUrl(String url) {
        String html = getJsoupUrl(url);
        if (html == null) {
            // Download failed or the response had no body: report "no results"
            // instead of handing null to the HTML parser.
            sumNums = 0;
            return new ArrayList<CyModel>();
        }
        List<CyModel> cymodels = getCyModel(html);
        for (CyModel model : cymodels) {
            System.out.println(model.toString());
        }
        return cymodels;
    }

    /**
     * Performs an HTTP GET and returns the response body decoded as UTF-8.
     *
     * @param url the URL to request
     * @return the response body, or {@code null} when the request failed or
     *         the response carried no entity
     */
    private String getJsoupUrl(String url) {
        CloseableHttpClient httpClient = HttpClients.createDefault();
        try {
            CloseableHttpResponse response = httpClient.execute(new HttpGet(url));
            try {
                HttpEntity entity = response.getEntity();
                if (entity != null) {
                    return EntityUtils.toString(entity, "utf-8");
                }
            } finally {
                response.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The client owns the connection pool; release it on every path.
            try {
                httpClient.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return null;
    }

    /**
     * Parses a result page: stores the total found in the page heading into
     * {@link #sumNums} and builds one {@link CyModel} per idiom entry.
     *
     * @param html the HTML of the result page
     * @return the entries found on the page, possibly empty
     */
    private List<CyModel> getCyModel(String html) {
        List<CyModel> cyModels = new ArrayList<CyModel>();
        Document document = Jsoup.parse(html);

        // The closing bracket was missing in the attribute selector; jsoup
        // versions that validate bracket balance reject the unbalanced form.
        String heading = document.select("div[class = wrapper_right]").select("h2").text();
        sumNums = parseCount(heading);

        Elements entries = document.select("div[class = text_list1]").select("div[class = idiom_list]");
        for (Element entry : entries) {
            String cyName = firstText(entry.select("div").select("a[class = idiom_list_title]"));
            String cyPinYin = firstText(entry.select("div").select("[class = idiom_list_pinyin]"));
            String cyDetail = firstText(entry.select("div[class = idiom_list_explain]"))
                    .replaceAll("成语解释:", " ");
            cyModels.add(new CyModel(cyName, cyPinYin, cyDetail));
        }
        return cyModels;
    }

    /**
     * Extracts the number formed by all digits in {@code text}.
     *
     * @return the parsed number, or 0 when the text has no digits or the
     *         digits do not fit in an {@code int}
     */
    private static Integer parseCount(String text) {
        String digits = text.replaceAll("\\D", "");
        if (digits.isEmpty()) {
            return 0;
        }
        try {
            return Integer.valueOf(digits);
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Returns the text of the first matched element, or an empty string when nothing matched. */
    private static String firstText(Elements matches) {
        return matches.isEmpty() ? "" : matches.get(0).text();
    }
}
五、新建Swing界面应用
可以查询了以后编写Swing界面文件
只是实现了这些功能就没有怎么修饰
上面有一个输入框输入要查询的内容,点击查询就可以看到下面爬取到的成语
右下角输入跳转的页面数点击跳转就直接爬取那个页面的成语了
代码如下:
package chengyu;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTable;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.table.DefaultTableModel;
/**
 * Swing window for the idiom lookup: a keyword field with a search button on
 * top, a result table below, and a page-jump field once more than one page of
 * results exists.
 *
 * NOTE(review): the HTTP request runs inside the button handler, i.e. on the
 * event dispatch thread, so the window is unresponsive while a page loads.
 */
public class CyMain implements ActionListener {

    /** Page size used to turn the total result count into a page count. */
    private static final int PAGE_SIZE = 15;

    public static void main(String[] args) {
        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new CyMain().frameNew();
            }
        });
    }

    // Not referenced inside this class; kept so existing references still compile.
    static Integer sc = 1;
    JFrame frame = new JFrame("成语查查查");
    JPanel panOne = new JPanel();
    JPanel panTwo = new JPanel();
    JTable table = new JTable();
    JScrollPane jsp = new JScrollPane(table);
    JLabel tale = new JLabel("查成语 ");
    JTextField findText = new JTextField(15); // keyword input field
    JButton findButton = new JButton("查找"); // search button
    JLabel taleTwo = new JLabel("查找结果如下:");
    // Rows currently shown in the table; replaced on every render.
    Object[][] p = new Object[15][4];
    JLabel labe = new JLabel();
    JLabel numLab = new JLabel();
    JButton nextPage = new JButton("跳转");
    JTextField textd = new JTextField(5);
    static String url = "http://www.leleketang.com/chengyu/results.php?k=";
    // Keyword of the last search, reused when jumping between pages.
    static String kString = "";
    // Page currently displayed (0 when the last search found nothing).
    static Integer pNumber = 1;
    // Number of result pages for the last search.
    static Integer numbers = 0;
    CyUtil cyUtil = new CyUtil();

    /** Builds the initial window: the search bar plus a placeholder image. */
    public void frameNew() {
        // Register each listener exactly once. Registering the jump button on
        // every render made one click trigger several page loads.
        findButton.addActionListener(this);
        nextPage.addActionListener(this);
        panOne.add(tale);
        panOne.add(findText);
        panOne.add(findButton);
        panOne.add(taleTwo);
        panOne.setBounds(10, 10, 480, 30);
        panOne.setVisible(true);
        panTwo.setBounds(10, 70, 460, 460);
        labe.setIcon(new ImageIcon("src/main/resources/backgroup.jpg"));
        panTwo.add(labe);
        frame.setLayout(null);
        frame.add(panOne);
        frame.add(panTwo);
        frame.setSize(500, 400);
        // Without this the JVM keeps running after the window is closed.
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.setVisible(true);
    }

    /** Dispatches clicks on the search button and on the page-jump button. */
    public void actionPerformed(ActionEvent e) {
        String command = e.getActionCommand();
        if ("查找".equals(command)) {
            search();
        } else if ("跳转".equals(command)) {
            jumpToPage();
        }
    }

    /** Runs a new search for the keyword in the input field and shows the first page. */
    private void search() {
        String keyword = findText.getText().trim();
        if (keyword.isEmpty()) {
            return;
        }
        kString = keyword;
        List<CyModel> cymodels = cyUtil.getUrl(url + encodeKeyword(kString));
        pNumber = cymodels.isEmpty() ? 0 : 1;
        int total = cyUtil.getSumNums();
        // Round up so a partially filled last page still counts as a page.
        numbers = (total + PAGE_SIZE - 1) / PAGE_SIZE;
        getModel(cymodels);
    }

    /** Loads the page typed into the jump field; ignores non-numeric or out-of-range input. */
    private void jumpToPage() {
        int requested;
        try {
            requested = Integer.parseInt(textd.getText().trim());
        } catch (NumberFormatException ex) {
            return;
        }
        if (requested < 1 || requested > numbers) {
            return;
        }
        // Only update the current page once the request is known to be valid.
        pNumber = requested;
        List<CyModel> cymodels = cyUtil.getUrl(url + encodeKeyword(kString) + "&p=" + pNumber);
        getModel(cymodels);
    }

    /** Percent-encodes the keyword so spaces, '&' and similar characters cannot break the query string. */
    private static String encodeKeyword(String keyword) {
        try {
            return URLEncoder.encode(keyword, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8; fall back to the raw keyword.
            return keyword;
        }
    }

    /**
     * Shows the given entries in the table and refreshes the paging controls.
     *
     * @param cymodels the entries of the page to display
     */
    public void getModel(List<CyModel> cymodels) {
        // Size the rows to this page so no entries from a previous page linger
        // and a page with more entries than expected cannot overflow the array.
        Object[][] rows = new Object[cymodels.size()][4];
        for (int i = 0; i < cymodels.size(); i++) {
            CyModel model = cymodels.get(i);
            rows[i][0] = i + 1;
            rows[i][1] = model.getCyName();
            rows[i][2] = model.getCyPinYin();
            rows[i][3] = model.getCyDetal();
        }
        p = rows;
        String[] n = { "序号", "成语", "拼音", "解释" };
        table.setModel(new DefaultTableModel(p, n) {
            @Override
            public boolean isCellEditable(int row, int column) {
                // The table is read-only.
                return false;
            }
        });
        table.setRowHeight(25);
        panTwo.remove(labe);
        panTwo.add(jsp);
        numLab.setText("第- " + pNumber + " -页 总共- " + numbers + " -页");
        panTwo.add(numLab);
        // The jump controls are only useful when there is more than one page.
        if (numbers > 1) {
            panTwo.add(textd);
            panTwo.add(nextPage);
        } else {
            panTwo.remove(textd);
            panTwo.remove(nextPage);
        }
        frame.setSize(500, 570);
        frame.validate();
        frame.repaint();
    }
}
六、运行应用程序和结果
运行后界面如下:
输入天点击查询后的界面:
在右下角输入5并点击跳转后的界面:
以上的成语应用还有很多可以完善的地方,下一篇将使用JavaFX编写界面,并且会把爬取到的数据直接存入数据库中!
如有不对,请多多指教,谢谢!
推荐阅读