欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  web前端

一个Swing程序,用来判断一个URL页面内包含的好链接和坏链接数目_html/css_WEB-ITnose

程序员文章站 2022-05-30 21:50:42
...
入口类

import java.awt.Dimension;import java.awt.Insets;import java.awt.event.ActionEvent;import java.awt.event.ActionListener;import java.io.IOException;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JLabel;import javax.swing.JMenuBar;import javax.swing.JScrollPane;import javax.swing.JTextArea;import javax.swing.JTextField;import javax.swing.ScrollPaneConstants;import javax.swing.SwingUtilities;/** * Description 检查URL是否是合法的URL,入口类,直接运行该类,将需要分析的URL地址粘入文本框即可 *  * @author wangxu *  */public class CheckLinks extends JFrame implements Runnable, ISpiderReportable {	// Used by addNotify	boolean frameSizeAdjusted = false;	JLabel label1 = new JLabel();	JButton begin = new JButton();	JTextField url = new JTextField();	JScrollPane errorScroll = new JScrollPane();	JTextArea errors = new JTextArea();	JLabel current = new JLabel();	JLabel goodLinksLabel = new JLabel();	JLabel badLinksLabel = new JLabel();	protected Thread backgroundThread;	protected Spider spider;	protected URL base;	protected int badLinksCount = 0;	protected int goodLinksCount = 0;	private static final long serialVersionUID = 1L;	public CheckLinks() {		setTitle("Find Broken Links");// 设置JFrame的标题		getContentPane().setLayout(null);// 设置布局方式		setSize(405, 288);		setVisible(true);		label1.setText("Enter a URL:");		getContentPane().add(label1);		label1.setBounds(12, 12, 84, 12);		begin.setText("Begin");		begin.setActionCommand("Begin");		getContentPane().add(begin);		begin.setBounds(12, 36, 84, 24);// 设置坐标和宽、高		getContentPane().add(url);		url.setBounds(108, 36, 288, 24);		errorScroll.setAutoscrolls(true);// 自动显示滚动条		errorScroll.setHorizontalScrollBarPolicy(ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS);// 水平方向始终显示		errorScroll.setVerticalScrollBarPolicy(ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS);// 垂直方向始终显示		errorScroll.setOpaque(true);// 设置不透明		getContentPane().add(errorScroll);		
errorScroll.setBounds(12, 120, 384, 156);		errors.setEditable(false);// 设置不可编辑		errorScroll.getViewport().add(errors);// 将文本域添加进滚动条		errors.setBounds(0, 0, 366, 138);		current.setText("Currently Processing: ");		getContentPane().add(current);// 加入显示当前信息的JLabel		current.setBounds(12, 72, 384, 12);		goodLinksLabel.setText("Good Links: 0");		getContentPane().add(goodLinksLabel);		goodLinksLabel.setBounds(12, 96, 192, 12);		badLinksLabel.setText("Bad Links: 0");		getContentPane().add(badLinksLabel);		badLinksLabel.setBounds(216, 96, 96, 12);		SymAction lSymAction = new SymAction();// 实例化一个事件监听器		begin.addActionListener(lSymAction);// 注册监听	}	static public void main(String args[]) {		new CheckLinks();// 程序入口	}	public void addNotify() {		// Record the size of the window prior to calling parent's addNotify.		Dimension size = getSize();		super.addNotify();		if (frameSizeAdjusted)			return;		frameSizeAdjusted = true;		// Adjust size of frame according to the insets and menu bar		Insets insets = getInsets();		JMenuBar menuBar = getRootPane().getJMenuBar();		int menuBarHeight = 0;		if (menuBar != null)			menuBarHeight = menuBar.getPreferredSize().height;		setSize(insets.left + insets.right + size.width, insets.top + insets.bottom + size.height + menuBarHeight);	}	class SymAction implements ActionListener {		public void actionPerformed(ActionEvent event) {			Object object = event.getSource();			if (object == begin)				begin_actionPerformed(event);		}	}	void begin_actionPerformed(ActionEvent event) {		if (backgroundThread == null) {			begin.setText("Cancel");			backgroundThread = new Thread(this);// 用当前对象来实例化一个Thread对象			backgroundThread.start();// 启动线程,执行run方法			goodLinksCount = 0;			badLinksCount = 0;		} else {			spider.cancel();// 设置标志位true		}	}	@Override	public void run() {		try {			errors.setText("");			spider = new Spider(this);// 用当前对象来实例化一个Spider对象,因为当前类实现了ISpiderReportable接口			spider.clear();			base = new URL(url.getText());// 取得需要搜索的URL地址			
spider.addURL(base);//将URL地址加入spider			spider.begin();//spider开始工作			Runnable doLater = new Runnable() {				public void run() {					begin.setText("Begin");				}			};			// 导致 doRun.run() 在 AWT 事件指派线程上异步执行。在所有挂起的 AWT			// 事件被处理后才发生。此方法应该在应用程序线程需要更新该 GUI时使用。在下面的示例中,invokeLater			// 调用将事件指派线程上的 Runnable对象 doHelloWorld加入队列,然后输出一条信息。			SwingUtilities.invokeLater(doLater);			backgroundThread = null;// 将后台线程重新置空,以便接受下一个URL		} catch (MalformedURLException e) {			UpdateErrors err = new UpdateErrors();			err.msg = "Bad address.";			SwingUtilities.invokeLater(err);		}	}	//检测两个URL地址是否属于同一主机,如果是返回true,否则false	@Override	public boolean spiderFoundURL(URL base, URL url) {		UpdateCurrentStats cs = new UpdateCurrentStats();		cs.msg = url.toString();//将URL信息赋值给cs.msg,使用后台线程进行打印		SwingUtilities.invokeLater(cs);		if (!checkLink(url)) {			UpdateErrors err = new UpdateErrors();			err.msg = url + "(on page " + base + ")\n";			SwingUtilities.invokeLater(err);			badLinksCount++;			return false;		}		goodLinksCount++;		if (!url.getHost().equalsIgnoreCase(base.getHost()))			return false;		else			return true;	}	@Override	public void spiderURLError(URL url) {		System.out.println("没找到的URL:" + url);	}	protected boolean checkLink(URL url) {		try {			URLConnection connection = url.openConnection();			connection.connect();			return true;		} catch (IOException e) {			return false;		}	}	public void spiderFoundEMail(String email) {		System.out.println("获得Email:" + email);	}	class UpdateErrors implements Runnable {		public String msg;		public void run() {			errors.append(msg);		}	}	class UpdateCurrentStats implements Runnable {		public String msg;		public void run() {			current.setText("Currently Processing: " + msg);			goodLinksLabel.setText("Good Links: " + goodLinksCount);			badLinksLabel.setText("Bad Links: " + badLinksCount);		}	}}
import javax.swing.text.html.HTMLEditorKit;

/**
 * Thin {@link HTMLEditorKit} subclass whose only purpose is to make the kit's HTML parser
 * reachable from outside: {@code HTMLEditorKit.getParser()} is protected, this override is public.
 *
 * <p>Background (from the JDK documentation): Swing's {@code JEditorPane} supports different
 * kinds of content through the {@code EditorKit} plug-in mechanism, and {@code HTMLEditorKit}
 * provides default support for HTML 3.2 with some extensions, moving towards 4.0. The
 * {@code <applet>} tag is not supported, but some support is provided for {@code <object>}.
 *
 * @author wangxu
 */
public class HTMLParse extends HTMLEditorKit {

	private static final long serialVersionUID = 1L;

	/**
	 * Exposes the parser supplied by the superclass.
	 *
	 * @return whatever {@code HTMLEditorKit.getParser()} returns
	 */
	@Override
	public HTMLEditorKit.Parser getParser() {
		HTMLEditorKit.Parser inherited = super.getParser();
		return inherited;
	}
}
import java.net.URL;

/**
 * Callbacks through which a spider reports what it encounters to its owner.
 */
public interface ISpiderReportable {

	/**
	 * Called when a URL link has been found.
	 *
	 * @param base the page on which the link was found
	 * @param url  the link that was found
	 * @return a flag for the spider; the {@code CheckLinks} implementation returns {@code true}
	 *         only for a reachable link on the same host as {@code base} (presumably this
	 *         decides whether the spider follows the link; confirm against the spider)
	 */
	boolean spiderFoundURL(URL base, URL url);

	/**
	 * Called when processing a URL failed.
	 *
	 * @param url the URL that caused the error
	 */
	void spiderURLError(URL url);

	/**
	 * Called when an e-mail link has been found.
	 *
	 * @param email the e-mail address that was found
	 */
	void spiderFoundEMail(String email);
}
import java.util.*;import java.net.*;import java.io.*;import javax.swing.text.*;import javax.swing.text.html.*;public class Spider {	// 装载错误的工作集	protected Collection workloadError = new ArrayList(3);	// 等待工作集	protected Collection workloadWaiting = new ArrayList(3);	// 已处理的工作集	protected Collection workloadProcessed = new ArrayList(3);	protected ISpiderReportable report;	protected boolean cancel = false;	public Spider(ISpiderReportable report) {		this.report = report;	}	public Collection getWorkloadError() {		return workloadError;	}	public Collection getWorkloadWaiting() {		return workloadWaiting;	}	public Collection getWorkloadProcessed() {		return workloadProcessed;	}	public void clear() {		getWorkloadError().clear();		getWorkloadWaiting().clear();		getWorkloadProcessed().clear();	}	public void cancel() {		cancel = true;	}	public void addURL(URL url) {		if (getWorkloadWaiting().contains(url))// 如果等待的工作集中已经包含该URL,返回			return;		if (getWorkloadError().contains(url))// 如果出错的工作集中已经包含该URL,返回			return;		if (getWorkloadProcessed().contains(url))// 如果已处理的工作集中包含该URL,返回			return;		log("Adding to workload: " + url);		getWorkloadWaiting().add(url);// 将其加入等待的工作集中	}	// 具体分析URL的方法	public void processURL(URL url) {		try {			log("Processing: " + url);// 控制台打印处理的URL地址			// get the URL's contents			URLConnection connection = url.openConnection();			System.out.println(connection.getContentType() + "++++++++++++++++====");			if ((connection.getContentType() != null) && !connection.getContentType().toLowerCase().startsWith("text/")) {				getWorkloadWaiting().remove(url);				getWorkloadProcessed().add(url);				log("Not processing because content type is: " + connection.getContentType());				return;			}			// read the URL			InputStream is = connection.getInputStream();			Reader r = new InputStreamReader(is);			// parse the URL			HTMLEditorKit.Parser parse = new HTMLParse().getParser();			// Parse the given stream and drive the given callback with the			// results of the parse. 
This method should be implemented to be			// thread-safe.			// 解析给定的流并通过解析的结果驱动给定的回调。该方法执行完之后,会调用给定的回调函数			parse.parse(r, new Parser(url), true);		} catch (IOException e) {// 如果出错			getWorkloadWaiting().remove(url);// 从工作集中移除URL			getWorkloadError().add(url);// 将出错的URL加入错误的工作集			log("Error: " + url);			report.spiderURLError(url);// 报告该出错的URL			return;		}		// mark URL as complete		getWorkloadWaiting().remove(url);		getWorkloadProcessed().add(url);		log("Complete: " + url);	}	// 蜘蛛工作的方法,只要等待工作集不为空,并且标志位为false,那么一直从集合中取出URL	public void begin() {		cancel = false;		while (!getWorkloadWaiting().isEmpty() && !cancel) {			Object list[] = getWorkloadWaiting().toArray();			for (int i = 0; (i