Java获取网络文件并插入数据库的代码
程序员文章站
2022-10-15 10:21:53
获取百度的歌曲名,歌手和链接!! 复制代码 代码如下: package webtools; import java.io.bufferedreader; import ja...
获取百度的歌曲名,歌手和链接!!
package webtools;
import dbtools.dbtools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scrapes a music chart page and stores every song it finds (title, artist,
 * link) in the {@code song} table through {@code dbtools}.
 *
 * <p>Extraction is purely regex based: song cells are {@code <td>} elements
 * containing a link, taken from tables whose opening tag matches
 * {@link #MAIN_TABLE} exactly. JDK identifiers use their real casing; the
 * names declared by this class are kept as published so existing callers
 * continue to resolve.
 */
public class iotoweb {

    /** A table cell holding a link, optionally wrapped in parentheses. */
    private static final Pattern LINK_CELL = Pattern.compile(
            "<td[^>]*>[\\(]*<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)[\\)]*[\\s]*</td>",
            Pattern.DOTALL);

    /** An href attribute; exactly one of groups 1-3 carries the unquoted value. */
    private static final Pattern HREF = Pattern.compile(
            "href=(?:\"([^\"]*)\"|'([^']*)'|([^\\s>\"']+))", Pattern.DOTALL);

    /** A link that directly follows an opening parenthesis, up to the next ')'. */
    private static final Pattern PERSON = Pattern.compile(
            "\\(<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)\\)",
            Pattern.DOTALL);

    /** A complete anchor element that is followed by one whitespace character. */
    private static final Pattern SONG_ANCHOR = Pattern.compile(
            "<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)</a>\\s",
            Pattern.DOTALL);

    /** The chart table(s) of the page, identified by their exact opening tag. */
    private static final Pattern MAIN_TABLE = Pattern.compile(
            "<table width=\"100%\" align=\"center\" cellpadding=\"0\" cellspacing=\"0\" class=\"list\">(.*?)</table>",
            Pattern.DOTALL);

    /** Any markup tag. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    private static final String INSERT_SONG =
            "insert into song(songname,songperson,songhref) values(?,?,?)";

    /** Database helper used for the inserts; opens its connection on construction. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads a page and returns its text decoded as gb2312, with the lines
     * joined without separators.
     *
     * <p>This is best effort: an I/O problem is reported on stderr and
     * whatever was read so far (possibly the empty string) is returned.
     *
     * @param htmlurl address of the page to fetch
     * @return the page text, never {@code null}
     */
    public String gethtmlcontent(String htmlurl) {
        StringBuilder htmlcontent = new StringBuilder();
        BufferedReader in = null;
        try {
            in = new BufferedReader(
                    new InputStreamReader(new URL(htmlurl).openStream(), "gb2312"));
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        } catch (IOException e) {
            // Also covers MalformedURLException and UnsupportedEncodingException.
            e.printStackTrace();
        } finally {
            // Close on every path so a failed read does not leak the connection.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return htmlcontent.toString();
    }

    /**
     * Collects every table cell that contains a link.
     *
     * @param htmlcontent markup to search
     * @return the complete text of each matching {@code <td>} element, in document order
     */
    public List<String> getlink(String htmlcontent) {
        return allMatches(LINK_CELL, htmlcontent);
    }

    /**
     * Extracts the values of all href attributes.
     *
     * <p>Double-quoted, single-quoted and unquoted values are supported and
     * returned without their quotes.
     *
     * @param htmlcontent markup to search
     * @return the href values in document order
     */
    public List<String> gethref(String htmlcontent) {
        List<String> hrefs = new ArrayList<String>();
        Matcher ma = HREF.matcher(htmlcontent);
        while (ma.find()) {
            String value = ma.group(1);
            if (value == null) {
                value = ma.group(2);
            }
            if (value == null) {
                value = ma.group(3);
            }
            hrefs.add(value);
        }
        return hrefs;
    }

    /**
     * Finds the parenthesised links of a cell, which the caller treats as the artist.
     *
     * @param htmlcontent markup of one table cell
     * @return each match, including its parentheses, with the first
     *         {@code href="} and all double quotes removed
     */
    public List<String> getperson(String htmlcontent) {
        List<String> persons = new ArrayList<String>();
        Matcher ma = PERSON.matcher(htmlcontent);
        while (ma.find()) {
            persons.add(ma.group().replaceFirst("href=\"", "").replace("\"", ""));
        }
        return persons;
    }

    /**
     * Finds the anchor elements that are followed by whitespace; the caller
     * treats the first one of a cell as the song link.
     *
     * @param htmlcontent markup of one table cell
     * @return the matching anchors, each including its trailing whitespace character
     */
    public List<String> getsongname(String htmlcontent) {
        return allMatches(SONG_ANCHOR, htmlcontent);
    }

    /**
     * Cuts the chart table(s) out of a page.
     *
     * @param htmlcontent markup of the whole page
     * @return all matching tables concatenated, or the empty string when none match
     */
    public String getmaincontent(String htmlcontent) {
        StringBuilder maincontent = new StringBuilder();
        for (String table : allMatches(MAIN_TABLE, htmlcontent)) {
            maincontent.append(table);
        }
        return maincontent.toString();
    }

    /**
     * Removes every markup tag from a string.
     *
     * @param s text that may contain tags
     * @return {@code s} without anything enclosed in {@code <...>}
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Fetches the chart page and inserts one row per song cell.
     *
     * <p>A cell without a recognisable song anchor or href is skipped, so one
     * odd cell no longer aborts the whole import. When a cell has no
     * parenthesised link the artist is stored as {@code "无"}; when it has
     * several, the last one is used.
     *
     * @param htmlurl address of the chart page
     * @throws Throwable whatever the database helper throws
     */
    public void getfrombaidumap3(String htmlurl) throws Throwable {
        String maincontent = getmaincontent(gethtmlcontent(htmlurl));
        for (String tdtag : getlink(maincontent)) {
            List<String> songnamelist = getsongname(tdtag);
            if (songnamelist.isEmpty()) {
                continue;
            }
            String songanchor = songnamelist.get(0);
            List<String> hreflist = gethref(songanchor);
            if (hreflist.isEmpty()) {
                continue;
            }

            List<String> personlist = getperson(tdtag);
            String songperson = personlist.isEmpty()
                    ? "无"
                    : outtag(personlist.get(personlist.size() - 1));

            System.out.println();
            ArrayList<String> list_values = new ArrayList<String>();
            list_values.add(outtag(songanchor));
            list_values.add(songperson);
            list_values.add(hreflist.get(0));
            dbtools.update(INSERT_SONG, list_values);
        }
    }

    /** Returns the full text of every match of {@code pattern} in {@code input}. */
    private static List<String> allMatches(Pattern pattern, String input) {
        List<String> matches = new ArrayList<String>();
        Matcher matcher = pattern.matcher(input);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }
}
dbtools数据库连接类:
package dbtools;
import java.sql.*;
import java.util.ArrayList;
/**
 * Minimal JDBC helper that runs parameterised statements on one connection.
 *
 * <p>The connection is opened in the constructor and there is no method to
 * close it. JDK identifiers use their real casing; the names declared by this
 * class are kept as published so existing callers continue to resolve.
 */
public class dbtools {

    /** Stays {@code null} when the constructor could not connect. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are reported on stderr instead of being thrown, so
     * construction always succeeds; {@link #query} and {@link #update} then
     * fail with an {@link IllegalStateException}.
     */
    public dbtools() {
        try {
            // Class names are case-sensitive: the driver class is "Driver", not "driver".
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            // NOTE(review): URL and credentials are hard-coded; move them to configuration.
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/testurl", "root", "zhuyi");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised query and returns all rows as strings.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values placeholder values, bound in list order starting at index 1
     * @return one {@code String[]} per row, holding every column read with {@code getString}
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable any {@link SQLException} raised by the driver
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
        ArrayList<String[]> listrows = new ArrayList<String[]>();
        PreparedStatement statement = prepare(sql);
        try {
            bind(statement, list_values);
            ResultSet rows = statement.executeQuery();
            try {
                int columncount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columncount];
                    for (int i = 0; i < columncount; i++) {
                        rowinfo[i] = rows.getString(i + 1);
                    }
                    listrows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
        return listrows;
    }

    /**
     * Runs a parameterised insert, update or delete.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values placeholder values, bound in list order starting at index 1
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable any {@link SQLException} raised by the driver
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = prepare(sql);
        try {
            bind(statement, list_values);
            statement.executeUpdate();
        } finally {
            // Closed on every path; previously a failed execution leaked the statement.
            statement.close();
        }
    }

    /** Prepares {@code sql}, failing with a clear message when there is no connection. */
    private PreparedStatement prepare(String sql) throws SQLException {
        if (connection == null) {
            throw new IllegalStateException(
                    "No database connection: it could not be opened when this object was created");
        }
        return connection.prepareStatement(sql);
    }

    /** Binds {@code values} to the placeholders of {@code statement} in order. */
    private static void bind(PreparedStatement statement, ArrayList<?> values)
            throws SQLException {
        for (int i = 0; i < values.size(); i++) {
            statement.setObject(i + 1, values.get(i));
        }
    }
}
servlet调用:
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webtools.iotoweb;
public class testurl extends httpservlet {
/**
* constructor of the object.
*/
public testurl() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
try {
iotoweb iotoweb = new iotoweb();
iotoweb.getfrombaidumap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");
} catch (throwable e) {
// todo auto-generated catch block
e.printstacktrace();
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
response.setcontenttype("text/html");
printwriter out = response.getwriter();
out
.println("<!doctype html public \"-//w3c//dtd html 4.01 transitional//en\">");
out.println("<html>");
out.println(" <head><title>a servlet</title></head>");
out.println(" <body>");
out.print(" this is ");
out.print(this.getclass());
out.println(", using the post method");
out.println(" </body>");
out.println("</html>");
out.flush();
out.close();
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}
获取金书网的图书名:
package webtools;
import dbtools.dbtools;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads one book page and stores the book title in the {@code bookinfo} table.
 *
 * <p>JDK identifiers use their real casing; the names declared by this class
 * are kept as published so existing callers continue to resolve.
 */
public class getbook {

    /** The element holding the title: a span with class {@code style15} and no nested tags. */
    private static final Pattern BOOK_NAME =
            Pattern.compile("<span class=\"style15\">[^>]*</span>", Pattern.DOTALL);

    /** Any markup tag. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    /** Database helper used for the insert; opens its connection on construction. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads a page and returns its text decoded as gb2312, with the lines
     * joined without separators.
     *
     * @param htmlurl address of the page to fetch
     * @return the page text
     * @throws Throwable if the URL is malformed or the page cannot be read
     */
    public String gethtmlcontent(String htmlurl) throws Throwable {
        StringBuilder htmlcontent = new StringBuilder();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(htmlurl).openStream(), "gb2312"));
        try {
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        } finally {
            // Close on every path so a failed read does not leak the connection.
            in.close();
        }
        return htmlcontent.toString();
    }

    /**
     * Finds the first title element of a page.
     *
     * @param htmlcontent markup of the page
     * @return the whole {@code <span>} element including its tags, or the
     *         empty string when the page has none
     */
    public String getbookname(String htmlcontent) {
        Matcher matcher = BOOK_NAME.matcher(htmlcontent);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Removes every markup tag from a string.
     *
     * @param s text that may contain tags
     * @return {@code s} without anything enclosed in {@code <...>}
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Fetches a book page and, when it has a non-empty title, prints the title
     * and inserts it into {@code bookinfo}.
     *
     * @param htmlurl address of the book page
     * @throws Throwable if the page cannot be read or the insert fails
     */
    public void getfromjinshu(String htmlurl) throws Throwable {
        String bookname = outtag(getbookname(gethtmlcontent(htmlurl)));
        if ("".equals(bookname)) {
            return;
        }
        System.out.println(bookname);
        ArrayList<String> list_values = new ArrayList<String>();
        list_values.add(bookname);
        dbtools.update("insert into bookinfo(bookname) values(?)", list_values);
    }
}
调用servlet:
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webtools.getbook;
public class testbook extends httpservlet {
/**
* constructor of the object.
*/
public testbook() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
int i = 1;
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
dopost(request, response);
}
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
doget(request, response);
}
}
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}
每种功能的实现方法都有很多,希望大家可以交流不同的思路和方法,可以加 QQ:412546724。呵呵
复制代码 代码如下:
package webtools;
import dbtools.dbtools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scrapes a music chart page and stores every song it finds (title, artist,
 * link) in the {@code song} table through {@code dbtools}.
 *
 * <p>Extraction is purely regex based: song cells are {@code <td>} elements
 * containing a link, taken from tables whose opening tag matches
 * {@link #MAIN_TABLE} exactly. JDK identifiers use their real casing; the
 * names declared by this class are kept as published so existing callers
 * continue to resolve.
 */
public class iotoweb {

    /** A table cell holding a link, optionally wrapped in parentheses. */
    private static final Pattern LINK_CELL = Pattern.compile(
            "<td[^>]*>[\\(]*<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)[\\)]*[\\s]*</td>",
            Pattern.DOTALL);

    /** An href attribute; exactly one of groups 1-3 carries the unquoted value. */
    private static final Pattern HREF = Pattern.compile(
            "href=(?:\"([^\"]*)\"|'([^']*)'|([^\\s>\"']+))", Pattern.DOTALL);

    /** A link that directly follows an opening parenthesis, up to the next ')'. */
    private static final Pattern PERSON = Pattern.compile(
            "\\(<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)\\)",
            Pattern.DOTALL);

    /** A complete anchor element that is followed by one whitespace character. */
    private static final Pattern SONG_ANCHOR = Pattern.compile(
            "<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)</a>\\s",
            Pattern.DOTALL);

    /** The chart table(s) of the page, identified by their exact opening tag. */
    private static final Pattern MAIN_TABLE = Pattern.compile(
            "<table width=\"100%\" align=\"center\" cellpadding=\"0\" cellspacing=\"0\" class=\"list\">(.*?)</table>",
            Pattern.DOTALL);

    /** Any markup tag. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    private static final String INSERT_SONG =
            "insert into song(songname,songperson,songhref) values(?,?,?)";

    /** Database helper used for the inserts; opens its connection on construction. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads a page and returns its text decoded as gb2312, with the lines
     * joined without separators.
     *
     * <p>This is best effort: an I/O problem is reported on stderr and
     * whatever was read so far (possibly the empty string) is returned.
     *
     * @param htmlurl address of the page to fetch
     * @return the page text, never {@code null}
     */
    public String gethtmlcontent(String htmlurl) {
        StringBuilder htmlcontent = new StringBuilder();
        BufferedReader in = null;
        try {
            in = new BufferedReader(
                    new InputStreamReader(new URL(htmlurl).openStream(), "gb2312"));
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        } catch (IOException e) {
            // Also covers MalformedURLException and UnsupportedEncodingException.
            e.printStackTrace();
        } finally {
            // Close on every path so a failed read does not leak the connection.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return htmlcontent.toString();
    }

    /**
     * Collects every table cell that contains a link.
     *
     * @param htmlcontent markup to search
     * @return the complete text of each matching {@code <td>} element, in document order
     */
    public List<String> getlink(String htmlcontent) {
        return allMatches(LINK_CELL, htmlcontent);
    }

    /**
     * Extracts the values of all href attributes.
     *
     * <p>Double-quoted, single-quoted and unquoted values are supported and
     * returned without their quotes.
     *
     * @param htmlcontent markup to search
     * @return the href values in document order
     */
    public List<String> gethref(String htmlcontent) {
        List<String> hrefs = new ArrayList<String>();
        Matcher ma = HREF.matcher(htmlcontent);
        while (ma.find()) {
            String value = ma.group(1);
            if (value == null) {
                value = ma.group(2);
            }
            if (value == null) {
                value = ma.group(3);
            }
            hrefs.add(value);
        }
        return hrefs;
    }

    /**
     * Finds the parenthesised links of a cell, which the caller treats as the artist.
     *
     * @param htmlcontent markup of one table cell
     * @return each match, including its parentheses, with the first
     *         {@code href="} and all double quotes removed
     */
    public List<String> getperson(String htmlcontent) {
        List<String> persons = new ArrayList<String>();
        Matcher ma = PERSON.matcher(htmlcontent);
        while (ma.find()) {
            persons.add(ma.group().replaceFirst("href=\"", "").replace("\"", ""));
        }
        return persons;
    }

    /**
     * Finds the anchor elements that are followed by whitespace; the caller
     * treats the first one of a cell as the song link.
     *
     * @param htmlcontent markup of one table cell
     * @return the matching anchors, each including its trailing whitespace character
     */
    public List<String> getsongname(String htmlcontent) {
        return allMatches(SONG_ANCHOR, htmlcontent);
    }

    /**
     * Cuts the chart table(s) out of a page.
     *
     * @param htmlcontent markup of the whole page
     * @return all matching tables concatenated, or the empty string when none match
     */
    public String getmaincontent(String htmlcontent) {
        StringBuilder maincontent = new StringBuilder();
        for (String table : allMatches(MAIN_TABLE, htmlcontent)) {
            maincontent.append(table);
        }
        return maincontent.toString();
    }

    /**
     * Removes every markup tag from a string.
     *
     * @param s text that may contain tags
     * @return {@code s} without anything enclosed in {@code <...>}
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Fetches the chart page and inserts one row per song cell.
     *
     * <p>A cell without a recognisable song anchor or href is skipped, so one
     * odd cell no longer aborts the whole import. When a cell has no
     * parenthesised link the artist is stored as {@code "无"}; when it has
     * several, the last one is used.
     *
     * @param htmlurl address of the chart page
     * @throws Throwable whatever the database helper throws
     */
    public void getfrombaidumap3(String htmlurl) throws Throwable {
        String maincontent = getmaincontent(gethtmlcontent(htmlurl));
        for (String tdtag : getlink(maincontent)) {
            List<String> songnamelist = getsongname(tdtag);
            if (songnamelist.isEmpty()) {
                continue;
            }
            String songanchor = songnamelist.get(0);
            List<String> hreflist = gethref(songanchor);
            if (hreflist.isEmpty()) {
                continue;
            }

            List<String> personlist = getperson(tdtag);
            String songperson = personlist.isEmpty()
                    ? "无"
                    : outtag(personlist.get(personlist.size() - 1));

            System.out.println();
            ArrayList<String> list_values = new ArrayList<String>();
            list_values.add(outtag(songanchor));
            list_values.add(songperson);
            list_values.add(hreflist.get(0));
            dbtools.update(INSERT_SONG, list_values);
        }
    }

    /** Returns the full text of every match of {@code pattern} in {@code input}. */
    private static List<String> allMatches(Pattern pattern, String input) {
        List<String> matches = new ArrayList<String>();
        Matcher matcher = pattern.matcher(input);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }
}
dbtools数据库连接类:
复制代码 代码如下:
package dbtools;
import java.sql.*;
import java.util.ArrayList;
/**
 * Minimal JDBC helper that runs parameterised statements on one connection.
 *
 * <p>The connection is opened in the constructor and there is no method to
 * close it. JDK identifiers use their real casing; the names declared by this
 * class are kept as published so existing callers continue to resolve.
 */
public class dbtools {

    /** Stays {@code null} when the constructor could not connect. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are reported on stderr instead of being thrown, so
     * construction always succeeds; {@link #query} and {@link #update} then
     * fail with an {@link IllegalStateException}.
     */
    public dbtools() {
        try {
            // Class names are case-sensitive: the driver class is "Driver", not "driver".
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            // NOTE(review): URL and credentials are hard-coded; move them to configuration.
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/testurl", "root", "zhuyi");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised query and returns all rows as strings.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values placeholder values, bound in list order starting at index 1
     * @return one {@code String[]} per row, holding every column read with {@code getString}
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable any {@link SQLException} raised by the driver
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
        ArrayList<String[]> listrows = new ArrayList<String[]>();
        PreparedStatement statement = prepare(sql);
        try {
            bind(statement, list_values);
            ResultSet rows = statement.executeQuery();
            try {
                int columncount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columncount];
                    for (int i = 0; i < columncount; i++) {
                        rowinfo[i] = rows.getString(i + 1);
                    }
                    listrows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
        return listrows;
    }

    /**
     * Runs a parameterised insert, update or delete.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values placeholder values, bound in list order starting at index 1
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable any {@link SQLException} raised by the driver
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = prepare(sql);
        try {
            bind(statement, list_values);
            statement.executeUpdate();
        } finally {
            // Closed on every path; previously a failed execution leaked the statement.
            statement.close();
        }
    }

    /** Prepares {@code sql}, failing with a clear message when there is no connection. */
    private PreparedStatement prepare(String sql) throws SQLException {
        if (connection == null) {
            throw new IllegalStateException(
                    "No database connection: it could not be opened when this object was created");
        }
        return connection.prepareStatement(sql);
    }

    /** Binds {@code values} to the placeholders of {@code statement} in order. */
    private static void bind(PreparedStatement statement, ArrayList<?> values)
            throws SQLException {
        for (int i = 0; i < values.size(); i++) {
            statement.setObject(i + 1, values.get(i));
        }
    }
}
servlet调用:
复制代码 代码如下:
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webtools.iotoweb;
public class testurl extends httpservlet {
/**
* constructor of the object.
*/
public testurl() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
try {
iotoweb iotoweb = new iotoweb();
iotoweb.getfrombaidumap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");
} catch (throwable e) {
// todo auto-generated catch block
e.printstacktrace();
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
response.setcontenttype("text/html");
printwriter out = response.getwriter();
out
.println("<!doctype html public \"-//w3c//dtd html 4.01 transitional//en\">");
out.println("<html>");
out.println(" <head><title>a servlet</title></head>");
out.println(" <body>");
out.print(" this is ");
out.print(this.getclass());
out.println(", using the post method");
out.println(" </body>");
out.println("</html>");
out.flush();
out.close();
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}
获取金书网的图书名:
复制代码 代码如下:
package webtools;
import dbtools.dbtools;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads one book page and stores the book title in the {@code bookinfo} table.
 *
 * <p>JDK identifiers use their real casing; the names declared by this class
 * are kept as published so existing callers continue to resolve.
 */
public class getbook {

    /** The element holding the title: a span with class {@code style15} and no nested tags. */
    private static final Pattern BOOK_NAME =
            Pattern.compile("<span class=\"style15\">[^>]*</span>", Pattern.DOTALL);

    /** Any markup tag. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    /** Database helper used for the insert; opens its connection on construction. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads a page and returns its text decoded as gb2312, with the lines
     * joined without separators.
     *
     * @param htmlurl address of the page to fetch
     * @return the page text
     * @throws Throwable if the URL is malformed or the page cannot be read
     */
    public String gethtmlcontent(String htmlurl) throws Throwable {
        StringBuilder htmlcontent = new StringBuilder();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(htmlurl).openStream(), "gb2312"));
        try {
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        } finally {
            // Close on every path so a failed read does not leak the connection.
            in.close();
        }
        return htmlcontent.toString();
    }

    /**
     * Finds the first title element of a page.
     *
     * @param htmlcontent markup of the page
     * @return the whole {@code <span>} element including its tags, or the
     *         empty string when the page has none
     */
    public String getbookname(String htmlcontent) {
        Matcher matcher = BOOK_NAME.matcher(htmlcontent);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Removes every markup tag from a string.
     *
     * @param s text that may contain tags
     * @return {@code s} without anything enclosed in {@code <...>}
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Fetches a book page and, when it has a non-empty title, prints the title
     * and inserts it into {@code bookinfo}.
     *
     * @param htmlurl address of the book page
     * @throws Throwable if the page cannot be read or the insert fails
     */
    public void getfromjinshu(String htmlurl) throws Throwable {
        String bookname = outtag(getbookname(gethtmlcontent(htmlurl)));
        if ("".equals(bookname)) {
            return;
        }
        System.out.println(bookname);
        ArrayList<String> list_values = new ArrayList<String>();
        list_values.add(bookname);
        dbtools.update("insert into bookinfo(bookname) values(?)", list_values);
    }
}
调用servlet:
复制代码 代码如下:
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webtools.getbook;
public class testbook extends httpservlet {
/**
* constructor of the object.
*/
public testbook() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
int i = 1;
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
dopost(request, response);
}
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
doget(request, response);
}
}
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}
每种功能的实现方法都有很多,希望大家可以交流不同的思路和方法,可以加 QQ:412546724。呵呵
下一篇: 茼蒿含钾吗,来看看
推荐阅读