java使用Jsoup连接网站超时的解决方法
今天做了一个用 Jsoup 解析网站的项目,使用 Jsoup.connect(url).get() 连接某网站时偶尔会出现
java.net.SocketTimeoutException: Read timed out 异常。
原因是默认的 socket 超时时间比较短,而有些网站的响应速度比较慢,
所以会发生超时的情况。
解决方法:
连接的时候设定超时时间即可。
doc = Jsoup.connect(url).timeout(5000).get();
5000 表示超时时间设置为 5000 毫秒,即 5 秒。
测试代码如下:
1,不设定timeout时:
package jsouptest;

import java.io.IOException;
import org.jsoup.*;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo 1: fetches a page with Jsoup without setting an explicit timeout.
 *
 * <p>The connection uses whatever timeout Jsoup applies by default, so a slow
 * site can fail with {@code java.net.SocketTimeoutException: Read timed out}.
 * This listing is intentionally left unguarded to reproduce that failure.
 */
public class jsouptest {

    /**
     * Fetches the page, prints the elapsed time, then prints the page title.
     *
     * @param args unused
     * @throws IOException declared for the Jsoup call; in practice every
     *     exception raised by the fetch is caught and printed below
     */
    public static void main(String[] args) throws IOException {
        // Jsoup.connect only accepts http/https URLs, so the scheme must be present.
        String url = "http://www.jb51.net";
        long start = System.currentTimeMillis();
        Document doc = null;
        try {
            // No .timeout(...) call here: this is the case being demonstrated.
            doc = Jsoup.connect(url).get();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always report how long the attempt took, whether it succeeded or not.
            System.out.println("time is:" + (System.currentTimeMillis() - start) + "ms");
        }
        // If the fetch failed, doc is still null and the next line throws
        // NullPointerException; that is the second error in the sample output.
        Elements elem = doc.getElementsByTag("title");
        System.out.println("title is:" + elem.text());
    }
}
有时发生超时:
java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(Unknown Source)
at java.net.SocketInputStream.read(Unknown Source)
at java.io.BufferedInputStream.fill(Unknown Source)
at java.io.BufferedInputStream.read1(Unknown Source)
at java.io.BufferedInputStream.read(Unknown Source)
at sun.net.www.http.ChunkedInputStream.fastRead(Unknown Source)
at sun.net.www.http.ChunkedInputStream.read(Unknown Source)
at java.io.FilterInputStream.read(Unknown Source)
at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(Unknown Source)
at java.util.zip.InflaterInputStream.fill(Unknown Source)
at java.util.zip.InflaterInputStream.read(Unknown Source)
at java.util.zip.GZIPInputStream.read(Unknown Source)
at java.io.BufferedInputStream.read1(Unknown Source)
at java.io.BufferedInputStream.read(Unknown Source)
at java.io.FilterInputStream.read(Unknown Source)
at org.jsoup.helper.DataUtil.readToByteBuffer(DataUtil.java:113)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:447)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:393)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:159)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:148)
at jsouptest.jsouptest.main(jsouptest.java:17)
time is:3885ms
Exception in thread "main" java.lang.NullPointerException
at jsouptest.jsouptest.main(jsouptest.java:25)
2,设定 timeout 后,一般不会超时:
package jsouptest;

import java.io.IOException;
import org.jsoup.*;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo 2: fetches a page with Jsoup using an explicit timeout, so that slow
 * sites are given enough time to respond before the request is abandoned.
 */
public class jsouptest {

    /** Timeout passed to Jsoup, in milliseconds (5 seconds). */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Fetches the page with a 5-second timeout, prints the elapsed time, and
     * prints the page title if the fetch succeeded.
     *
     * @param args unused
     * @throws IOException declared for the Jsoup call; I/O failures raised by
     *     the fetch are caught and printed below
     */
    public static void main(String[] args) throws IOException {
        // Jsoup.connect only accepts http/https URLs, so the scheme must be present.
        String url = "http://www.jb51.net";
        long start = System.currentTimeMillis();
        Document doc = null;
        try {
            doc = Jsoup.connect(url).timeout(TIMEOUT_MILLIS).get();
        } catch (IOException e) {
            // Covers timeouts and other network/HTTP failures reported by Jsoup.
            e.printStackTrace();
        } finally {
            // Always report how long the attempt took, whether it succeeded or not.
            System.out.println("time is:" + (System.currentTimeMillis() - start) + "ms");
        }
        if (doc == null) {
            // The fetch failed and the cause was already printed; dereferencing
            // doc here would only add a NullPointerException on top of it.
            return;
        }
        Elements elem = doc.getElementsByTag("title");
        System.out.println("title is:" + elem.text());
    }
}