ASP.NET 保存远程图片的代码
程序员文章站
2022-11-23 17:43:29
摘要:下面的示例类演示如何在 ASP.NET 中采集网页里引用的图片并保存到本地目录(尚未实现 CSS 中图片的采集,图片的正则也有待完善)。
注意:并没有实现css中的图片采集,且图片的正则还有待完善。
using System;
using System.Collections;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
/// <summary>
/// Scraper that collects the images referenced by a web page and saves them to a local folder.
/// </summary>
public class caiji
{
    /// <summary>
    /// Matches a quoted <c>src</c> attribute whose value ends in .gif, .jpg, .png or .bmp.
    /// Whitespace is allowed around the equals sign; the <c>url</c> group holds the attribute
    /// value without the surrounding quotes, so no post-processing of the match is needed.
    /// </summary>
    private static readonly Regex ImageSrcPattern = new Regex(
        @"src\s*=\s*(?<quote>[""'])(?<url>[^""'\s>]+\.(?:gif|jpg|png|bmp))\k<quote>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Creates an instance. All functionality is exposed through static members.
    /// </summary>
    public caiji()
    {
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/>, finds the images referenced by quoted
    /// <c>src</c> attributes and saves each distinct image into <paramref name="path"/>.
    /// </summary>
    /// <param name="url">Absolute address of the page to scan.</param>
    /// <param name="chargest">Name of the text encoding used to decode the page, e.g. "utf-8".</param>
    /// <param name="path">Folder the downloaded images are written to.</param>
    /// <returns>
    /// An HTML fragment listing every image address followed by either the path it was saved to
    /// or the error message of the failed download. Addresses and messages are inserted verbatim
    /// (not HTML-encoded).
    /// </returns>
    public static string caijibyurl(string url, string chargest, string path)
    {
        string html = getsourcetextbyurl(url, chargest);
        Uri pageUri = new Uri(url);

        // Resolve every match against the page address. Uri resolution handles absolute,
        // root-relative ("/a.gif"), protocol-relative ("//host/a.gif") and document-relative
        // references, and keeps the port and the original letter case of the path.
        List<string> imageUrls = new List<string>();
        foreach (Match match in ImageSrcPattern.Matches(html))
        {
            Uri resolved;
            if (!Uri.TryCreate(pageUri, match.Groups["url"].Value, out resolved))
            {
                continue;
            }

            string absolute = resolved.AbsoluteUri;
            if (!imageUrls.Contains(absolute))
            {
                imageUrls.Add(absolute);
            }
        }

        // One fixed-width timestamp for the whole run; the loop index keeps names unique.
        string stamp = DateTime.Now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);
        string folder = path ?? string.Empty;
        StringBuilder report = new StringBuilder();

        using (WebClient client = new WebClient())
        {
            for (int j = 0; j < imageUrls.Count; j++)
            {
                Uri imageUri = new Uri(imageUrls[j]);
                string fileName = stamp + "_" + j + Path.GetExtension(imageUri.AbsolutePath);
                string savePath = Path.Combine(folder, fileName);
                try
                {
                    client.DownloadFile(imageUri, savePath);
                    report.Append(imageUrls[j]).Append("<br /> 保存路径为:").Append(savePath).Append("<br /><br />");
                }
                catch (Exception e)
                {
                    // Best effort: one failed image must not abort the remaining downloads.
                    report.Append(imageUrls[j]).Append("<br /> ").Append(e.Message).Append("<br /><br />");
                }
            }
        }

        return report.ToString();
    }

    /// <summary>
    /// Fetches the resource at <paramref name="url"/> and returns its body decoded as text.
    /// </summary>
    /// <param name="url">Address of the resource to fetch.</param>
    /// <param name="chargest">Name of the text encoding used to decode the response.</param>
    /// <returns>The complete response body as a string.</returns>
    public static string getsourcetextbyurl(string url, string chargest)
    {
        WebRequest request = WebRequest.Create(url);
        request.Timeout = 20000; // 20-second timeout

        // Dispose the response, its stream and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(chargest)))
        {
            return reader.ReadToEnd();
        }
    }
}
使用:比如我是保存到upload文件夹中的:
// Map the site-relative ~/upload/ folder to a physical path, then write the scraper's report to the response.
// NOTE(review): the address lost its quotes and scheme in the source; "http://" is assumed here.
string path = Server.MapPath("~/upload/");
Response.Write(caiji.caijibyurl("http://www.jb51.net", "utf-8", path));
完整代码如下:
using system;
using system.data;
using system.configuration;
using system.web;
using system.web.security;
using system.web.ui;
using system.web.ui.webcontrols;
using system.web.ui.webcontrols.webparts;
using system.web.ui.htmlcontrols;
//引入空间
using system.net;
using system.io;
using system.text;
using system.text.regularexpressions;
using system.collections;
/// <summary>
/// Scraper that collects the images referenced by a web page and saves them to a local folder.
/// </summary>
public class caiji
{
    /// <summary>
    /// Matches a quoted <c>src</c> attribute whose value ends in .gif, .jpg, .png or .bmp.
    /// Whitespace is allowed around the equals sign; the <c>url</c> group holds the attribute
    /// value without the surrounding quotes, so no post-processing of the match is needed.
    /// </summary>
    private static readonly Regex ImageSrcPattern = new Regex(
        @"src\s*=\s*(?<quote>[""'])(?<url>[^""'\s>]+\.(?:gif|jpg|png|bmp))\k<quote>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Creates an instance. All functionality is exposed through static members.
    /// </summary>
    public caiji()
    {
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/>, finds the images referenced by quoted
    /// <c>src</c> attributes and saves each distinct image into <paramref name="path"/>.
    /// </summary>
    /// <param name="url">Absolute address of the page to scan.</param>
    /// <param name="chargest">Name of the text encoding used to decode the page, e.g. "utf-8".</param>
    /// <param name="path">Folder the downloaded images are written to.</param>
    /// <returns>
    /// An HTML fragment listing every image address followed by either the path it was saved to
    /// or the error message of the failed download. Addresses and messages are inserted verbatim
    /// (not HTML-encoded).
    /// </returns>
    public static string caijibyurl(string url, string chargest, string path)
    {
        string html = getsourcetextbyurl(url, chargest);
        Uri pageUri = new Uri(url);

        // Resolve every match against the page address. Uri resolution handles absolute,
        // root-relative ("/a.gif"), protocol-relative ("//host/a.gif") and document-relative
        // references, and keeps the port and the original letter case of the path.
        List<string> imageUrls = new List<string>();
        foreach (Match match in ImageSrcPattern.Matches(html))
        {
            Uri resolved;
            if (!Uri.TryCreate(pageUri, match.Groups["url"].Value, out resolved))
            {
                continue;
            }

            string absolute = resolved.AbsoluteUri;
            if (!imageUrls.Contains(absolute))
            {
                imageUrls.Add(absolute);
            }
        }

        // One fixed-width timestamp for the whole run; the loop index keeps names unique.
        string stamp = DateTime.Now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);
        string folder = path ?? string.Empty;
        StringBuilder report = new StringBuilder();

        using (WebClient client = new WebClient())
        {
            for (int j = 0; j < imageUrls.Count; j++)
            {
                Uri imageUri = new Uri(imageUrls[j]);
                string fileName = stamp + "_" + j + Path.GetExtension(imageUri.AbsolutePath);
                string savePath = Path.Combine(folder, fileName);
                try
                {
                    client.DownloadFile(imageUri, savePath);
                    report.Append(imageUrls[j]).Append("<br /> 保存路径为:").Append(savePath).Append("<br /><br />");
                }
                catch (Exception e)
                {
                    // Best effort: one failed image must not abort the remaining downloads.
                    report.Append(imageUrls[j]).Append("<br /> ").Append(e.Message).Append("<br /><br />");
                }
            }
        }

        return report.ToString();
    }

    /// <summary>
    /// Fetches the resource at <paramref name="url"/> and returns its body decoded as text.
    /// </summary>
    /// <param name="url">Address of the resource to fetch.</param>
    /// <param name="chargest">Name of the text encoding used to decode the response.</param>
    /// <returns>The complete response body as a string.</returns>
    public static string getsourcetextbyurl(string url, string chargest)
    {
        WebRequest request = WebRequest.Create(url);
        request.Timeout = 20000; // 20-second timeout

        // Dispose the response, its stream and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(chargest)))
        {
            return reader.ReadToEnd();
        }
    }
}
使用:比如我是保存到upload文件夹中的:
调用代码如下:
// Map the site-relative ~/upload/ folder to a physical path, then write the scraper's report to the response.
// NOTE(review): the address lost its quotes and scheme in the source; "http://" is assumed here.
string path = Server.MapPath("~/upload/");
Response.Write(caiji.caijibyurl("http://www.jb51.net", "utf-8", path));