C#实现网页图片提取工具代码分享
// Extracts image URLs from an HTML document.
//
// Looks at every src="..." and href="..." attribute whose value ends in
// .jpg, .png, .gif, .bmp or .ico and returns the distinct URLs in the order
// they were first seen (all src matches first, then the href matches).
//
// html: page markup. It is lower-cased before matching, so the returned URLs
//       are lower-case too.
//       NOTE(review): lower-casing can break URLs on case-sensitive servers;
//       it is kept because the caller in this file switches on lower-case
//       extensions.
// com:  scheme-and-host prefix (for example "http://www.example.com") that is
//       prepended to every URL that is not already absolute. null is treated
//       as an empty prefix.
// Returns the collected URLs as an array of strings; empty when html is null
// or empty.
public array matchhtml(string html,string com)
{
    list<string> urls = new list<string>();
    if (string.isnullorempty(html))
        return urls.toarray();
    if (com == null)
        com = "";
    html = html.tolower();
    collectimageurls(html, "src", com, urls);
    collectimageurls(html, "href", com, urls);
    return urls.toarray();
}

// Appends to urls every not-yet-listed image URL found in attribute="..." pairs of html.
private static void collectimageurls(string html, string attribute, string com, list<string> urls)
{
    // The extensions must be an alternation group. Written as a character
    // class ("[(.jpg)(.png)...]") the pattern accepted any value whose last
    // character was one of those letters, e.g. "script.php".
    // Group 1 captures the bare attribute value, so no text has to be
    // stripped from the URL afterwards.
    regex pattern = new regex(attribute + "=\"([^\"]*\\.(?:jpg|png|gif|bmp|ico))\"");
    foreach (match m in pattern.matches(html))
    {
        string url = m.groups[1].value;
        if (url.startswith("//"))
        {
            // Protocol-relative URL: it already names its host and only lacks the scheme.
            url = (com.startswith("https") ? "https:" : "http:") + url;
        }
        else if (!url.startswith("http"))
        {
            // Relative path: resolve it against the site prefix.
            url = com + url;
        }
        if (!urls.contains(url))
            urls.add(url);
    }
}
// Native console functions imported from kernel32.dll.

// Writes the mode flags of the console buffer identified by hconsolehandle.
[dllimport("kernel32.dll")]
static extern bool setconsolemode(intptr hconsolehandle, int mode);
// Reads the current mode flags of the console buffer into mode.
[dllimport("kernel32.dll")]
static extern bool getconsolemode(intptr hconsolehandle, out int mode);
// Returns the handle of one of the standard devices, selected by handle.
[dllimport("kernel32.dll")]
static extern intptr getstdhandle(int handle);
// Selector passed to getstdhandle for the standard input device
// (STD_INPUT_HANDLE in the Windows console API).
const int std_input_handle = -10;
// Flags OR-ed into the input mode by enablequickeditmode: in the Windows
// console API 0x40 is ENABLE_QUICK_EDIT_MODE and 0x80 is ENABLE_EXTENDED_FLAGS.
const int enable_quick_edit_mode = 0x40 | 0x80;
// Adds the enable_quick_edit_mode flags to the mode of the standard-input console.
//
// Reads the current input mode, ORs in enable_quick_edit_mode and writes the
// result back, so all other mode flags stay as they were. When the current
// mode cannot be read the method returns without changing anything, instead
// of writing a mode built from an unset value.
public static void enablequickeditmode()
{
    intptr handle = getstdhandle(std_input_handle);
    int mode;
    if (!getconsolemode(handle, out mode))
    {
        // Presumably standard input is not a console here (e.g. redirected).
        return;
    }
    setconsolemode(handle, mode | enable_quick_edit_mode);
}
// Console entry point: repeatedly asks for a page URL and a save directory,
// downloads the page and saves every image that matchhtml finds in it.
//
// Files are named <counter><extension>. The counter is persisted in
// e:\countfile.txt so names do not repeat across runs, and it advances only
// when an image was actually written, so the reported count equals the
// number of files saved.
//
// Pressing Enter at the URL prompt uses http://www.baidu.com/. A save path
// that does not exist is ignored and the previous path is kept.
// The loop ends when standard input is closed.
static void main(string[] args)
{
    enablequickeditmode();
    console.title = "takeimagefrominternet";
    string path = "e:\\download\\loading\\";
    // Text file holding the running counter so that file names never repeat.
    string countfile = "e:\\countfile.txt";
    while (true)
    {
        // Resume numbering from the persisted counter; start from 0 when the
        // file is missing or does not contain a number.
        // NOTE: switch to long if the counter may outgrow int.
        int cursor = 0;
        if (file.exists(countfile))
        {
            if (!int.tryparse(file.readalltext(countfile), out cursor))
                cursor = 0;
        }
        int oldcount = cursor;

        console.write("please input a url:");
        string url = "http://www.baidu.com/";
        string temp = console.readline();
        if (temp == null)
            break; // standard input was closed; without this the loop would spin forever
        if (!string.isnullorempty(temp))
            url = temp;
        // Scheme + host of the page, used to resolve relative image paths.
        // NOTE(review): only hosts ending in com, com.cn, cn or net are
        // recognised; for any other URL the prefix is empty.
        match mcom = new regex(@"^(?i)https?://(\w+\.){2,3}(com(\.cn)?|cn|net)\b").match(url);
        string com = mcom.value;
        console.write("please input a save path:");
        temp = console.readline();
        if (directory.exists(temp))
            path = temp;
        // Clear only after the prompts have been answered. Clearing at the
        // top of the loop erased the previous summary (or error) the moment
        // it had been printed.
        console.clear();

        using (webclient client = new webclient())
        {
            string html;
            try
            {
                byte[] htmldata = client.downloaddata(url);
                // Disposing the reader also closes the underlying memory stream.
                using (streamreader sr = new streamreader(new memorystream(htmldata)))
                {
                    html = sr.readtoend();
                }
            }
            catch (exception ex)
            {
                // A bad URL or a network failure should not end the session.
                console.writeline(ex.message);
                continue;
            }

            array urls = new matchhtmlimageurl().matchhtml(html, com);
            foreach (string imageurl in urls)
            {
                console.writeline(imageurl);
                byte[] imagedata = null;
                try
                {
                    imagedata = client.downloaddata(imageurl);
                }
                catch (exception ex)
                {
                    // Best effort: report the failure and move on to the next image.
                    console.writeline(ex.message);
                }
                if (imagedata == null || imagedata.length == 0)
                    continue;

                using (memorystream ms = new memorystream(imagedata))
                {
                    try
                    {
                        string ext = aping.utility.file.fileopration.extendname(imageurl);
                        imageformat format = imageformat.jpeg;
                        switch (ext)
                        {
                            case ".jpg":
                                format = imageformat.jpeg;
                                break;
                            case ".bmp":
                                format = imageformat.bmp;
                                break;
                            case ".png":
                                format = imageformat.png;
                                break;
                            case ".gif":
                                format = imageformat.gif;
                                break;
                            case ".ico":
                                format = imageformat.icon;
                                break;
                            default:
                                // Unknown extension: skip this URL.
                                continue;
                        }
                        using (image image = new bitmap(ms))
                        {
                            if (directory.exists(path))
                            {
                                image.save(path + "\\" + cursor + ext, format);
                                // Advance only after a successful save, so numbering
                                // has no gaps and the summary counts real files.
                                cursor++;
                            }
                        }
                    }
                    catch (exception ex) { console.writeline(ex.message); }
                }
            }
        }

        file.writealltext(countfile, cursor.tostring(), encoding.utf8);
        console.writeline("take done...image count:" + (cursor - oldcount).tostring());
    }
}