欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

c#实现网页图片提取工具代码分享

程序员文章站 2024-02-18 23:19:28
复制代码 代码如下:public array matchhtml(string html,string com)     ...

复制代码 代码如下:

/// <summary>
/// Extracts image URLs from the <c>src="..."</c> and <c>href="..."</c>
/// attributes of an HTML document.
/// </summary>
/// <param name="html">Raw HTML text. Null or empty yields an empty result.</param>
/// <param name="com">
/// Site origin (for example <c>http://www.example.com</c>) used to turn
/// relative URLs into absolute ones. May be null or empty, in which case
/// relative URLs are returned unchanged.
/// </param>
/// <returns>
/// A <c>string[]</c> (typed as <see cref="Array"/>) of distinct URLs ending in
/// .jpg, .png, .gif, .bmp or .ico; all <c>src</c> matches come first, then
/// <c>href</c> matches, each in document order.
/// </returns>
/// <remarks>
/// The whole document is lower-cased before matching, so the returned URLs are
/// lower-case as well. That keeps the extensions in the lower-case form the
/// caller's extension switch compares against, at the price of altering
/// case-sensitive paths.
/// </remarks>
public Array matchhtml(string html, string com)
       {
           List<string> urls = new List<string>();
           if (string.IsNullOrEmpty(html))
               return urls.ToArray();

           // Invariant culture: a culture-sensitive ToLower can map 'I' to a
           // dotless i under some locales, which would hide "SRC"/"GIF" from the regex.
           html = html.ToLowerInvariant();

           CollectImageUrls(html, "src", com, urls);
           CollectImageUrls(html, "href", com, urls);
           return urls.ToArray();
       }

       // Appends to `urls` every not-yet-seen image URL found in attribute="..." within `html`.
       private static void CollectImageUrls(string html, string attribute, string com, List<string> urls)
       {
           // The extension list is an alternation behind a literal dot. The URL is
           // captured in group 1 so no text has to be stripped from the match afterwards.
           Regex pattern = new Regex(attribute + "=\"([^\"]*\\.(?:jpg|png|gif|bmp|ico))\"");
           foreach (Match m in pattern.Matches(html))
           {
               string url = ToAbsoluteUrl(m.Groups[1].Value, com);
               if (!urls.Contains(url))
                   urls.Add(url);
           }
       }

       // Resolves `url` against the site origin `com` when it is not already absolute.
       private static string ToAbsoluteUrl(string url, string com)
       {
           if (url.StartsWith("http://", StringComparison.Ordinal) ||
               url.StartsWith("https://", StringComparison.Ordinal))
               return url;

           // Scheme-relative reference ("//host/path"): borrow the scheme of the origin.
           if (url.StartsWith("//", StringComparison.Ordinal))
           {
               bool secure = com != null && com.StartsWith("https:", StringComparison.OrdinalIgnoreCase);
               return (secure ? "https:" : "http:") + url;
           }

           if (string.IsNullOrEmpty(com))
               return url;

           // Only the origin is known here, so a path without a leading slash is
           // resolved against the site root rather than the page's directory.
           string origin = com.TrimEnd('/');
           return url.StartsWith("/", StringComparison.Ordinal) ? origin + url : origin + "/" + url;
       }

复制代码 代码如下:

// Win32 console bindings. The C# method names are kept as they appear in this
// listing; EntryPoint names the actual kernel32 export, because export lookup
// is case-sensitive and the lower-case names do not exist in kernel32.dll.
[DllImport("kernel32.dll", EntryPoint = "SetConsoleMode", SetLastError = true)]
       static extern bool setconsolemode(IntPtr hconsolehandle, int mode);
       [DllImport("kernel32.dll", EntryPoint = "GetConsoleMode", SetLastError = true)]
       static extern bool getconsolemode(IntPtr hconsolehandle, out int mode);
       [DllImport("kernel32.dll", EntryPoint = "GetStdHandle", SetLastError = true)]
       static extern IntPtr getstdhandle(int handle);
       // Win32 STD_INPUT_HANDLE: selects the console input buffer in GetStdHandle.
       const int std_input_handle = -10;
       // ENABLE_QUICK_EDIT_MODE (0x40) combined with ENABLE_EXTENDED_FLAGS (0x80);
       // the Win32 documentation requires the extended flag for the quick-edit bit to be honoured.
       const int enable_quick_edit_mode = 0x40 | 0x80;
       public static void enablequickeditmode()
       {
           int mode; intptr handle = getstdhandle(std_input_handle);
           getconsolemode(handle, out mode);
           mode |= enable_quick_edit_mode;
           setconsolemode(handle, mode);
       }
       static void main(string[] args)
       {
           enablequickeditmode();
           int oldcount = 0;
           console.title = "takeimagefrominternet";
           string path = "e:\\download\\loading\\";
           while (true)
           {
               console.clear();
               string countfile = "e:\\countfile.txt";//用来计数的文本,以至于文件名不重复
               int cursor = 0;
               if (file.exists(countfile))
               {
                   string text = file.readalltext(countfile);
                   try
                   {
                       cursor =oldcount = convert.toint32(text);//次数多了建议使用long
                   }
                   catch { }
               }
               console.write("please input a url:");
               string url = "http://www.baidu.com/";
               string temp = console.readline();
               if (!string.isnullorempty(temp))
                   url = temp;
               match mcom = new regex(@"^(?i)http://(\w+\.){2,3}(com(\.cn)?|cn|net)\b").match(url);//获取域名
               string com = mcom.value;
               //console.writeline(mcom.value);
               console.write("please input a save path:");
               temp = console.readline();
               if (directory.exists(temp))
                   path = temp;
               console.writeline();
               webclient client = new webclient();
               byte[] htmldata = null;
               htmldata = client.downloaddata(url);
               memorystream mstream = new memorystream(htmldata);
               string html = "";
               using (streamreader sr = new streamreader(mstream))
               {
                   html = sr.readtoend();
               }
               array urls = new matchhtmlimageurl().matchhtml(html,com);

               foreach (string imageurl in urls)
               {
                  console.writeline(imageurl);
                   byte[] imagedata = null;
                   try
                   {
                       imagedata = client.downloaddata(imageurl);
                   }
                   catch { }
                   if (imagedata != null && imagedata.length>0)
                       using (memorystream ms = new memorystream(imagedata))
                       {
                           try
                           {

                               string ext = aping.utility.file.fileopration.extendname(imageurl);
                               imageformat format = imageformat.jpeg;
                               switch (ext)
                               {
                                   case ".jpg":
                                       format = imageformat.jpeg;
                                       break;
                                   case ".bmp":
                                       format = imageformat.bmp;
                                       break;
                                   case ".png":
                                       format = imageformat.png;
                                       break;
                                   case ".gif":
                                       format = imageformat.gif;
                                       break;
                                   case ".ico":
                                       format = imageformat.icon;
                                       break;
                                   default:
                                       continue;
                               }
                               image image = new bitmap(ms);
                               if (directory.exists(path))
                                   image.save(path + "\\" + cursor + ext, format);
                           }
                           catch(exception ex) { console.writeline(ex.message); }
                       }
                   cursor++;
               }
               mstream.close();
               file.writealltext(countfile, cursor.tostring(), encoding.utf8);
               console.writeline("take done...image count:"+(cursor-oldcount).tostring());
           }           
       }