用 C# 语言实现对网页图片的随机抓取。这是一种基础办法,目前还有一点小问题,欢迎大家指正。

  // State shared by the two menu handlers below: the "read page source" handler
  // fills these fields when the user clicks it.
        // Address of the page to fetch, copied from comboBox1.Text.
        string wangzhi;
        // HTML source of the last fetched page; the image-extraction handler scans it.
        string respHtml;
        private void 读取网页源代码ToolStripMenuItem_Click(object sender, EventArgs e)
        {
            textBox2.Clear();  //读 取之前清空
            wangzhi = comboBox1.Text;
            HttpWebRequest rep = (HttpWebRequest)WebRequest.Create(wangzhi);  //通过网址找到网页放在rep里。建立连接
            HttpWebResponse resp = (HttpWebResponse)rep.GetResponse();  //读取网页
            Encoding htmlEncoding = Encoding.Default;   //确定编码格式
            StreamReader sr = new StreamReader(resp.GetResponseStream(), htmlEncoding); //把网页源代码存入流中
            respHtml = sr.ReadToEnd();  //把流从头到尾读出,转换成字符串
            textBox2.Text = respHtml; //获取的网页源代码
            comboBox1.Items.Add(comboBox1.Text);

    }

 

  // Extracts the images referenced by the fetched page source and downloads them to drive E:.
        // Number of <img> tags found by the image-extraction handler (the listView1 item
        // count taken right after the tags were listed).
        public int num = 0;
        /// <summary>
        /// Menu handler: finds every <c>&lt;img&gt;</c> tag in the cached page source
        /// (<c>respHtml</c>), lists the tags in <c>listView1</c>, then downloads each
        /// image to <c>E://&lt;index&gt;.jpg</c> and appends one result line per image.
        /// </summary>
        /// <param name="sender">The menu item that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void 从网页源码中读取图片ToolStripMenuItem_Click(object sender, EventArgs e)
        {
            listView1.Columns.Clear();
            listView1.Items.Clear();
            listView1.Columns.Add("链接地址和图片地址", 700);
            num = 0;

            // Nothing has been fetched yet: Regex.Matches would throw on a null input.
            if (string.IsNullOrEmpty(respHtml))
            {
                MessageBox.Show("请先读取网页源代码");
                return;
            }

            // Matches <img ... src=...> with double-quoted, single-quoted or unquoted
            // values; the address itself is captured in the "imgUrl" group.
            MatchCollection imgTags = Regex.Matches(respHtml, @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
            foreach (Match tag in imgTags)
            {
                listView1.Items.Add(tag.Value);
            }
            num = listView1.Items.Count;  // result lines appended below are not counted

            // Relative src values ("/img/a.png", "b.jpg") are resolved against the page address.
            Uri baseUri;
            bool hasBase = Uri.TryCreate((wangzhi ?? string.Empty).Trim(), UriKind.Absolute, out baseUri);

            for (int i = 0; i < num; i++)
            {
                // Use the address captured by the regex directly instead of re-parsing
                // the list item's display text.
                string src = imgTags[i].Groups["imgUrl"].Value;

                Uri imgUri;
                bool resolved;
                if (string.IsNullOrEmpty(src))
                {
                    // An empty src would resolve to the page itself; treat it as invalid.
                    imgUri = null;
                    resolved = false;
                }
                else if (hasBase)
                {
                    resolved = Uri.TryCreate(baseUri, src, out imgUri);
                }
                else
                {
                    resolved = Uri.TryCreate(src, UriKind.Absolute, out imgUri);
                }

                if (!resolved)
                {
                    listView1.Items.Add(src + " 无法解析为有效的图片地址");
                    continue;
                }

                // Files are named after the tag's index in the page.
                string path = "E://" + i.ToString() + ".jpg";
                string error;
                if (TryDownloadImage(imgUri, path, out error))
                {
                    listView1.Items.Add(path + "图片保存成功!");
                }
                else
                {
                    // One broken image must not stop the others: record it and go on.
                    listView1.Items.Add(src + " 下载失败:" + error);
                }
            }

            // Shown exactly once, after every image has been attempted.
            MessageBox.Show("本网页图片读取完毕");
        }

        /// <summary>
        /// Downloads <paramref name="imageUri"/> into the file <paramref name="path"/>.
        /// </summary>
        /// <param name="imageUri">Absolute address of the image.</param>
        /// <param name="path">Destination file; an existing file is overwritten.</param>
        /// <param name="error">Failure description, or <c>null</c> on success.</param>
        /// <returns><c>true</c> when the whole response body was written to disk.</returns>
        private static bool TryDownloadImage(Uri imageUri, string path, out string error)
        {
            try
            {
                WebRequest req = WebRequest.Create(imageUri);

                // using-blocks close the response and both streams even if the copy fails.
                // FileMode.Create truncates an existing file, so no bytes of an older,
                // larger download are left behind at the end of the new image.
                using (WebResponse res = req.GetResponse())
                using (Stream reader = res.GetResponseStream())
                using (FileStream writer = new FileStream(path, FileMode.Create, FileAccess.Write))
                {
                    // The body is copied in chunks: Read fills at most buff.Length bytes and
                    // returns how many it actually delivered; 0 means the stream has ended.
                    byte[] buff = new byte[4096];
                    int c;
                    while ((c = reader.Read(buff, 0, buff.Length)) > 0)
                    {
                        writer.Write(buff, 0, c);
                    }
                }

                error = null;
                return true;
            }
            catch (WebException ex)
            {
                error = ex.Message;  // network or HTTP failure
            }
            catch (IOException ex)
            {
                error = ex.Message;  // broken stream or missing drive/directory
            }
            catch (NotSupportedException ex)
            {
                error = ex.Message;  // URI scheme WebRequest cannot handle (e.g. data:)
            }
            catch (UnauthorizedAccessException ex)
            {
                error = ex.Message;  // destination file is not writable
            }
            return false;
        }
    }
  }

 

posted @ 2015-06-20 11:44  zhenximeiyitian  阅读(369)  评论(0编辑  收藏  举报