医药CRM系统开发

自己做医药CRM系统有四年了,终于可以算个产品了,努力市场化,今年重点将医药营销的理念加入CRM

导航

网站数据采集

Posted on 2007-11-26 08:48  hhq80  阅读(321)  评论(0)  编辑  收藏  举报

/// <summary>
/// Downloads the resource at <paramref name="sUrl"/> and returns its text content.
/// </summary>
/// <param name="sUrl">Address of the page to fetch; passed unchanged to <c>WebRequest.Create</c>.</param>
/// <returns>
/// All lines of the response concatenated in order. Line terminators are dropped
/// (lines are joined with no separator), which matches what callers have always received.
/// </returns>
/// <remarks>
/// Exceptions raised by <c>WebRequest.Create</c>, <c>GetResponse</c> or the stream reads
/// are not caught here and propagate to the caller.
/// </remarks>
private string GetWebContent(string sUrl)
{
    WebRequest wrGETURL = WebRequest.Create(sUrl);

    // Dispose the response, its stream and the reader even when reading fails,
    // so the underlying connection is released.
    using (WebResponse response = wrGETURL.GetResponse())
    using (Stream objStream = response.GetResponseStream())
    using (StreamReader objReader = new StreamReader(objStream))
    {
        // Fully qualified so the block does not depend on a "using System.Text;" directive.
        System.Text.StringBuilder page = new System.Text.StringBuilder();

        // Append every line, including the first one (the previous loop read the
        // first line and then overwrote it before it was ever appended).
        string sLine;
        while ((sLine = objReader.ReadLine()) != null)
        {
            page.Append(sLine);
        }

        return page.ToString();
    }
}
        //得到指定字串之间的数据
        private string SplitStr(string src, string startstr, string stopstr)
        {
            //找到开始字符的位置
            string resultstr;
            int startpos=0;
            int stoppos=0;
            MatchCollection Matches = Regex.Matches(src, startstr, RegexOptions.None);
            foreach (Match NextMatch in Matches)
            {
                startpos=NextMatch.Index+startstr.Length;
            }
          

            MatchCollection Matches2 = Regex.Matches(src, stopstr, RegexOptions.None);
            foreach (Match NextMatch2 in Matches2)
            {
                stoppos = NextMatch2.Index;
            }
            if (stoppos < startpos)
                stoppos = startpos;
            if (stopstr == "结束符")
                stoppos = src.Length;
            resultstr = src.Substring(startpos, stoppos - startpos);
           // MessageBox.Show(resultstr);
            return resultstr;
        }
        private void button1_Click(object sender, EventArgs e)
        {
            //要抓取的URL地址
            string date1 = DateTime.Now.ToString("yymmddhhmmss");
            MessageBox.Show(date1);
            int j;
            string src;
            string strWebContent;
            string desc, sr, sp;
            desc = "";
            WebBrowser webfda = new WebBrowser();
            for (j = 1; j <= 10; j++)
            {
                string Url = "http://app1.sfda.gov.cn/datasearch/face3/content.jsp?tableId=26&tableName=TABLE26&tableView=%E5%9B%BD%E4%BA%A7%E5%99%A8%E6%A2%B0&Id=" + j.ToString();

                //得到指定Url的源码
                strWebContent = GetWebContent(Url);

                //生成HtmlDocument
                label1.Text = j.ToString();

                webfda.Navigate("about:blank");
                HtmlDocument htmldoc = webfda.Document.OpenNew(true);
                htmldoc.Write(strWebContent);
                //textBox1 .Text= htmldoc.Body.InnerHtml;
                // textBox2.Text = htmldoc.Body.InnerText;
                //生产场所
                src = htmldoc.Body.InnerText;
                sr = "生产场所";
                sp = "变更日期";//
                desc = SplitStr(src, sr, sp);

                sr = "变更日期";
                sp = "备注";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "备注";
                sp = "注册号";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "注册号";
                sp = "生产单位";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "生产单位";
                sp = "地址";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "地址";
                sp = "邮编";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "邮编";
                sp = "产品名称";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "产品名称";
                sp = "产品标准";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "产品标准";
                sp = "产品性能结构及组成";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "产品性能结构及组成";
                sp = "有效期";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "有效期";
                sp = "批准日期";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "批准日期";
                sp = "产品适用范围";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "产品适用范围";
                sp = "规格型号";//
                desc = desc + "," + SplitStr(src, sr, sp);

                sr = "规格型号";
                sp = "结束符";//
                desc = desc + "," + SplitStr(src, sr, sp) + "\n";
                label2.Text = j.ToString();
                textBox2.Text = textBox2.Text + desc;
                desc = "";
            }
           
        }