公司搜索--关于搜索引擎结果的析出(通过工商码实现)
昨天写了一版,通过搜索引擎来查找公司的名称,好多前辈提出这种方式有些偏,于是我又用工商码实现了一个版本,并把两个功能放在了一起。先上界面
右边的输入框用来填写从哪个工商码开始查找。因为很多年前注册的公司我们肯定是不需要的了,再加上编码的前八位是地区码,所以我填了两个起始编码:一个是市区的公司,一个是江宁的公司,这样找出来的就分别是在这两家公司之后、在同一地区注册的公司。结束条件是一连两条数据没有找到结果。当然,这其中还有校验码的问题,也就是大家在代码中看到的做法:先把编码除以10去掉末位,再把末位从0到9逐个尝试。
效果很好,打算一直让它跑着。等数据跑出来了,把文件另存为 .xls 文件,就可以直接用 Excel 打开。这两个地区找完了,再去找其它地区的。
好了,上代码 ,其实代码很简单:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace SearchCompany
{
    /// <summary>
    /// Form that enumerates company registration codes against the registry
    /// query page and lists every company found.
    /// </summary>
    /// <remarks>
    /// Each line of <c>textBox1</c> is a starting registration code. The last
    /// digit of a code is treated as a check digit: it is dropped and all ten
    /// possible digits (0-9) are tried for every candidate prefix. Probing runs
    /// on a background thread; all control updates are marshalled back to the
    /// UI thread through <see cref="setFromThread"/> and <see cref="AddListView"/>.
    /// </remarks>
    public partial class Form2 : Form
    {
        /// <summary>Address the lookup form is posted to.</summary>
        private const string QueryUrl = @"http://www.njgs.gov.cn/CorpQuery";

        /// <summary>
        /// A line's scan ends once more than this many prefixes in a row
        /// produced no result.
        /// </summary>
        private const int MaxMisses = 2;

        /// <summary>Extracts the company name cell from a result page.</summary>
        private static readonly Regex NameRegex = BuildFieldRegex("企业名称");

        string startCode;
        public delegate void MyInvoke1(string str, int type);
        public delegate void MyInvoke2(ListViewItem lvi, int type);

        /// <summary>Set by the UI thread to ask the worker to stop.</summary>
        public volatile bool WaitingStop = false;
        Thread td;
        DateTime startDateTime;

        /// <summary>Guards <see cref="currentLine"/> and <see cref="resumeCode"/>.</summary>
        private readonly object progressLock = new object();

        /// <summary>Snapshot of the input lines, taken on the UI thread when a run starts.</summary>
        private string[] pendingCodes = new string[0];

        /// <summary>Index into <see cref="pendingCodes"/> of the line being scanned.</summary>
        private int currentLine;

        /// <summary>Code to restart from for the current line; null until something was probed.</summary>
        private string resumeCode;

        public Form2()
        {
            InitializeComponent();
            timer1.Interval = 500;
        }

        /// <summary>
        /// Toggles between starting a scan and pausing the running one.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (button1.Text == "暂停")
            {
                WaitingStop = true;
                button1.Text = "开始";
                label2.Text = "已停止";
                textBox1.Enabled = true;

                // Put the resume position back into the input box. The original
                // copied the status label here, which left "已停止" as the next
                // start code and made the following run fail to parse.
                string resumeText = BuildResumeText();
                if (resumeText != null)
                {
                    textBox1.Text = resumeText;
                }
                timer1.Stop();
                return;
            }

            if (textBox1.Text.Trim() == "")
            {
                return;
            }

            // A paused worker may still be inside its last request; starting a
            // second one would clear the stop flag and run both in parallel.
            if (td != null && td.IsAlive)
            {
                label2.Text = "上一次查询尚未停止,请稍候再试";
                return;
            }

            WaitingStop = false;
            startCode = textBox1.Text.Trim();

            // Read the control here, on the UI thread, so the worker never
            // touches textBox1 itself.
            pendingCodes = textBox1.Lines;
            lock (progressLock)
            {
                currentLine = 0;
                resumeCode = null;
            }

            td = new Thread(new ThreadStart(GetCompany));
            td.IsBackground = true;
            td.Start();
            button1.Text = "暂停";
            textBox1.Enabled = false;
            startDateTime = DateTime.Now;
            timer1.Start();
        }

        /// <summary>
        /// Updates the result list; safe to call from any thread.
        /// </summary>
        /// <param name="lvi">Row to append when <paramref name="type"/> is 1; ignored otherwise.</param>
        /// <param name="type">0 removes all rows, 1 appends <paramref name="lvi"/>; other values do nothing.</param>
        public void AddListView(ListViewItem lvi, int type)
        {
            if (type != 0 && type != 1)
            {
                return;
            }

            if (listView1.InvokeRequired)
            {
                MyInvoke2 _myInvoke2 = new MyInvoke2(AddListView);
                this.Invoke(_myInvoke2, new object[] { lvi, type });
                return;
            }

            if (type == 0)
            {
                // Items.Clear() keeps the column headers; ListView.Clear()
                // would remove the columns together with the rows.
                listView1.Items.Clear();
            }
            else
            {
                listView1.Items.Add(lvi);
            }
        }

        /// <summary>
        /// Sets the text of a status control; safe to call from any thread.
        /// </summary>
        /// <param name="str">Text to display.</param>
        /// <param name="type">0 targets <c>label2</c>, 1 targets <c>textBox2</c>; other values do nothing.</param>
        public void setFromThread(string str, int type)
        {
            Control target;
            if (type == 0)
            {
                target = label2;
            }
            else if (type == 1)
            {
                target = textBox2;
            }
            else
            {
                return;
            }

            if (target.InvokeRequired)
            {
                MyInvoke1 _myInvoke = new MyInvoke1(setFromThread);
                this.Invoke(_myInvoke, new object[] { str, type });
            }
            else
            {
                target.Text = str;
            }
        }

        /// <summary>
        /// Worker entry point: scans upward from every start code captured in
        /// <see cref="pendingCodes"/> until the stop flag is set or too many
        /// prefixes in a row yield nothing.
        /// </summary>
        public void GetCompany()
        {
            string[] lines = pendingCodes;
            for (int line = 0; line < lines.Length; line++)
            {
                if (WaitingStop)
                {
                    return;
                }

                string text = lines[line].Trim();
                if (text == "")
                {
                    // Blank lines (e.g. a trailing newline) are not start codes.
                    continue;
                }

                try
                {
                    // Drop the check digit; ProbeCode re-appends 0-9.
                    long Code = long.Parse(text) / 10;
                    int misses = 0;
                    while (!WaitingStop && misses <= MaxMisses)
                    {
                        RecordProgress(line, Code);
                        if (ProbeCode(Code))
                        {
                            // Only consecutive misses end the scan.
                            misses = 0;
                        }
                        else
                        {
                            misses++;
                        }

                        // Advance exactly once per prefix. The original also
                        // incremented inside the hit branch, which skipped the
                        // prefix following every hit.
                        Code++;
                    }

                    if (WaitingStop)
                    {
                        // Leave the "stopped" status text set by the UI in place.
                        return;
                    }
                    setFromThread(Code.ToString() + "0", 0);
                }
                catch (Exception ex)
                {
                    // A failure abandons this line only; remaining lines still run.
                    setFromThread("出现异常:" + ex.Message.ToString(), 0);
                }
            }
        }

        /// <summary>Remembers where a paused scan should restart.</summary>
        private void RecordProgress(int line, long code)
        {
            lock (progressLock)
            {
                currentLine = line;
                resumeCode = code.ToString() + "0";
            }
        }

        /// <summary>
        /// Builds the input text for resuming: the restart code of the line in
        /// progress followed by the lines not yet started. Returns null when
        /// nothing has been probed yet.
        /// </summary>
        private string BuildResumeText()
        {
            string code;
            int line;
            lock (progressLock)
            {
                code = resumeCode;
                line = currentLine;
            }

            if (code == null)
            {
                return null;
            }

            string[] lines = pendingCodes;
            List<string> remaining = new List<string>();
            remaining.Add(code);
            for (int i = line + 1; i < lines.Length; i++)
            {
                remaining.Add(lines[i]);
            }
            return string.Join(Environment.NewLine, remaining.ToArray());
        }

        /// <summary>
        /// Tries all ten check digits for one code prefix and lists the first hit.
        /// </summary>
        /// <returns>True when a company was found for one of the digits.</returns>
        private bool ProbeCode(long code)
        {
            for (int i = 0; i < 10; i++)
            {
                if (WaitingStop)
                {
                    return false;
                }

                long realCode = code * 10 + i;
                setFromThread("当前探测的编码为:" + realCode.ToString().Trim(), 0);

                string pages = QueryPage(realCode);
                setFromThread(pages, 1);

                Match mc = NameRegex.Match(pages);
                if (mc.Success)
                {
                    // The row's own text stays empty; the five fields occupy
                    // SubItems[1..5], which is what the export reads.
                    ListViewItem lvi = new ListViewItem();
                    string[] item = { mc.Value, getData(pages, "注册时间"), getData(pages, "企业注册号"), getData(pages, "登记机关"), getData(pages, "企业状态") };
                    lvi.SubItems.AddRange(item);
                    AddListView(lvi, 1);
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Posts one registration code to the query page and returns the response body.
        /// </summary>
        private static string QueryPage(long realCode)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(QueryUrl);
            request.MaximumAutomaticRedirections = 500;
            request.Method = "Post";
            request.ContentType = "application/x-www-form-urlencoded";
            string postString = "txtRegNoOrCorpName=" + realCode.ToString() + "&Submit2=";
            byte[] postData = Encoding.UTF8.GetBytes(postString);
            request.CookieContainer = new CookieContainer();
            request.Timeout = 3000;
            request.Headers.Set("Pragma", "no-cache");
            request.ContentLength = postData.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(postData, 0, postData.Length);
            }

            // Dispose the response so its connection is released even when
            // reading fails part-way.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream sm = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(sm, Encoding.GetEncoding("utf-8")))
            {
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Builds the pattern matching the table cell that follows the header
        /// cell with the given label.
        /// </summary>
        private static Regex BuildFieldRegex(string label)
        {
            return new Regex(@"(?<=" + Regex.Escape(label) + @"</th>\r\n\s*?<td>\s*).+?(?=</td>)");
        }

        /// <summary>
        /// Returns the value of the labelled field in a result page, or "无"
        /// when the field is not present.
        /// </summary>
        private string getData(string pages, string str)
        {
            Match mc = BuildFieldRegex(str).Match(pages);
            return mc.Success ? mc.Value : "无";
        }

        /// <summary>
        /// Runs the pattern in <c>textBox4</c> against the page text in
        /// <c>textBox2</c> and shows the first match (or the error) in <c>textBox3</c>.
        /// </summary>
        private void RunRegexTest()
        {
            try
            {
                Regex rgx = new Regex(textBox4.Text.Trim());
                Match mc = rgx.Match(textBox2.Text.Trim());
                textBox3.Text = mc.Success ? mc.Value : "未匹配到";
            }
            catch (Exception ex)
            {
                // An incomplete pattern while typing is expected; show why it failed.
                textBox3.Text = ex.Message.ToString();
            }
        }

        private void button2_Click(object sender, EventArgs e)
        {
            RunRegexTest();
        }

        private void textBox4_TextChanged(object sender, EventArgs e)
        {
            RunRegexTest();
        }

        /// <summary>Removes all result rows while keeping the column headers.</summary>
        private void button3_Click(object sender, EventArgs e)
        {
            listView1.Items.Clear();
        }

        private void timer1_Tick(object sender, EventArgs e)
        {
            label3.Text = "已查询时间:" + (DateTime.Now - startDateTime).ToString();
        }

        /// <summary>
        /// Exports the result rows as tab-separated text to the file chosen by the user.
        /// </summary>
        private void button2_Click_1(object sender, EventArgs e)
        {
            saveFileDialog1.DefaultExt = ".xls";
            if (saveFileDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            try
            {
                // using guarantees the file is closed even if a row fails to write.
                using (Stream fs = saveFileDialog1.OpenFile())
                using (StreamWriter sw = new StreamWriter(fs))
                {
                    sw.WriteLine("公司名称" + "\t" + "注册时间" + "\t" + "企业注册号" + "\t" + "登记机关" + "\t" + "企业状态");
                    for (int i = 0; i < listView1.Items.Count; i++)
                    {
                        ListViewItem row = listView1.Items[i];
                        sw.WriteLine(row.SubItems[1].Text + "\t" + row.SubItems[2].Text + "\t" + row.SubItems[3].Text + "\t" + row.SubItems[4].Text + "\t" + row.SubItems[5].Text);
                    }
                }
                MessageBox.Show("文件保存成功");
            }
            catch (Exception ex)
            {
                // MessageBox.Show(string, string) takes a caption, not format
                // arguments, so the message has to be concatenated.
                MessageBox.Show("异常:\n" + ex.Message);
            }
        }
    }

}
小弟抛砖引玉了,希望大家多提意见,轻喷~~~