城市小区信息
要统计城市小区淘宝用户的覆盖率。数据部门貌似没有统计这个数据,也许是我没找到吧,于是用 jsoup 抓取一下。
感谢http://hangzhou.haozu.com/无偿提供数据,一次性的廉价代码。自己去云梯搞数据真是烦,剩下的就交给数据分析的同事去帮忙吧。
1 import org.jsoup.Jsoup; 2 import org.jsoup.nodes.Document; 3 import org.jsoup.nodes.Element; 4 import org.jsoup.select.Elements; 5 6 import java.io.FileOutputStream; 7 import java.io.IOException; 8 import java.io.PrintWriter; 9 import java.util.HashMap; 10 import java.util.HashSet; 11 import java.util.Map; 12 import java.util.Set; 13 14 /** 15 * Created with IntelliJ IDEA. 16 * User: zhangbin 17 * Date: 13-6-20 18 * Time: 下午10:11 19 * To change this template use File | Settings | File Templates. 20 */ 21 public class Main { 22 public static void main(String[] args){ 23 String[] cities = new String[]{"beijing","tianjin","dalian","sjz","heb","sy","ty","cc","shanghai","hangzhou","nanjing","jinan","qd" 24 ,"xz","shenzhen","guangzhou","cs","haikou","xm","chengdu","chongqing","wuhan","zhengzhou","xa","lz","ly","gy" 25 }; 26 String[] citynames = new String[]{"北京","天津","大连","石家庄","哈尔滨","沈阳","太原","长春","上海","杭州","南京","济南","青岛","徐州","深圳","广州","长沙","海口","厦门" 27 ,"成都","重庆","武汉","郑州","西安","兰州","洛阳","贵阳" 28 }; 29 String middle = "haozu.com/community/"; 30 String head = "http://"; 31 String url = ""; 32 Map<String,Set<String>> blocksMap = new HashMap<String,Set<String>>(); 33 for(int i =0 ;i<cities.length;i++){ 34 String city = cities[i]; 35 String cityName = citynames[i]; 36 url = head+city+"."+middle; 37 Set<String> blocks = new HashSet<String>(); 38 for(int j=1;j<=10;j++){ 39 try { 40 Document doc = Jsoup.connect(url+"p"+j).get(); 41 Elements eles = doc.getElementsByClass("clist_name"); 42 for(Element ele : eles){ 43 Element tmp = ele.getElementsByTag("a").get(0); 44 String block = tmp.text(); 45 int index = block.indexOf('('); 46 if(index != -1){ 47 block = block.substring(0,index); 48 } 49 index = block.indexOf('('); 50 if(index != -1){ 51 block = block.substring(0,index); 52 } 53 blocks.add(block); 54 } 55 } catch (IOException e) { 56 System.out.println("error"); 57 } 58 } 59 blocksMap.put(cityName,blocks); 60 } 61 String lineSep = System.getProperty("line.separator"); 62 
try { 63 FileOutputStream fos = new FileOutputStream("/home/zhangbin/CityBlocks.data"); 64 PrintWriter pw = new PrintWriter(fos); 65 for(Map.Entry<String,Set<String>> entry : blocksMap.entrySet()){ 66 Set<String> set = entry.getValue(); 67 pw.write(lineSep+lineSep+entry.getKey()+lineSep+lineSep); 68 for(String tmp : set){ 69 pw.write(tmp+lineSep); 70 } 71 pw.flush(); 72 73 } 74 pw.close(); 75 fos.close(); 76 } catch (Exception e) { 77 //ignore 78 } 79 80 } 81 }