Linguistic corpora 种子语料库-待分析对象-分析与更新语料库
Computational Linguistics
http://matplotlib.org/
https://github.com/matplotlib/matplotlib/blob/master/INSTALL#L59
http://www.nltk.org/book/ch01.html#id9
1 C:\Users\w>python -m pip install --upgrade pip 2 Collecting pip 3 Retrying (Retry(total=4, connect=None, read=None, redirect=None)) after connection broken by 'ConnectTimeoutError(<pip._vendor.reque 4 Retrying (Retry(total=3, connect=None, read=None, redirect=None)) after connection broken by 'ReadTimeoutError("HTTPSConnectionPool( 5 Downloading pip-9.0.1-py2.py3-none-any.whl (1.3MB) 6 100% |████████████████████████████████| 1.3MB 14kB/s 7 Installing collected packages: pip 8 Found existing installation: pip 8.1.1 9 Uninstalling pip-8.1.1: 10 Successfully uninstalled pip-8.1.1 11 Successfully installed pip-9.0.1 12 13 C:\Users\w>python -m pip install matplotlib 14 Collecting matplotlib 15 Downloading matplotlib-1.5.3-cp35-cp35m-win_amd64.whl (6.5MB) 16 100% |████████████████████████████████| 6.5MB 30kB/s 17 Collecting pytz (from matplotlib) 18 Downloading pytz-2016.10-py2.py3-none-any.whl (483kB) 19 100% |████████████████████████████████| 491kB 35kB/s 20 Collecting pyparsing!=2.0.4,!=2.1.2,>=1.5.6 (from matplotlib) 21 Downloading pyparsing-2.1.10-py2.py3-none-any.whl (56kB) 22 100% |████████████████████████████████| 61kB 29kB/s 23 Collecting numpy>=1.6 (from matplotlib) 24 Downloading numpy-1.11.2-cp35-none-win_amd64.whl (7.6MB) 25 100% |████████████████████████████████| 7.6MB 32kB/s 26 Collecting cycler (from matplotlib) 27 Downloading cycler-0.10.0-py2.py3-none-any.whl 28 Collecting python-dateutil (from matplotlib) 29 Downloading python_dateutil-2.6.0-py2.py3-none-any.whl (194kB) 30 100% |████████████████████████████████| 194kB 46kB/s 31 Collecting six (from cycler->matplotlib) 32 Downloading six-1.10.0-py2.py3-none-any.whl 33 Installing collected packages: pytz, pyparsing, numpy, six, cycler, python-dateutil, matplotlib 34 Successfully installed cycler-0.10.0 matplotlib-1.5.3 numpy-1.11.2 pyparsing-2.1.10 python-dateutil-2.6.0 pytz-2016.10 six-1.10.0
# Lexical dispersion plot of the listed words across text4 (NLTK Text.dispersion_plot);
# drawing it requires matplotlib, which is why the install log above precedes this line.
# NOTE(review): text4 is presumably one of the sample texts from `from nltk.book import *` - confirm.
text4.dispersion_plot(["kate","he","she","jack"])
<p id="w_last" style="color: red; font-size: 6em;">w-WAITING---</p><br>
<?php
// One-off scraper: collects English given names from the Baidu "english name"
// widget API and from ename.dict.cn, stores each one in the `namelist` table
// together with the URL it was found on and a gender flag (isboy: 1 = boy,
// 0 = girl), and finally reports how many rows were inserted.
// conn.php is expected to provide the mysqli connection in $link.
include('conn.php');

/**
 * Download a page and return the first capture group of every pattern match.
 *
 * @param string $url     Page to download.
 * @param string $pattern PCRE pattern containing exactly one capture group.
 * @return array List of captured strings, in page order; empty when the
 *               download fails or nothing matches.
 */
function w_grab_names($url, $pattern)
{
    $body = file_get_contents($url);
    if ($body === false) {
        // Unreachable page: treat it as a page without names instead of
        // feeding `false` into the regex engine.
        return array();
    }
    if (!preg_match_all($pattern, $body, $found, PREG_PATTERN_ORDER)) {
        return array();
    }
    return $found[1];
}

/**
 * Insert one scraped name through the shared prepared statement.
 *
 * @param mysqli_stmt|false $insert     Prepared INSERT (false when preparing failed).
 * @param string            $given_name Scraped given name.
 * @param string            $grab_url   URL the name was scraped from.
 * @param int               $isboy      1 for a boy's name, 0 for a girl's name.
 * @return bool True when the row was inserted.
 */
function w_store_name($insert, $given_name, $grab_url, $isboy)
{
    if ($insert === false) {
        return false;
    }
    // Bound parameters keep scraped (untrusted) text out of the SQL string.
    mysqli_stmt_bind_param($insert, 'ssi', $given_name, $grab_url, $isboy);
    return mysqli_stmt_execute($insert);
}

/**
 * Scrape every URL in $urls and store the names found on it.
 *
 * @param mysqli_stmt|false $insert  Prepared INSERT statement.
 * @param array             $urls    Pages to scrape, in order.
 * @param string            $pattern PCRE pattern with one capture group holding the name.
 * @param int               $isboy   Gender flag stored with every name.
 * @param int               $step    Keep only the last match of every group of
 *                                   $step consecutive matches; 1 keeps all.
 * @return int Number of rows successfully inserted.
 */
function w_scrape_urls($insert, $urls, $pattern, $isboy, $step)
{
    $stored = 0;
    foreach ($urls as $url) {
        foreach (w_grab_names($url, $pattern) as $index => $given_name) {
            if ($index % $step != $step - 1) {
                continue;
            }
            if (w_store_name($insert, $given_name, $url, $isboy)) {
                $stored++;
            }
        }
    }
    return $stored;
}

$w_insert = mysqli_prepare($link, 'INSERT INTO namelist (given_name,grab_url,isboy) VALUES (?,?,?)');

// ---- Source 1: Baidu widget API, 153 pages of 5 names per gender ----------
//http://www.baidu.com/s?wd=%E5%8F%96%E8%8B%B1%E6%96%87%E5%90%8D
$w_baidu_pattern = '/"englishname":"(\w*)"/';
$w_girl_urls = array();
$w_boy_urls = array();
for ($w = 0; $w < 153; $w++) {
    $w_girl_urls[] = 'http://api.open.baidu.com/pae/channel/data/asyncqury?appid=4036&srcid=4036&from_mid=1&format=json&ie=utf-8&oe=utf-8&subtitle=%E8%8B%B1%E6%96%87%E5%90%8D&query=%E8%8B%B1%E6%96%87%E5%90%8D&rn=5&stat1=%E5%A5%B3%E7%94%9F&pn=' . (5 * $w) . '&srcid=4036&cb=jQuery110205654252001601794_1481004786057&_=' . (1481004786059 + $w);
    $w_boy_urls[] = 'http://api.open.baidu.com/pae/channel/data/asyncqury?appid=4036&srcid=4036&from_mid=1&format=json&ie=utf-8&oe=utf-8&subtitle=%E8%8B%B1%E6%96%87%E5%90%8D&query=%E8%8B%B1%E6%96%87%E5%90%8D&rn=5&pn=' . (5 * $w) . '&srcid=4036&stat1=%E7%94%B7%E7%94%9F&cb=jQuery1102017382318514491035_1481005337608&_=' . (1481004786059 + $w);
}
// NOTE(review): only every third "englishname" match is stored, as in the
// original script; presumably the response repeats each record - confirm.
$w_db_incr_girl = w_scrape_urls($w_insert, $w_girl_urls, $w_baidu_pattern, 0, 3);
$w_db_incr_boy = w_scrape_urls($w_insert, $w_boy_urls, $w_baidu_pattern, 1, 3);

// ---- Source 2: ename.dict.cn, pages 1-7 for every initial letter ----------
//http://ename.dict.cn/list/female/R/2
$w_dict_pattern = '/href="\/(\w*)">/';
foreach (range('A', 'Z') as $w_range) {
    $w_girl_urls = array();
    $w_boy_urls = array();
    for ($w = 1; $w < 8; $w++) {
        $w_girl_urls[] = 'http://ename.dict.cn/list/female/' . $w_range . '/' . $w;
        $w_boy_urls[] = 'http://ename.dict.cn/list/male/' . $w_range . '/' . $w;
    }
    $w_db_incr_girl += w_scrape_urls($w_insert, $w_girl_urls, $w_dict_pattern, 0, 1);
    $w_db_incr_boy += w_scrape_urls($w_insert, $w_boy_urls, $w_dict_pattern, 1, 1);
}

?>
<script>
    document.getElementById('w_last').innerHTML = 'w_db_incr_girl\'s=<?= $w_db_incr_girl?>,w_db_incr_boy\'s=' +<?= $w_db_incr_boy?>;
</script>
// Decide the gender code of $given_name: first consult the local `namelist`
// table, and only when the name is unknown there ask the online dictionary
// (w_cross_domian_name_isboy) and cache its verdict in the table.
// Codes: 1 = boy, 0 = girl, 2 = used for both, 4 = not recognised as a name.
// Values are escaped because they are embedded in SQL string literals.
$given_name_sql = mysqli_real_escape_string($link, $given_name);
// Fixed: the keyword was misspelled "SEELCT", so this lookup could never succeed.
$sql_db_check = 'SELECT isboy FROM namelist WHERE given_name="' . $given_name_sql . '"';
$check = db_multiple_rows_link($link, $sql_db_check);
if (count($check) == 2) {
    // Two stored rows for one name: it is listed as both a boy's and a girl's name.
    $isboy = 2;
} elseif (count($check) == 1) {
    // NOTE(review): if db_multiple_rows_link returns a list of rows this
    // probably needs to be $check[0]['isboy'] - confirm against the helper.
    $isboy = $check['isboy'];
} elseif (count($check) == 0) {
    $w_arr = w_cross_domian_name_isboy($given_name);
    //var_dump($w_arr);
    $isboy = $w_arr['w_code'];
    $grab_url = $w_arr['w_url'];
    if ($isboy != 4) {
        $grab_url_sql = mysqli_real_escape_string($link, $grab_url);
        $sql_insert_prefix = 'INSERT INTO namelist (given_name,grab_url,isboy) VALUES ("' . $given_name_sql . '","' . $grab_url_sql . '",';
        if ($isboy == 1) {
            $sql_w = $sql_insert_prefix . '1)';
        } elseif ($isboy == 0) {
            $sql_w = $sql_insert_prefix . '0)';
        } elseif ($isboy == 2) {
            // A unisex name is stored twice, once per gender flag.
            $sql_w = $sql_insert_prefix . '1)' . ';';
            $sql_w .= $sql_insert_prefix . '0)';
        }
        // var_dump($sql_w);
        mysqli_multi_query($link, $sql_w);
        // (the snippet is cut off here in the notes; the closing braces are not shown)
/**
 * Extract the given (first) name from a full-name string and normalise its case.
 *
 * Leading whitespace is ignored, the text up to the first space is taken, and
 * the result is returned with an upper-case first letter and lower-case rest.
 *
 * @param string $wstr Full name, e.g. "john SMITH".
 * @return string Normalised given name, e.g. "John"; "" for empty input.
 */
function w_given_name($wstr)
{
    // Trim first so that " john smith" yields "John" rather than the whole string.
    $trimmed = ltrim($wstr);
    $space_at = strpos($trimmed, ' ');
    $given_name = ($space_at === false) ? $trimmed : substr($trimmed, 0, $space_at);
    return ucfirst(strtolower($given_name));
}

/**
 * Classify the HTML of a youdao dictionary entry page.
 *
 * @param string $html Page source.
 * @return int 4 = spelling suggestion shown or no gender marker found,
 *             2 = marked as both male and female name, 1 = male, 0 = female.
 */
function w_name_page_code($html)
{
    // '您要找的是不是' is the "did you mean" banner: the word was not found as typed.
    if (strpos($html, '您要找的是不是') !== false) {
        return 4;
    }
    $is_boy = strpos($html, '男子名') !== false;  // "male given name"
    $is_girl = strpos($html, '女子名') !== false; // "female given name"
    if ($is_boy && $is_girl) {
        return 2;
    }
    if ($is_boy) {
        return 1;
    }
    if ($is_girl) {
        return 0;
    }
    return 4;
}

//http://dict.youdao.com/w/eng/Tommy/#keyfrom=dict2.index
//http://dict.youdao.com/w/eng/Chris/#keyfrom=dict2.index
//http://dict.youdao.com/w/eng/Billie/#keyfrom=dict2.index
//http://dict.youdao.com/w/eng/Mikhael/#keyfrom=dict2.index
/**
 * Look a name up on dict.youdao.com and report whether the dictionary entry
 * marks it as a male and/or female given name.
 *
 * @param string $name Given name to look up.
 * @return array ['w_url' => page URL that was queried,
 *                'w_code' => 1 boy, 0 girl, 2 both, 4 unknown/misspelt/unreachable]
 */
function w_cross_domian_name_isboy($name)
{
    // Encode the name so spaces or punctuation cannot break the request URL.
    $url = 'http://dict.youdao.com/w/eng/' . rawurlencode($name) . '/#keyfrom=dict2.index';

    $w = array();
    $w['w_url'] = $url;
    $w['w_code'] = 4;

    $w_file = file_get_contents($url);
    if ($w_file !== false) {
        $w['w_code'] = w_name_page_code($w_file);
    }
    return $w;
}