技术宅,fat-man

增加语言的了解程度可以避免写出愚蠢的代码

导航

shell抓取

#!/usr/bin/env bash
#
# Scrape the most recent HTTP proxy article on youdaili.cn and write the
# de-duplicated, sorted entries to "config/ip_port" next to this script.
#
# Requires: bash (arithmetic for-loop, [[ ]]), curl, piconv, and gawk
# (the page-count extraction uses gensub, a gawk extension).
#
# NOTE(review): the awk patterns contain non-ASCII literals that are matched
# against gbk-converted input, so this file presumably has to be stored in
# gbk for them to match — confirm before re-encoding it.

set -u

dir=$(dirname -- "$0")
configDir="$dir/config"

ipport="$configDir/ip_port"

url="http://www.youdaili.cn/Daili/http/"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Download URL ($1) and convert the body from utf8 to gbk on stdout.
fetch_page() {
  curl -s --max-time 200 "$1" | piconv -f utf8 -t gbk
}

# Read article HTML on stdin; print the part of every "...@HTTP#...<br />"
# line that precedes "@HTTP#", with any leading <p>/<span> markup removed.
extract_proxies() {
  awk '$0~/.*@HTTP#.*<br \/>/{gsub(".*<p>","",$0);gsub(".*<span>","",$0);gsub("@HTTP#.*","",$0);print}'
}

# Article id of the entry flagged with hot.gif on the index page. Only the
# first match is used: several ids joined by newlines would yield an
# unusable URL below.
indexs=$(fetch_page "$url" \
  | awk '$0~/http:\/\/www.youdaili.cn\/static\/images\/hot.gif/{print substr($2,41,length($2)-46); exit}')
[[ -n "$indexs" ]] || die "could not find the latest article on $url"

# Fetch the first page once; it supplies both the page count and the first
# batch of proxies.
first_page=$(fetch_page "${url}${indexs}.html")
[[ -n "$first_page" ]] || die "could not download ${url}${indexs}.html"

pages=$(printf '%s\n' "$first_page" \
  | awk '$0~/共.*页/{page=gensub(/.*共([^页]+).*/,"\\1","1",$0);print page; exit}')

# The page count feeds an arithmetic loop, so it must be a plain number.
# Fall back to the first page only when no usable count is found.
if [[ "$pages" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]]; then
  pages=$((10#${BASH_REMATCH[1]}))   # force base 10 so "08" is not octal
else
  printf 'warning: page count not found, scraping first page only\n' >&2
  pages=1
fi

mkdir -p -- "$configDir" || die "cannot create $configDir"

{
  printf '%s\n' "$first_page" | extract_proxies
  # Follow-up pages are named "<id>_<n>.html" starting at n=2.
  for ((page = 2; page <= pages; page++)); do
    fetch_page "${url}${indexs}_${page}.html" | extract_proxies
  done
} | sort -u >"$ipport"

 

posted on 2014-04-14 21:39  codestyle  阅读(479)  评论(0)  编辑  收藏  举报