抓取 CNTV 电视节目表

from time import strftime,localtime
import httplib2
import re, string, sys, unicodedata

# Fetch one day of the CNTV electronic programme guide (EPG) for a single
# channel and print every entry found in the returned HTML fragment.

# Query template: first slot is the day (YYYY-MM-DD), second the channel id.
# Example of another request the endpoint accepts:
#   http://tv.cntv.cn/index.php?action=epg-list&date=2013-04-01&channel=russian
EPG_URL = "http://tv.cntv.cn/index.php?action=epg-list&date=%s&channel=%s"
CHANNEL = "cctvgaoqing"

# The request is sent with the same headers the original script used, which
# mimic an XMLHttpRequest issued from the EPG page.
HEADERS = {
    "Host": "tv.cntv.cn",
    "Referer": "http://tv.cntv.cn/epg",
    "X-Requested-With": "XMLHttpRequest",
}

# Compiled once instead of on every loop iteration. re.S lets "." span
# newlines because the tags are spread over several lines.
_DL_RE = re.compile(r"<dl>(.*?)</dl>", re.S)
_DD_RE = re.compile(r"<dd>(.*?)</dd>", re.S)
_A_RE = re.compile(r"<a.*?>(.*?)</a>", re.S)


def extract_programmes(html):
    """Return the programme entries contained in an EPG HTML fragment.

    Every ``<dd>`` found inside a ``<dl>`` yields one entry, in document
    order.  When the ``<dd>`` holds exactly two ``<a>`` elements the entry is
    the inner text of the second one; otherwise it is the whole inner markup
    of the ``<dd>`` with surrounding whitespace removed.

    Args:
        html: The response body, either as UTF-8 encoded bytes or as
            already-decoded text.

    Returns:
        A list of text strings (empty when no ``<dl>`` is present).

    Raises:
        UnicodeDecodeError: If ``html`` is bytes that are not valid UTF-8.
    """
    # Decode once at the boundary so all matching happens on text; this also
    # keeps the function usable on Python 3, where the body arrives as bytes.
    if isinstance(html, bytes):
        html = html.decode('utf-8')

    entries = []
    for dl_body in _DL_RE.findall(html):
        for dd_body in _DD_RE.findall(dl_body):
            anchors = _A_RE.findall(dd_body)
            if len(anchors) == 2:
                # NOTE(review): presumably the second link carries the
                # programme text -- confirm against the live markup.
                entries.append(anchors[1])
            else:
                entries.append(dd_body.strip())
    return entries


def main():
    """Download today's guide for CHANNEL and print the date and each entry."""
    date = strftime('%Y-%m-%d', localtime())
    print(date)

    url = EPG_URL % (date, CHANNEL)
    h = httplib2.Http()
    resp, content = h.request(url, 'GET', headers=HEADERS)
    # While debugging, print(resp) / print(content) here to inspect the reply.

    if resp.status != 200:
        # Keep going (as the original did) but make the failure visible.
        sys.stderr.write("unexpected HTTP status %s for %s\n" % (resp.status, url))

    for entry in extract_programmes(content):
        print(entry)


if __name__ == '__main__':
    main()

 

posted @ 2013-04-01 18:39  rikioy  阅读(787)  评论(0)  编辑  收藏  举报