# Fetch the CNTV TV program schedule.
"""Print today's CNTV programme listing for one channel.

The script requests the EPG list fragment from tv.cntv.cn for the current
local date, extracts one text entry per ``<dd>`` element found inside the
``<dl>`` blocks of the response, and prints the date followed by the entries.

The code runs under both Python 2 and Python 3: ``print`` is always called
with a single argument and the response body is decoded to text once, before
any pattern matching.
"""
import re
import string
import sys
import unicodedata
from time import localtime, strftime

# NOTE: string, sys and unicodedata are not used below; the imports are kept
# in case code outside this view relies on them.

# Endpoint that returns the programme list for one date and channel.
EPG_URL = (
    "http://tv.cntv.cn/index.php"
    "?action=epg-list&date={date}&channel={channel}"
)

# Channel requested when none is given ("russian" is another known value).
DEFAULT_CHANNEL = "cctvgaoqing"

# Headers sent with every request.
# NOTE(review): presumably the endpoint only answers AJAX-style requests
# coming from the EPG page -- confirm before removing any of these.
REQUEST_HEADERS = {
    "Host": "tv.cntv.cn",
    "Referer": "http://tv.cntv.cn/epg",
    "X-Requested-With": "XMLHttpRequest",
}

# re.S lets "." span newlines, because the elements cover several lines.
_DL_RE = re.compile(r"<dl>(.*?)</dl>", re.S)
_DD_RE = re.compile(r"<dd>(.*?)</dd>", re.S)
_A_RE = re.compile(r"<a.*?>(.*?)</a>", re.S)


def build_epg_url(date, channel=DEFAULT_CHANNEL):
    """Return the EPG list URL for *date* and *channel*.

    Args:
        date: Date string inserted verbatim into the query, e.g.
            ``"2013-04-01"``.
        channel: Channel identifier inserted verbatim into the query.

    Returns:
        The request URL as a string. The values are not URL-quoted.
    """
    return EPG_URL.format(date=date, channel=channel)


def fetch_epg(date, channel=DEFAULT_CHANNEL):
    """Download the raw EPG list fragment for *date* and *channel*.

    Args:
        date: Date string such as ``"2013-04-01"``.
        channel: Channel identifier.

    Returns:
        The response body exactly as returned by ``httplib2``. The HTTP
        status is not inspected.
    """
    # Imported here so the parsing helpers stay importable on machines that
    # do not have the third-party httplib2 package installed.
    import httplib2

    _response, content = httplib2.Http().request(
        build_epg_url(date, channel), "GET", headers=REQUEST_HEADERS
    )
    return content


def parse_program_titles(html):
    """Extract one entry per ``<dd>`` element from an EPG list fragment.

    Only ``<dd>`` elements located inside a ``<dl>...</dl>`` block are
    considered. For each of them:

    * if it contains exactly two ``<a>`` elements, the entry is the inner
      text of the second one;
    * otherwise the entry is the whole ``<dd>`` content with surrounding
      whitespace stripped (any markup inside is kept as-is).

    Args:
        html: Fragment as text, or as UTF-8 encoded bytes. Bytes are decoded
            once up front; undecodable bytes become U+FFFD instead of
            raising.

    Returns:
        A list of text entries in document order (empty if nothing matched).
    """
    if isinstance(html, bytes):
        html = html.decode("utf-8", "replace")

    titles = []
    for dl_body in _DL_RE.findall(html):
        for dd_body in _DD_RE.findall(dl_body):
            anchors = _A_RE.findall(dd_body)
            if len(anchors) == 2:
                titles.append(anchors[1])
            else:
                titles.append(dd_body.strip())
    return titles


def main():
    """Print today's local date, then each entry of today's listing."""
    date = strftime("%Y-%m-%d", localtime())
    print(date)
    for title in parse_program_titles(fetch_epg(date)):
        print(title)


if __name__ == "__main__":
    main()