A Python script to download videos from Youku
A useful script to download videos from Youku.
I found a related script in a Google Code project (http://code.google.com/u/ldmiao), but it can only retrieve a single video address. Youku, however, splits each video into about 10 sub-videos, so that script cannot fetch the whole video. After studying the Youku JSON, I improved the script as follows:
I found a related script in a Google Code project (http://code.google.com/u/ldmiao), but it can only retrieve a single video address. Youku, however, splits each video into about 10 sub-videos, so that script cannot fetch the whole video. After studying the Youku JSON, I improved the script as follows:
#youku.py
#download video from youku
import feedparser,urllib,urllib2
import re
class Youku(object):
    """Resolve a Youku video page URL into the download URLs of its segments.

    Construct it with a page URL such as
    ``http://v.youku.com/v_show/id_XXXX.html``.  ``getRealURL()`` then returns
    one ``getFlvPath`` URL per segment and ``download()`` fetches each of them
    with ``wget``.  Network access goes through the module-level ``urllib2``.

    Attributes:
        regex: compiled patterns.  ``'url'`` parses the page URL; ``'ticket'``
            is a 3-tuple matching, in order, the numeric ``VideoIDS`` value
            passed to ``addVariable(...)``, the second quoted argument of
            ``sendVideoLink(...)``, and the ``key1``/``key2`` pair.
        valid: match object for the page URL, or ``None`` if the URL was not
            recognised.
        id: ``'yk:'`` followed by the video id, or ``None`` if not ``valid``.
        link: page URL rebuilt from the video id, or ``None`` if not ``valid``.
    """

    _PLAYLIST_URL = 'http://v.youku.com/player/getPlayList/VideoIDS/'
    _FLV_PATH_URL = 'http://f.youku.com/player/getFlvPath/'
    # key1 is XOR-ed with this mask before being appended to key2.
    _KEY_MASK = 0xA55AA5A5

    def __init__(self, uri):
        """Parse *uri* and remember the video id and canonical page link.

        Args:
            uri: URL of a Youku video page.  When it does not match the
                expected pattern, ``valid``, ``id`` and ``link`` are ``None``.
        """
        self.regex = {
            'url': re.compile(
                r'youku\.com/(?:v_show/id_)?([-+_\w]+)=*\.html',
                re.IGNORECASE),
            'ticket': (
                re.compile(
                    r'addVariable\s*\(\s*[\'"]VideoIDS[\'"]\s*,\s*(\d+)\s*\)',
                    re.IGNORECASE),
                re.compile(
                    r'sendVideoLink\s*\(\s*[\'"][^\'"]+[\'"]\s*,'
                    r'\s*[\'"]([^\'"]+)[\'"]',
                    re.IGNORECASE),
                re.compile(
                    r'[\'"\s]+?key1[\'"\s]+?:\s*[\'"]([^\'"]+)[\'"]\s*,'
                    r'\s*[\'"\s]+?key2[\'"\s]+?:\s*[\'"]([^\'"]+)[\'"]',
                    re.IGNORECASE),
            ),
        }
        self.valid = self.regex['url'].search(uri)
        # Always define both attributes so that an unrecognised URL does not
        # lead to an AttributeError later on.
        self.id = None
        self.link = None
        if self.valid:
            video_id = self.valid.group(1)
            self.id = 'yk:' + video_id
            self.link = 'http://v.youku.com/v_show/id_' + video_id

    def download(self):
        """Download every segment returned by ``getRealURL()`` using ``wget``.

        Each URL is passed to ``wget`` as a single argument without going
        through a shell, so characters such as ``?`` in the URL are not
        interpreted.  The exit status of ``wget`` is ignored.

        Raises:
            OSError: if the ``wget`` executable cannot be started.
        """
        import subprocess

        for segment_url in self.getRealURL():
            subprocess.call(['wget', segment_url])

    def getRealURL(self):
        """Return the list of segment download URLs for this video.

        Returns:
            A list of URL strings, one per segment.  The list is empty when
            the page URL was not recognised, when the video ids or the keys
            could not be found, or when the long id does not contain the
            expected ten leading digits.
        """
        if not self.valid:
            return []
        shortid, longid = self._find_video_ids()
        if not (shortid and longid):
            return []
        key1, key2, num = self._fetch_playlist_keys(shortid)
        if not (key1 and key2):
            return []
        print("num %d" % num)
        return self._build_segment_urls(longid, key1, key2, num)

    def _find_video_ids(self):
        """Scan the video page and return ``(shortid, longid)``.

        Scanning stops at the first line by which both ids are known, so the
        playlist is requested only once.  A missing id is returned as ``''``.
        """
        shortid = ''
        longid = ''
        page = urllib2.urlopen(self.link)
        try:
            for line in page:
                short_match = self.regex['ticket'][0].search(line)
                long_match = self.regex['ticket'][1].search(line)
                if short_match:
                    shortid = short_match.group(1)
                if long_match:
                    longid = long_match.group(1)
                if shortid and longid:
                    break
        finally:
            page.close()
        return shortid, longid

    def _fetch_playlist_keys(self, shortid):
        """Fetch the playlist for *shortid* and return ``(key1, key2, num)``.

        ``num`` is the number of ``"no"`` occurrences in the response, which
        is used as the segment count.  Missing keys are returned as ``''``.
        """
        key1 = ''
        key2 = ''
        num = 0
        playlist = urllib2.urlopen(self._PLAYLIST_URL + shortid)
        try:
            for line in playlist:
                line = line.strip()
                key_match = self.regex['ticket'][2].search(line)
                if key_match:
                    key1 = key_match.group(1)
                    key2 = key_match.group(2)
                num += line.count('"no"')
        finally:
            playlist.close()
        return key1, key2, num

    @staticmethod
    def _build_segment_urls(longid, key1, key2, num):
        """Build the *num* segment URLs from the long id and the two keys.

        The first run of ten digits in *longid* is split into an eight-digit
        prefix and a two-digit field; the latter is replaced by the
        zero-padded segment index.  The ``k`` parameter is *key2* followed by
        ``key1 XOR 0xA55AA5A5`` in lowercase hex.

        Raises:
            ValueError: if *key1* is not a hexadecimal number.
        """
        parts = re.search(r'(\d{8})(\d{2})(.*)', longid)
        if parts is None:
            return []
        url_prefix = parts.group(1)
        url_suffix = parts.group(3)
        # '%x' avoids the trailing 'L' that hex() adds to Python 2 longs.
        key = key2 + '%x' % (int(key1, 16) ^ Youku._KEY_MASK)
        return [
            Youku._FLV_PATH_URL + url_prefix + '%02d' % index + url_suffix
            + '?k=' + key
            for index in range(num)
        ]
if __name__ == '__main__':
    # The page URL may be given as the first command-line argument; without
    # one, the sample video below is used.
    import sys

    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = 'http://v.youku.com/v_show/id_XMTA0Nzk2OTY4.html'
    yk = Youku(url)
    # Show the resolved segment URLs, then fetch them.
    print(yk.getRealURL())
    yk.download()
posted on 2009-07-22 23:13 xueliangliu 阅读(439) 评论(0) 编辑 收藏 举报