1. Download title and URL
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re, urllib2,threading
def geturltitle(match, file, page=None):
    """Write the video URL found in *match*, then the page title, to *file*.

    Parameters:
        match: regex match whose text is ``[mukio=file]<url>``; the
            ``[mukio=file]`` tag is stripped and the remainder is the URL.
        file: writable text file object; receives ``<url>\\n`` and, when a
            keywords meta tag is present, ``<title>\\n\\n``.
        page: HTML source of the page the match came from.  When omitted,
            the module-level ``respread`` is used, which is what the
            original two-argument call relied on.

    Returns:
        None.  The URL and title are also echoed to stdout.
    """
    if page is None:
        # Legacy two-argument callers publish the page body through the
        # module-level name ``respread``; fall back to it (or to an empty
        # page if it was never assigned).
        page = globals().get('respread', '')

    # Extract the download URL by dropping the leading BBCode-style tag.
    downurl = re.sub(r'^\[mukio=file\]', '', match.group())
    print(downurl)
    if downurl:
        file.write(downurl)
        file.write('\n')

    # Extract the title: capture only the attribute value.  A greedy
    # ``.*"`` would run to the last quote on the line and keep the closing
    # quote (and possibly later attributes) as part of the title.
    title_match = re.search(
        r'<meta name="keywords" content="([^"\s][^"]*)"', page)
    if title_match:
        title = title_match.group(1)
        print(title)
        file.write(title)
        file.write('\n\n')
# Scrape forum threads 3600..8999 and record every video URL/title pair in
# avfun1.txt.  Failures on a single page are reported and skipped.
#
# NOTE: geturltitle() reads the page body from the module-level name
# ``respread``, so that name must stay global and be assigned before the call.
video_pattern = re.compile(r'\[mukio=file\]\S.*mp4')  # loop-invariant
with open('avfun1.txt', 'w') as outfile:  # closed even if interrupted
    for n in range(3600, 9000):
        try:
            resp = urllib2.urlopen('http://www.avfun1.com/forum.php?mod=viewthread&tid='+repr(n)+'&mobile=yes', timeout=2)
            try:
                respread = resp.read()
            finally:
                resp.close()
            match = video_pattern.search(respread)
            print("pid = " + repr(n))
            if match:
                # Called directly: the previous
                # ``Thread(target=geturltitle(match, file))`` already ran the
                # function synchronously and then started a thread whose
                # target was None, so no work was ever done concurrently.
                geturltitle(match, outfile)
        except Exception as e:
            # Best effort: a timeout or HTTP error on one thread id must not
            # abort the whole crawl.
            print(e)
import re, urllib2,threading
def geturltitle(match, file, page=None):
    """Write the video URL found in *match*, then the page title, to *file*.

    Parameters:
        match: regex match whose text is ``[mukio=file]<url>``; the
            ``[mukio=file]`` tag is stripped and the remainder is the URL.
        file: writable text file object; receives ``<url>\\n`` and, when a
            keywords meta tag is present, ``<title>\\n\\n``.
        page: HTML source of the page the match came from.  When omitted,
            the module-level ``respread`` is used, which is what the
            original two-argument call relied on.

    Returns:
        None.  The URL and title are also echoed to stdout.
    """
    if page is None:
        # Legacy two-argument callers publish the page body through the
        # module-level name ``respread``; fall back to it (or to an empty
        # page if it was never assigned).
        page = globals().get('respread', '')

    # Extract the download URL by dropping the leading BBCode-style tag.
    downurl = re.sub(r'^\[mukio=file\]', '', match.group())
    print(downurl)
    if downurl:
        file.write(downurl)
        file.write('\n')

    # Extract the title: capture only the attribute value.  A greedy
    # ``.*"`` would run to the last quote on the line and keep the closing
    # quote (and possibly later attributes) as part of the title.
    title_match = re.search(
        r'<meta name="keywords" content="([^"\s][^"]*)"', page)
    if title_match:
        title = title_match.group(1)
        print(title)
        file.write(title)
        file.write('\n\n')
# Scrape forum threads 3600..8999 and record every video URL/title pair in
# avfun1.txt.  Failures on a single page are reported and skipped.
#
# NOTE: geturltitle() reads the page body from the module-level name
# ``respread``, so that name must stay global and be assigned before the call.
video_pattern = re.compile(r'\[mukio=file\]\S.*mp4')  # loop-invariant
with open('avfun1.txt', 'w') as outfile:  # closed even if interrupted
    for n in range(3600, 9000):
        try:
            resp = urllib2.urlopen('http://www.avfun1.com/forum.php?mod=viewthread&tid='+repr(n)+'&mobile=yes', timeout=2)
            try:
                respread = resp.read()
            finally:
                resp.close()
            match = video_pattern.search(respread)
            print("pid = " + repr(n))
            if match:
                # Called directly: the previous
                # ``Thread(target=geturltitle(match, file))`` already ran the
                # function synchronously and then started a thread whose
                # target was None, so no work was ever done concurrently.
                geturltitle(match, outfile)
        except Exception as e:
            # Best effort: a timeout or HTTP error on one thread id must not
            # abort the whole crawl.
            print(e)
2. Rename files using the titles from the file
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re, os
# Rename the downloaded videos in <base_dir>/aaa to the titles recorded in
# <base_dir>/avfun1.txt, which holds "<url>\n<title>\n\n" records.
base_dir = "/Users/apple/Downloads/avfun1/"  # download root directory
if os.path.isdir(base_dir):  # verify the directory before touching it
    print("Directory exists!")
else:
    print("Directory not exist.")
    # Nothing below can work without the directory; stop here instead of
    # failing later inside os.listdir().
    raise SystemExit(1)

video_dir = base_dir + 'aaa'
filelist = os.listdir(video_dir)
with open(base_dir + 'avfun1.txt', 'rb') as listing_file:
    listing = listing_file.read()

for name in filelist:
    # Find the URL line ending in this file name and capture the title line
    # right after it.  The name is escaped so '.', '(' etc. match literally.
    match = re.search(re.escape(name) + r'\n(\S.*)', listing)
    if match:
        # Take the captured title as-is; a path separator inside it would
        # make os.rename() target a non-existent sub-directory.
        newfile = match.group(1).replace(os.sep, '_') + '.mp4'
        print(name)
        print(newfile)
        os.rename(os.path.join(video_dir, name), os.path.join(video_dir, newfile))
    else:
        print(match)
#-*- coding:utf-8 -*-
import re, os
# Rename the downloaded videos in <base_dir>/aaa to the titles recorded in
# <base_dir>/avfun1.txt, which holds "<url>\n<title>\n\n" records.
base_dir = "/Users/apple/Downloads/avfun1/"  # download root directory
if os.path.isdir(base_dir):  # verify the directory before touching it
    print("Directory exists!")
else:
    print("Directory not exist.")
    # Nothing below can work without the directory; stop here instead of
    # failing later inside os.listdir().
    raise SystemExit(1)

video_dir = base_dir + 'aaa'
filelist = os.listdir(video_dir)
with open(base_dir + 'avfun1.txt', 'rb') as listing_file:
    listing = listing_file.read()

for name in filelist:
    # Find the URL line ending in this file name and capture the title line
    # right after it.  The name is escaped so '.', '(' etc. match literally.
    match = re.search(re.escape(name) + r'\n(\S.*)', listing)
    if match:
        # Take the captured title as-is; a path separator inside it would
        # make os.rename() target a non-existent sub-directory.
        newfile = match.group(1).replace(os.sep, '_') + '.mp4'
        print(name)
        print(newfile)
        os.rename(os.path.join(video_dir, name), os.path.join(video_dir, newfile))
    else:
        print(match)