Python168的学习笔记7
关于多线程操作。
对于IO操作,如访问网站,写入磁盘这种需要时间等待响应的操作,多个cpu也几乎不能提高效率。
对于CPU密集型操作,如这个格式转换,可以通过多个cpu同时去进行。
但是对于Python(CPython)来讲,由于存在GIL(全局解释器锁),同一时刻只有一个线程能够执行Python字节码,所以多线程无法让CPU密集型操作真正并行。线程在等待IO时会释放GIL,因此Python的多线程实际上只适合用于IO操作。
#coding:utf8
import csv
from StringIO import StringIO
from threading import Thread
from xml.etree.ElementTree import Element, ElementTree

import requests

from test_retractxml import pretty


def download(url):
    """Fetch *url* and return its body wrapped in a StringIO.

    This is the I/O-bound step: the thread sends the request and then sleeps
    while waiting for data, giving up the CPU in the meantime.

    Returns None when the request fails (timeout, connection error) or the
    response status is not OK, so callers can simply skip that item.
    """
    try:
        response = requests.get(url, timeout=3)
    except requests.RequestException:
        # A failed download must not kill the worker thread with a traceback;
        # handle() already treats None as "nothing to convert".
        return None
    if response.ok:
        return StringIO(response.content)
    return None


def csvToxml(scsv, fxml):
    """Convert the CSV read from *scsv* into XML written to *fxml*.

    This is the CPU-bound step. The first CSV row supplies the tag names
    (spaces removed); every following row becomes one <Row> element under
    the <Data> root.
    """
    reader = csv.reader(scsv)
    # next(reader, []) keeps an empty input from raising StopIteration.
    headers = [h.replace(' ', '') for h in next(reader, [])]

    root = Element('Data')
    for row in reader:
        eRow = Element('Row')
        root.append(eRow)
        for tag, text in zip(headers, row):
            e = Element(tag)
            e.text = text
            eRow.append(e)

    pretty(root)
    et = ElementTree(root)
    et.write(fxml)


def handle(sid):
    """Download the CSV for stock id *sid* and save it as '<code>.xml'.

    The stock code is *sid* left-padded with zeros to six characters.
    Nothing is written when the download fails.
    """
    code = str(sid).rjust(6, '0')

    print('Download ...(%d)' % sid)
    url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz' % code
    rf = download(url)
    if rf is None:
        return

    print('Convert to XML...(%d)' % sid)
    with open(code + '.xml', 'wb') as wf:
        csvToxml(rf, wf)


# Simplest usage, without subclassing Thread:
#     t = Thread(target=handle, args=(1,))
#     t.start()
#     print 'main thread'


class MyThread(Thread):
    """Worker thread that runs handle() for a single stock id."""

    def __init__(self, sid):
        Thread.__init__(self)
        self.sid = sid

    def run(self):
        handle(self.sid)


threads = []
for i in xrange(1, 11):
    t = MyThread(i)
    threads.append(t)
    t.start()

# join() blocks, so the main thread only continues after every worker
# thread has finished.
for t in threads:
    t.join()

print('main thread')

# Serial equivalent, for comparison:
#     for sid in xrange(1, 11):
#         handle(sid)
线程间通信可以用全局变量,但是不够安全;更好的做法是用Queue.Queue来传递通信内容,因为Queue是线程安全的队列。
#coding:utf8
import csv
from Queue import Queue
from StringIO import StringIO
from threading import Thread
from xml.etree.ElementTree import Element, ElementTree

import requests

from test_retractxml import pretty


class DownloadThread(Thread):
    """Producer: downloads one stock's CSV and puts (sid, data) on the queue."""

    def __init__(self, sid, queue):
        Thread.__init__(self)
        self.sid = sid
        self.url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
        self.url %= str(sid).rjust(6, '0')
        self.queue = queue

    def download(self, url):
        """Return the response body as a StringIO, or None on any failure."""
        try:
            response = requests.get(url, timeout=3)
        except requests.RequestException:
            # Report the failure as "no data" instead of letting the
            # exception end the thread before it has queued its result.
            return None
        if response.ok:
            return StringIO(response.content)
        return None

    def run(self):
        print('download %s' % self.sid)
        data = self.download(self.url)
        # Always queue a result; data is None when the download failed.
        self.queue.put((self.sid, data))


class ConverThread(Thread):
    """Consumer: converts every downloaded CSV taken from the queue to XML.

    The loop ends when it receives the sentinel item whose sid is -1.
    """

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def csvToxml(self, rf, wf):
        """Convert the CSV read from *rf* into XML written to *wf*.

        The first CSV row supplies the tag names (spaces removed); every
        following row becomes one <Row> element under the <Data> root.
        """
        reader = csv.reader(rf)
        # next(reader, []) keeps an empty input from raising StopIteration,
        # which would otherwise end the only consumer thread.
        headers = [h.replace(' ', '') for h in next(reader, [])]

        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            for tag, text in zip(headers, row):
                e = Element(tag)
                e.text = text
                eRow.append(e)

        pretty(root)
        et = ElementTree(root)
        et.write(wf)

    def run(self):
        while True:
            sid, data = self.queue.get()
            # Check the sentinel first so it is not logged as a conversion.
            if sid == -1:
                break
            print('Convert %s' % sid)
            if data is not None:
                fname = str(sid).rjust(6, '0') + '.xml'
                with open(fname, 'wb') as wf:
                    self.csvToxml(data, wf)


q = Queue()
dThreads = [DownloadThread(i, q) for i in xrange(1, 11)]
cThread = ConverThread(q)

# Several threads download in parallel ...
for t in dThreads:
    t.start()

# ... while a single thread does all the conversion.
cThread.start()

for t in dThreads:
    t.join()

# Every producer has finished: send the sentinel that stops the consumer.
q.put((-1, None))
由于全局锁GIL的存在,无法用多个线程来并行执行CPU密集型操作,所以此例子中:1. 用多个线程来进行IO操作(下载);2. 将所有下载的内容交给1个线程进行转换。它们之间的数据交换是通过线程安全的队列Queue完成的。
而线程之间的事件通知,需要使用threading库里的Event。等待事件用Event.wait(),触发事件用Event.set()。需要注意的是,set()之后wait()会立即返回、不再阻塞,需要调用Event.clear()重置事件,之后wait()才会重新阻塞等待。要把等待、响应的逻辑弄清楚。
这节还引入了守护线程(setDaemon)的概念:调用setDaemon(True)后该线程成为守护线程,当所有非守护线程(包括主线程)都结束时,它也会随程序退出而结束。
#coding:utf8
import csv
import os
import tarfile
from Queue import Queue
from StringIO import StringIO
from threading import Event, Thread
from xml.etree.ElementTree import Element, ElementTree

import requests

from test_retractxml import pretty


class DownloadThread(Thread):
    """Producer: downloads one stock's CSV and puts (sid, data) on the queue.

    NOTE(review): the notes elide this body as '****'; it follows the
    DownloadThread of the Queue-only example, with request failures handled.
    """

    def __init__(self, sid, queue):
        Thread.__init__(self)
        self.sid = sid
        self.url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
        self.url %= str(sid).rjust(6, '0')
        self.queue = queue

    def download(self, url):
        """Return the response body as a StringIO, or None on any failure."""
        try:
            response = requests.get(url, timeout=3)
        except requests.RequestException:
            return None
        if response.ok:
            return StringIO(response.content)
        return None

    def run(self):
        print('download %s' % self.sid)
        data = self.download(self.url)
        self.queue.put((self.sid, data))


class ConverThread(Thread):
    """Consumer: converts queued CSV data to XML and signals the tar thread.

    After every BATCH_SIZE converted files, and once more when the sentinel
    (sid == -1) arrives, it sets cEvent and waits on tEvent until the tar
    thread has packed the XML files.
    """

    # Number of converted files that triggers one packing round.
    BATCH_SIZE = 5

    def __init__(self, queue, cEvent, tEvent):
        Thread.__init__(self)
        self.queue = queue
        self.cEvent = cEvent
        self.tEvent = tEvent

    def csvToxml(self, rf, wf):
        """Convert the CSV read from *rf* into XML written to *wf*.

        NOTE(review): the notes elide this body as '****'; it follows the
        csvToxml of the Queue-only example.
        """
        reader = csv.reader(rf)
        # next(reader, []) keeps an empty input from raising StopIteration,
        # which would otherwise end the only consumer thread.
        headers = [h.replace(' ', '') for h in next(reader, [])]

        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            for tag, text in zip(headers, row):
                e = Element(tag)
                e.text = text
                eRow.append(e)

        pretty(root)
        et = ElementTree(root)
        et.write(wf)

    def run(self):
        count = 0
        while True:
            sid, data = self.queue.get()
            if sid == -1:
                # Sentinel: request one last packing round for the leftover
                # files and wait for it before exiting.
                self.cEvent.set()
                self.tEvent.wait()
                break
            print('Convert %s' % sid)
            if data is not None:
                fname = str(sid).rjust(6, '0') + '.xml'
                with open(fname, 'wb') as wf:
                    self.csvToxml(data, wf)
                count += 1
                if count == self.BATCH_SIZE:
                    self.cEvent.set()    # signal: a batch has been converted
                    self.tEvent.wait()   # wait until packing is done
                    self.tEvent.clear()  # reset so the next wait() blocks again
                    count = 0


class TarThread(Thread):
    """Daemon thread that packs the XML files each time cEvent is set."""

    def __init__(self, cEvent, tEvent):
        Thread.__init__(self)
        self.count = 0
        self.cEvent = cEvent
        self.tEvent = tEvent
        # Daemon thread: it does not keep the program alive once the
        # other threads have finished.
        self.setDaemon(True)

    def tarXML(self):
        """Move every '*.xml' file in the current directory into '<n>.tgz'.

        Each packed XML file is deleted afterwards; an archive that ended up
        empty is deleted as well.
        """
        self.count += 1
        tfname = '%d.tgz' % self.count
        added = 0
        tf = tarfile.open(tfname, 'w:gz')  # gzip-compressed tar archive
        try:
            for fname in os.listdir('.'):
                if fname.endswith('.xml'):
                    tf.add(fname)
                    os.remove(fname)
                    added += 1
        finally:
            # Close the archive even if adding or removing a file fails.
            tf.close()

        if not added:
            os.remove(tfname)

    def run(self):
        while True:
            self.cEvent.wait()   # wait for the converter's signal
            self.tarXML()
            self.cEvent.clear()  # reset so the next wait() blocks again
            self.tEvent.set()    # signal: packing is done


if __name__ == '__main__':
    q = Queue()
    dThreads = [DownloadThread(i, q) for i in xrange(1, 11)]

    cEvent = Event()
    tEvent = Event()

    cThread = ConverThread(q, cEvent, tEvent)
    tThread = TarThread(cEvent, tEvent)
    # The tar thread must be started too, or nobody ever answers cEvent.
    tThread.start()

    for t in dThreads:
        t.start()
    cThread.start()

    for t in dThreads:
        t.join()

    # Every producer has finished: send the sentinel that stops the consumer.
    q.put((-1, None))
    print('main thread')
本地线程这一章开始之后都是用了python3,我暂时还是想用python2来实现,所以先放一下,以后再回来补充。
线程池:pass
多进程:pass
:pass