Python168的学习笔记7

关于多线程操作。

对于IO操作,如访问网站,写入磁盘这种需要时间等待响应的操作,多个cpu也几乎不能提高效率。

对于CPU密集型操作,如这个格式转换,可以通过多个cpu同时去进行。

但是对于Python(CPython)来讲,存在GIL(全局解释器锁),导致同一时刻只有一个线程能执行Python字节码。所以多线程无法加速CPU密集型操作,只适合用于IO密集型操作(线程在等待IO时会释放GIL)。

 1 #coding:utf8
 2 import csv
 3 from xml.etree.ElementTree import Element,ElementTree
 4 import requests
 5 from StringIO import StringIO
 6 from test_retractxml import pretty
 7 
 8 def download(url):
 9     #IO操作很慢,因为不能直接得到数据。如这步:是发送请求,等待数据,在等待的过程中让出CPU,自己睡眠。
10     response = requests.get(url,timeout=3)
11     if response.ok:
12         return StringIO(response.content)
13 
def csvToxml(scsv, fxml):
    """Convert CSV lines read from *scsv* into an XML document written to *fxml*.

    The first CSV row supplies the element names (spaces removed). Every
    following row becomes a ``<Row>`` element under the ``<Data>`` root, with
    one child element per column. An empty input yields a ``<Data>`` root
    without rows instead of raising ``StopIteration``.

    scsv -- iterable of CSV lines (the callers pass a file-like object)
    fxml -- target handed to ``ElementTree.write`` (the callers pass a file
            opened in binary mode)
    """
    # CPU-bound work: this is the part that several CPUs could do in parallel.
    reader = csv.reader(scsv)
    # next(reader, []) tolerates an empty download. A real list (not map())
    # is needed because the headers are re-used for every row.
    headers = [h.replace(' ', '') for h in next(reader, [])]

    root = Element('Data')
    for row in reader:
        eRow = Element('Row')
        root.append(eRow)
        # zip() stops at the shorter sequence, so surplus cells are dropped.
        for tag, text in zip(headers, row):
            e = Element(tag)
            e.text = text
            eRow.append(e)

    # pretty() comes from test_retractxml; presumably it indents the tree
    # in place -- verify against that module.
    pretty(root)
    ElementTree(root).write(fxml)
32     
33     
def handle(sid):
    """Download the CSV for numeric id *sid* and save it as ``<id>.xml``.

    The id is left-padded with zeros to six characters for both the request
    URL and the output file name. Returns early when the download yields
    no data.
    """
    code = str(sid).rjust(6, '0')

    print('Download ...(%d)' % sid)
    rf = download('http://table.finance.yahoo.com/table.csv?s=%s.sz' % code)
    if rf is None:
        return

    print('Convert to XML...(%d)' % sid)
    with open(code + '.xml', 'wb') as wf:
        csvToxml(rf, wf)
45         
46 from threading import Thread
47 
48 '''
49 t = Thread(target=handle,args=(1,))
50 t.start()
51 
52 print 'main thread'
53 '''
class MyThread(Thread):
    """Worker thread that runs ``handle`` for a single id."""

    def __init__(self, sid):
        super(MyThread, self).__init__()
        # Id passed to handle() once the thread is running.
        self.sid = sid

    def run(self):
        # Executed in the new thread after start() is called.
        handle(self.sid)
61 
# One worker per id 1..10; start() returns immediately, so all downloads
# are in flight at the same time.
threads = [MyThread(sid) for sid in xrange(1, 11)]
for worker in threads:
    worker.start()

# join() blocks until the thread has finished, which guarantees the main
# thread only continues (and exits) after every worker is done.
for worker in threads:
    worker.join()

print('main thread')
73 
74 
75 '''
76     #这是串行的方法
77     for sid in xrange(1,11):
78         print 'Download ...(%d)' % sid
79         url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
80         url %= str(sid).rjust(6,'0')
81         rf = download(url)
82         if rf is None:continue
83         
84         print 'Convert to XML...(%d)' % sid
85         fname = str(sid).rjust(6,'0')+'.xml'
86         with open(fname,'wb') as wf:
87             csvToxml(rf, wf)
88 '''

 

线程间通信可以用全局变量,但是不够安全;更好的做法是用Queue.Queue来传递数据,因为Queue是线程安全的队列。

 1 #coding:utf8
 2 import requests
 3 import csv
 4 from xml.etree.ElementTree import Element,ElementTree
 5 from test_retractxml import pretty
 6 from threading import Thread
 7 from StringIO import StringIO
 8 
 9 from Queue import Queue
10 
11 
class DownloadThread(Thread):
    """Producer thread: downloads one CSV and puts ``(sid, data)`` on the queue."""

    def __init__(self, sid, queue):
        super(DownloadThread, self).__init__()
        self.sid = sid
        self.queue = queue
        # The id is left-padded with zeros to six characters for the URL.
        code = str(sid).rjust(6, '0')
        self.url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz' % code

    def download(self, url):
        """Return the response body as a ``StringIO``, or ``None`` if not OK."""
        reply = requests.get(url, timeout=3)
        if not reply.ok:
            return None
        return StringIO(reply.content)

    def run(self):
        print('download %s' % (self.sid,))
        # The result is queued even when it is None; the consumer checks it.
        self.queue.put((self.sid, self.download(self.url)))
30         
31             
class ConverThread(Thread):
    """Consumer thread: turns queued CSV downloads into XML files.

    Queue items are ``(sid, data)`` tuples. ``sid == -1`` is the sentinel
    that stops the thread; items whose ``data`` is falsy are skipped.
    """

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def csvToxml(self, rf, wf):
        """Convert CSV lines from *rf* into an XML document written to *wf*.

        The first row supplies the element names (spaces removed); each
        further row becomes a ``<Row>`` under the ``<Data>`` root. An empty
        input yields a root without rows instead of raising
        ``StopIteration``, which would otherwise kill this thread.
        """
        reader = csv.reader(rf)
        # A real list (not map()) is needed: headers are re-used per row.
        headers = [h.replace(' ', '') for h in next(reader, [])]

        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            # zip() stops at the shorter sequence, so surplus cells are dropped.
            for tag, text in zip(headers, row):
                e = Element(tag)
                e.text = text
                eRow.append(e)

        # pretty() comes from test_retractxml; presumably it indents the
        # tree in place -- verify against that module.
        pretty(root)
        ElementTree(root).write(wf)

    def run(self):
        while True:
            # Blocks until a producer (or the main thread) queues an item.
            sid, data = self.queue.get()
            print('Convert %s' % (sid,))
            if sid == -1:
                break
            if data:
                fname = str(sid).rjust(6, '0') + '.xml'
                with open(fname, 'wb') as wf:
                    self.csvToxml(data, wf)
65 
66 
67 
# Shared thread-safe queue: the downloaders produce, the converter consumes.
q = Queue()
dThreads = []
for sid in xrange(1, 11):
    dThreads.append(DownloadThread(sid, q))
cThread = ConverThread(q)

# Several threads do the downloads ...
for downloader in dThreads:
    downloader.start()

# ... while a single thread does all the conversions.
cThread.start()

# Only after every download has been queued is the stop sentinel sent.
for downloader in dThreads:
    downloader.join()

q.put((-1, None))

 

由于全局解释器锁GIL的存在,多线程无法并行执行CPU密集型操作,所以此例子的做法是:1,用多个线程来进行IO操作(下载);2,将所有下载的内容交给1个线程进行转换。线程之间通过Queue这个线程安全的队列来传递数据。

 

而线程之间的事件通知,需要用到threading库里的Event。等待事件用Event.wait(),触发事件用Event.set()。需要注意的是,set之后再调用wait就不会阻塞了,需要先调用Event.clear()重置事件,wait才会重新阻塞。要把等待、触发的逻辑弄清楚。

这节还引入了守护线程(setDaemon)的概念:当其值为True时,所有非守护线程结束后程序就会退出,守护线程也随之结束。

#coding:utf8
# NOTE: the original post collapsed this listing onto a few lines and elided
# ("****") the imports, DownloadThread and ConverThread.csvToxml. They are
# restored here from the previous listing so the example is complete.
import csv
import os
import tarfile
from Queue import Queue
from StringIO import StringIO
from threading import Event, Thread
from xml.etree.ElementTree import Element, ElementTree

import requests

from test_retractxml import pretty


class DownloadThread(Thread):
    """Producer thread: downloads one CSV and puts ``(sid, data)`` on the queue."""

    def __init__(self, sid, queue):
        Thread.__init__(self)
        self.sid = sid
        self.url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
        self.url %= str(sid).rjust(6, '0')
        self.queue = queue

    def download(self, url):
        """Return the response body as a ``StringIO``, or ``None`` if not OK."""
        response = requests.get(url, timeout=3)
        if response.ok:
            return StringIO(response.content)
        return None

    def run(self):
        print('download %s' % (self.sid,))
        data = self.download(self.url)
        self.queue.put((self.sid, data))


class ConverThread(Thread):
    """Consumer thread: converts queued CSVs to XML, signalling the tar thread.

    cEvent -- set by this thread to say "a batch of XML files is ready"
    tEvent -- set by the tar thread to say "the batch has been packed"
    """

    def __init__(self, queue, cEvent, tEvent):
        Thread.__init__(self)
        self.queue = queue
        self.cEvent = cEvent
        self.tEvent = tEvent

    def csvToxml(self, rf, wf):
        """Convert CSV lines from *rf* into an XML document written to *wf*."""
        reader = csv.reader(rf)
        # next(reader, []) tolerates an empty download instead of raising
        # StopIteration, which would kill this thread.
        headers = [h.replace(' ', '') for h in next(reader, [])]

        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            for tag, text in zip(headers, row):
                e = Element(tag)
                e.text = text
                eRow.append(e)

        # pretty() comes from test_retractxml; presumably it indents the
        # tree in place -- verify against that module.
        pretty(root)
        ElementTree(root).write(wf)

    def run(self):
        count = 0
        while True:
            sid, data = self.queue.get()
            print('Convert %s' % (sid,))
            if sid == -1:
                # Sentinel: ask for a final packing pass and wait for it, so
                # this thread does not finish before the daemon tar thread
                # has packed the remaining files.
                self.cEvent.set()
                self.tEvent.wait()
                break
            if data:
                fname = str(sid).rjust(6, '0') + '.xml'
                with open(fname, 'wb') as wf:
                    self.csvToxml(data, wf)
                count += 1
                if count == 5:
                    self.cEvent.set()    # signal: a batch has been converted
                    self.tEvent.wait()   # wait until the batch is packed
                    self.tEvent.clear()  # reset so the next wait() blocks
                    count = 0


class TarThread(Thread):
    """Daemon thread that packs the ``.xml`` files of the current directory."""

    def __init__(self, cEvent, tEvent):
        Thread.__init__(self)
        self.count = 0
        self.cEvent = cEvent
        self.tEvent = tEvent
        # Daemon thread: it does not keep the process alive once the other
        # (non-daemon) threads have finished.
        self.daemon = True

    def tarXML(self):
        """Move every ``.xml`` file of the current directory into ``N.tgz``."""
        self.count += 1
        tfname = '%d.tgz' % self.count
        packed = 0
        tf = tarfile.open(tfname, 'w:gz')  # gzip-compressed tar archive
        try:
            for fname in os.listdir('.'):
                if fname.endswith('.xml'):
                    tf.add(fname)
                    os.remove(fname)  # the file now lives in the archive
                    packed += 1
        finally:
            # Close the archive even if adding or removing a file fails.
            tf.close()
        if not packed:
            # Nothing was packed: do not leave an empty archive behind.
            os.remove(tfname)

    def run(self):
        while True:
            self.cEvent.wait()   # wait for "batch converted"
            self.tarXML()
            self.cEvent.clear()  # reset before releasing the converter
            self.tEvent.set()    # signal: packing is done


if __name__ == '__main__':
    q = Queue()
    dThreads = [DownloadThread(i, q) for i in xrange(1, 11)]

    cEvent = Event()
    tEvent = Event()

    cThread = ConverThread(q, cEvent, tEvent)
    tThread = TarThread(cEvent, tEvent)
    tThread.start()  # the tar thread has to be started as well

    for t in dThreads:
        t.start()
    cThread.start()

    for t in dThreads:
        t.join()

    # All downloads are queued; tell the converter to stop.
    q.put((-1, None))
    print('main thread')

 

本地线程这一章开始之后都是用了python3,我暂时还是想用python2来实现,所以先放一下,以后再回来补充。

线程池:pass

多进程:pass

:pass

posted @ 2016-11-13 23:51  Nanrou  阅读(226)  评论(0编辑  收藏  举报