七月在线爬虫班学习笔记(二)——Python基本语法及面向对象
代码格式
syntax基本语法
# Names are not typed: the same name can be rebound to values of different types.
a = 1234
print(a)
a = 'abcd'
print(a)

# Reading a name that was never assigned raises NameError.
try:
    print(b)
except NameError as e:
    print(e)

a = [1, 2, 3, 4]


def func(a):
    """Overwrite the first element of the list ``a`` in place with 2."""
    a[0] = 2


# The list object itself is passed, so the caller sees the mutation.
func(a)
print(a)

# In Python 3 print() is a function that accepts several arguments and
# prints them separated by spaces, so this call cannot fail.
print(100, 200, 300)
输出结果:
1234
abcd
name 'b' is not defined
[2, 2, 3, 4]
100 200 300
condition_and_loop 循环判断
# if / elif / else: the first branch whose condition is true wins.
score = 80
if score > 90:
    print('A')
elif score > 70:
    print('B')
elif score >= 60:
    print('C')
else:
    print('D')

# Sum 1..100 with a while loop.
total = 0
i = 1
while i <= 100:
    total += i
    i += 1  # Python has no ++i / --i operators; `++i` parses as +(+i) and changes nothing
print(total)

# A for loop iterates over an iterable; there is no C-style form such as
#     for (i = 0; i < 100; ++i)
# Write such counting loops with while (as here) or with range().

# Nested while loops with continue: the pair (i, 2) is skipped.
i = 0
while i < 3:
    j = 0
    while j <= 3:
        if j == 2:
            j += 1
            continue  # jump back to the `while j <= 3` test
        print(i, j)
        j += 1
    i += 1
输出结果:
B
5050
0 0
0 1
0 3
1 0
1 1
1 3
2 0
2 1
2 3
func函数
def hello(who='world'):
    """Print a greeting for ``who`` (defaults to 'world')."""
    # Wrap in a one-element tuple so that a tuple argument is formatted as a
    # single value instead of being unpacked by the % operator.
    print('hello %s!' % (who,))


hello()
hello('sea')


# f(x) = x * 5 + 100
# g(x) = x * 5; f(x) = x + 100
# => f(g(x)) = x * 5 + 100
def g(x):
    """Return ``x`` multiplied by 5."""
    return x * 5


def f(gf, x):
    """Apply the callable ``gf`` to ``x`` and add 100 to the result."""
    return gf(x) + 100


print(f(g, 100))
print(f(lambda x: x * 5, 100))


# Python has no overloading: this definition REPLACES the two-argument f above.
def f(gf, x, y):
    """Apply the callable ``gf`` to ``x`` and ``y`` and add 100 to the result."""
    return gf(x, y) + 100


print(f(lambda x, y: x * y, 100, 200))

# Output:
# hello world!
# hello sea!
# 600
# 600
# 20100
ct_list数组
# A list is Python's array type.
li = [1, 2, 3, 4, 5]

# Iterate over the elements directly.
for i in li:
    pass  # e.g. print(i)

# range() emulates the C loop `for (i = 0; i < x; ++i)`:
#   range(x)       => 0, 1, ..., x - 1
#   range(x, y)    => x, x + 1, ..., y - 1
#   range(x, y, z) => x, x + z, ..., stopping before y
for i in range(len(li)):
    pass  # e.g. print(li[i]); enumerate(li) gives index and element together

for i in range(1, 10, 2):
    print(i)

# Output:
# 1
# 3
# 5
# 7
# 9
# Defined here so the snippet runs on its own.
li = [1, 2, 3, 4, 5]

# Negative indices count from the end of the list.
print(li[-1])
print(li[-2])

# range(x, y, -z) with a negative step => x, x - z, ..., stopping before y
for i in range(3, -1, -1):
    print(i)
输出结果:
5
4
3
2
1
0
# Appending elements: append() adds its argument as ONE element, whatever its type.
li = []
li.append(1)
li.append(2)
li.append('abc')
li.append(['d', 'e', 'f'])
print(li)

# Appending the elements of another list.
li = [1, 2]
li_2 = [3, 4, 5]
# We want [1, 2, 3, 4, 5];
# li.append(li_2) would give [1, 2, [3, 4, 5]] instead.
li.extend(li_2)
print(li)

# Removing elements.
li.pop()  # removes the last element => [1, 2, 3, 4]
print(li)
li.pop(2)  # removes the element at index 2 => [1, 2, 4]
print(li)

# Output:
# [1, 2, 'abc', ['d', 'e', 'f']]
# [1, 2, 3, 4, 5]
# [1, 2, 3, 4]
# [1, 2, 4]
li = [5, 8, 7, 4, 2, 3]
li.sort()  # sorts in place, ascending
print(li)

# Sorting with a key function: order the pairs by their first item.
li = [[5, 2], [3, 8], [2, 11], [7, 6]]


def item_key(x):
    """Return the first item of ``x``; used as the sort key."""
    return x[0]


# `key` is a keyword argument; the equivalent lambda form is
# li.sort(key=lambda x: x[0]).
li.sort(key=item_key)
print(li)

# Output:
# [2, 3, 4, 5, 7, 8]
# [[2, 11], [3, 8], [5, 2], [7, 6]]
ct_tuple只读数组
# A tuple is a read-only sequence: item assignment raises TypeError.
tp = (1, 2, 3)
try:
    tp[0] = 100
except TypeError as e:
    print(e)

# Output:
# 'tuple' object does not support item assignment
ct_set没有重复元素的数组
# A set keeps each distinct element only once.
s = {1, 2, 2, 3, 3, 4, 5}
print(s)
s = {2, 3, 4, 5, 6, 2, 1, 9}
print(s)

# Output:
# {1, 2, 3, 4, 5}
# {1, 2, 3, 4, 5, 6, 9}
ct_dict 字典(哈希表)
# A dict is a hash table mapping keys to values.
di = {'k1': 'v1', 'k2': 'v2'}
di['k3'] = 'v3'
di['k4'] = 'v4'

# Iterate over the values only.
for v in di.values():
    print(v)

# Iterate over key/value pairs.
for k, v in di.items():
    print(k, v)

# Output (dicts preserve insertion order since Python 3.7):
# v1
# v2
# v3
# v4
# k1 v1
# k2 v2
# k3 v3
# k4 v4
slice数组切片
# Goal: take sub-lists such as [1, 2, 3] or [3, 4] out of [1, 2, 3, 4, 5].
li = [1, 2, 3, 4, 5]
li_0_2 = li[0:3]  # indices 0 <= i < 3; same as li[:3]
print(li_0_2)

# li[start:end:step] => [start, start + step, ...], stopping before end.
# With a positive step the defaults are start = 0, end = len(li), step = 1.
# The default end is NOT -1: li[0:-1] would drop the last element.
li_last_3 = li[-1:-4:-1]
print(li_last_3)

# Reversing a list with a slice.
print(li[::-1])
print(li[-2::-1])

# A slice is a copy: changing it leaves the original list untouched.
li_0_2[-1] = 100
print(li)

# Output:
# [1, 2, 3]
# [5, 4, 3]
# [5, 4, 3, 2, 1]
# [4, 3, 2, 1]
# [1, 2, 3, 4, 5]
list_string字符串与数组的关系
# Strings are immutable: item assignment raises TypeError.
s = 'abcdefg'
try:
    s[0] = 'x'
except TypeError as e:
    print(e)

# To "modify" a string, convert it to a list, edit the list, and join it back.
li = list(s)
li[0] = 'x'
s = ''.join(li)
print(s)
s = '-'.join(li)
print(s)

# Splitting on a separator.
s = 'abc,def,ghi'
p1, p2, p3 = s.split(',')
print(p1, p2, p3)

# Indexing and slicing work as they do for lists.
s = 'abcdefg'
print(s[0], s[-1])
print(s[2:5])

# Output:
# 'str' object does not support item assignment
# xbcdefg
# x-b-c-d-e-f-g
# abc def ghi
# a g
# cde
obj面向对象
# type() reports the type of an object.
print(type([1, 2, 3, 4]))
print(type('abcd'))
print(type({1: 2, 2: 3}))

# dir() lists the attributes and methods of an object (here: the list type).
print(dir(list))
1 <class 'list'> 2 <class 'str'> 3 <class 'dict'> 4 ['__add__', '__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rmul__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort']
class Clazz:
    """Minimal class holding two values, ``x`` and ``y``."""

    # `self` plays the role of C++'s `this` pointer.
    def __init__(self, x, y):
        self.x = x
        self.y = y

    # The first parameter of an instance method must be `self`.
    def display(self):
        """Print ``x`` and ``y`` separated by a space."""
        print(self.x, self.y)


print(type(Clazz))
clz = Clazz(100, 200)
clz.display()  # equivalent to Clazz.display(clz)


class Base:
    """Base class with a ``run`` method."""

    def run(self):
        print('Base::run')


class Tom(Base):
    """Subclass of Base that overrides ``run``."""

    def run(self):
        print('Tom::run')


t = Tom()
print(isinstance(t, Base))
t.run()  # the override defined on Tom is the one that runs
<class 'type'> 100 200 True Tom::run
def run(runner):
    """Call ``runner.run()``; any object that has a ``run`` method works."""
    runner.run()


class R1:
    """Has a ``run`` method; shares no base class with R2."""

    def run(self):
        print('R1::run')


class R2:
    """Has a ``run`` method; shares no base class with R1."""

    def run(self):
        print('R2::run')


run(R1())
run(R2())

# Output:
# R1::run
# R2::run
file_rw文件读写
# `with` closes the file when the block exits, even if an exception is raised
# inside it, so the file handle is not leaked.
# NOTE(review): the leading spaces before 'abcd' and 'nefgh' are reconstructed
# from the recorded output below - confirm against the original notes.
with open('text.txt', 'w', encoding='utf-8') as f:
    f.write('1234\n abcd\n nefgh')

# Binary mode returns the raw bytes exactly as stored on disk.
with open('text.txt', 'rb') as f:
    print(f.read())

# A text-mode file can be iterated line by line.
with open('text.txt', encoding='utf-8') as f:
    for line in f:
        print(line.strip())

# Output (recorded on Windows, where text mode writes '\n' as '\r\n';
# on other platforms the first line shows plain '\n'):
# b'1234\r\n abcd\r\n nefgh'
# 1234
# abcd
# nefgh
threads多线程
import threading


def thread_func(x):
    """Print ``x * 100`` followed by a blank line."""
    # Add a sleep or heavier work here to observe the threads interleaving.
    print('%d\n' % (x * 100))


# Create five threads; each one is handed the same argument, 100.
threads = []
for _ in range(5):
    threads.append(threading.Thread(target=thread_func, args=(100,)))

for thread in threads:
    thread.start()

# Wait for every thread to finish.
for thread in threads:
    thread.join()

# Output:
# 10000
#
# 10000
#
# 10000
#
# 10000
#
# 10000
error_handling错误和异常处理
# Error handling with try / except / finally.
try:
    r = 10 / 0
except ZeroDivisionError as e:
    print(type(e))
    print(e)
finally:
    # The finally block runs whether or not an exception was raised;
    # it is mainly used to release resources.
    print('Always come here.')

# Output:
# <class 'ZeroDivisionError'>
# division by zero
# Always come here.
import logging

# Send records of level DEBUG and above to the file example.log.
logging.basicConfig(filename='example.log', level=logging.DEBUG)
logging.debug('This message should go to the log file')
logging.info('So should this')
logging.warning('And this, too')
情不知所起一往而深