函数   调用  abs   即求绝对值  只有一个参数
      比较函数  cmp(x,y) 有两个参数 x>y  1    x<y   -1    x=y  0 
      数据类型转化   int()
     定义函数 自定义求绝对值的my_abs函数   def my_abs(a):    (函数名中不能有"-",应使用下划线)
                                            if a>=0:  
                                              return  a


                                            else:
                                              return  -a

     def  power(x):         def power(x,n):     
      return  x*x                 s=1
                                  while  n>0:
                                       n=n-1
                                      s=s*x
                                 return s 
pycharm必须用英文半角  注意空格使用  
if 条件语句:  if expression:                                                          if 1>2:   (True:非零数、非空的量等   False:0,None,空的量等)
                   print                                                                    print "ok"         
           只能控制下边带缩进的代码,一定要缩进,一般用空格缩进四个,冒号不能丢         print "main ok"    \不受if控制\
           多个if同时使用应用:进行分隔,可以使用elif
           x = int(raw_input("please input x:"))
#coding:utf-8

x = int(raw_input("please input:"))
if x>=90:
    print "A"                           \三个if都执行且else只与第三个if有关系\                          

if x>=80:
    print "B"
if x>=70:
    print "C"
else:
    print "bad"

 

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
please input:90
A
B
C

Process finished with exit code 0

 

#coding:utf-8

x = int(raw_input("please input:"))                \if-elif从上到下依次判断,只执行第一个成立的分支;else在三个判断都不成立时才执行\
if x>=90:
    print "A"
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
please input:99
A

Process finished with exit code 0

嵌套模式
#coding:utf-8

x = int(raw_input("please input x:"))              \x>=90和y>90同时成立才会输出A\
y = int(raw_input("please input y:"))
if x>=90:
    if y>90:
        print "A"
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

 

#coding:utf-8

x = int(raw_input("please input x:"))
y = int(raw_input("please input y:"))
if x>=90:                                                    可以使用and,or,not
    if y>90:              \相当于if x>=90 and y>90:\
        print "A"    \x>=90且y>90同时成立则输出\
    print "y>=90"     \只要x>=90就会输出,与if y>90是否成立无关\
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

 

 


for循环(有次数的循环)
   循环语句:  for iterating_var in sequence:
                          print
             s=()元组print s[x]     s = "hello"               l=[]列表print l
                                      for x in range(len(s)):
                                          print  s[x]     
            迭代:range(i,j ,[步进值])类似于切片 显示具体的数字             xrange(10)只显示xrange(10)不显示具体数字
              range(10)输出是十次且是从零到九   range(1,11)输出是十次且是从一到十   range(1,11,2)输出1,3,5,7,9


#coding:utf-8
s = "hello"
l = [1,2,3,'a','b']
t = (7,8,89,'x','f')
d = {1:111,2:222,5:555,3:3333}
for  x in l:      #在列表中取值的格式
    if x>=2:
        print x
for x in range(len(s)):       #字符串取值的方法
    print  s[x]
for x in d:
    print x      #只取出的是k
    print d[x]     #只取出v的值                   \一个for下边有多个print,三个print会同时执行多次\
    print d.items()   #取出(k,v)的键值对
for k,v in d.items():
    print k
    print v

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
2
3
a
b
h
e
l
l
o
1
111
[(1, 111), (2, 222), (3, 3333), (5, 555)]
2
222
[(1, 111), (2, 222), (3, 3333), (5, 555)]
3
3333
[(1, 111), (2, 222), (3, 3333), (5, 555)]
5
555
[(1, 111), (2, 222), (3, 3333), (5, 555)]
1
111
2
222
3
3333
5
555

Process finished with exit code 0

字典的拆分  for k,v in d:                      for k,v in d.items():
                 print k                               print k
                 print v 类型错误                      print v类型正确

循环控制

#coding:utf-8
import time    #时间模块,两个元素之间出现的时间间隔
for  x in range(3):         #range(3)表示从0到2
    print x
    time.sleep(1)  #括号中数据表示间隔的时间
else:
    print "ending"   #只有正常结束才会ending      出现KeyboardInterrupt(终止信号)表示强制终止了,用Ctrl+c 此时不出现ending

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
0
1
2
ending

 


   
# encoding:utf-8                             
for x in range(35):                            
    print x
    if x==3:
        pass          #pass是代码桩,起到占位作用
    if x==2:
        print "hello22"
        continue           #跳过当次循环的余下语句
    if x==5:
        exit()             #exit()跳出整个程序
    if x ==6:
        break                #break跳出循环,不影响下边循环执行,也不执行else
    print"#"*50
else:
    print "ending"
for x in range(1,11):
    print "------>",x            # print "---->",x  逗号不可丢输出为---->x


加了exit()之后的运行结果
0
##################################################
1
##################################################
2
hello22
3
##################################################
4
##################################################
5

 

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
0
##################################################
1
##################################################
2
hello22
3
##################################################
4
##################################################
5
##################################################
6
------> 1
------> 2
------> 3
------> 4
------> 5
------> 6
------> 7
------> 8
------> 9
------> 10


while循环
while  True:
    print "hello"      #死循环


# encoding:utf-8
x=""  #设置一个全局变量,空字符
while x != "q":       #x!=q则为真
    print "hello"
    x = raw_input("please input something,q for quit:")
    if not x :    #逻辑否,即回车键
        break
    if x=="c":
        continue              #输入c则不会出现one more time...
    print "one more time...."
else:
    print "ending...."           #ending....出现是正常程序正常结束即输入q,不是按回车键或Ctrl+c


输入q的结果
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
hello
please input something,q for quit:q
one more time....
ending....

 

 


定义函数名是两个以上单词的,第二个单词首字母要大写
def add():    再次调用时只需输入add()
单独的一个def操作不会出现效果,跟定义一个变量没有差别
# encoding:utf-8
a = 100                                          def fun(x):
def fun():                                           print "i get a:",x    #x为变量
    if True:
        print "good"
        print a

if fun():
    print "ok"
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
good
100

Process finished with exit code 0
# encoding:utf-8
a = 100
def fun():
    if True:
        print "good"
        print a

fun()
fun()
fun()
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
good
100
good
100
good
100

Process finished with exit code 0
# encoding:utf-8
a = 100
def fun():
    if True:
        print "good"
print a

fun()
fun()
fun()
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
100
good
good
good

 

# coding:utf-8
a = 100
def fun():
    print "a"
fun()
if fun():
    print "ok"           #输出没有ok


D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled1/函数定义.py
a
a

Process finished with exit code 0


Process finished with exit code 0

形参和实参
#coding:utf-8                                     
def mashine(x,y='奶油'):                            
    print "制作一个",x,'元',y,'口味的冰淇淋!'                
                                                  
mashine( 3,'巧克力')                                 
                   
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
制作一个 3 元 巧克力 口味的冰淇淋!

Process finished with exit code 0
x和y要一一对应;x没有默认值必须传入,若只输入x的值,mashine(3),此时y取默认值'奶油';若要单独指定y,mashine(3,y='巧克力')
输出一句话要用 "制作一个",x,'元',y,'口味的冰淇淋!'注意符号的使用

 

局部变量和全局变量       #若局部变量与全局变量重名,函数内的赋值只改变局部变量,全局变量的值不变
#coding:utf-8
a= 'i am global var'/*全局变量*/
def fun():
    a=100/*局部变量,不可以被当作全局变量使用*/
    global x/*用global声明后x是全局变量,只要函数fun()被调用过,x就可以在函数外使用*/
    x=200
    print a
fun()
print x

 


函数返回值    sum([1,2,3,4,5])求和
#coding:utf-8
def f(x,y):
    print  x+y/*x+y即计算x+y若为"x+y"则直接输出x+y*/
f(97,98)
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
195

Process finished with exit code 0
#coding:utf-8
def f(x,y):
    print  "welcome"
    print x+y
f(2,3)
z=f(3,4)
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
welcome
5
welcome
7
None

Process finished with exit code 0
#coding:utf-8
def f():
    return "hello"
z= f()/*函数被调用后必须返回一个指定的值,否则无法输出*/
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
hello

Process finished with exit code 0
#coding:utf-8
def f():
    return "hello"/*同时输入两个return,只有第一个执行即return执行后函数终止*/
    return "rr"
z= f()
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
hello
Process finished with exit code 0
向函数传元组和字典
#coding:utf-8
t=('name','milo')
def f(x,y):
    print " % s: % s" % ( x,y)/*注意这种写法*/
f(*t)/*输出t元组对应的两个值*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
 name: milo

Process finished with exit code 0
#coding:utf-8
def f(name='name',age=0):
    print "name:%s"% name
    print "age: %d"% age
f("milo",30)
d={"name":'milo',"age":20}/*字典的无序性*/
f(**d)/*定义字典调用的方法*/
d={'a':9,'n':'milo'}
f(d['n'],d['a'])/*两个值要一一对应*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
name:milo
age: 30
name:milo
age: 20
name:milo
age: 9
处理多余的实参
#coding:utf-8
def f(x,*args):
    print x
    print args
f(1,2,3)
def f(x,*args,**kwargs):/*一个*表示元组,两个**表示字典*/
    print x
    print args
    print kwargs
f(x=1,y=3,z=4)/*y=3,z=4是多余的关键字实参,被收集到字典kwargs中*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
1
(2, 3)
1
()
{'y': 3, 'z': 4}

Process finished with exit code 0
Process finished with exit code 0
lambda函数的使用
#coding:utf-8
g = lambda x,y:x*y/*必须亦g=*/
print g(2,3)/*print必须有*/
switch语句的实现(Python没有switch语句,用字典代替)

def jia(x,y):
    return x+y
def jian(x,y):
    return x-y
def cheng(x,y):
    return x*y
def chu(x,y):
    return x/y
operator = {"+":jia,"-":jian,"*":cheng,"/":chu}
def f(x,o,y):
    print operator.get(o)(x,y)/*字典的用法*/
f(3,"+",3)
常用函数
pow(x,y)x的y次幂 pow(x,y,z)x的y次幂再除以z求模   divmod(x,y)返回(商,余数)组成的元组,即(x//y, x%y)   round()四舍五入,返回一个浮点数
callable()测试一个对象可不可以被调用   isinstance(l,list)判断对象的类型   cmp()比较两个对象(数字、字符串等)的大小
range()快速生成一个序列   xrange()生成器,不出现序列,效率比range高很多 
类型转化函数
type()类型  int()整数型  long()长整形  float()浮点型  complex()复数型
str(x )                      将对象 x 转换为字符串
tuple(s )                    将序列 s 转换为一个元组
list(s )                     将序列 s 转换为一个列表
chr(x )                      将一个整数转换为一个字符
ord(x )                      将一个字符转换为它的整数值
hex(x )                      将一个整数转换为一个十六进制字符串
oct(x )                      将一个整数转换为一个八进制字符串
       

 


内置函数
string函数的应用/*必须是针对字符串使用的*/
用help查询的时候用法help(str.replace)
#coding:utf-8
s='13234443213223'
print s.replace('2','d')/*s必须标明.不可少*/
print s.replace('2','d',1)/*最后的1表示只把第一个2替换成d*/

 

str.capitalize()把字符串首字母转化为大写
str.split('x',y)做切割用的,x表示分隔符(在每个x出现的位置切开),y表示最多切割的次数
string.replace(s,'hello','good')
序列处理函数
len()求长度
max()求最大值     filter(f,L)主要用于序列的操作    f表示取大于5的数,L表示一个列表,表示取出在L中符合f的值
#coding:utf-8

name=['milo','zou','tom']
age=[20,30,40]
tel=['133','156','234']
t=['x','y']
print zip(name,age,tel,t)          /*zip和map的区别*/
print map(None,name,age,tel,t)

a=[1,3,5]
b=[2,3,4]
def mf(x,y):
    return x*y
print map(None,a,b)/*输出a,b对应为位置的成对列表*/
print map(mf,a,b)/*把a,b列表的值在mf语句中实现*/

L=range(1,101)
def rf(x,y):         /*可以转化为reduce(lambda x,y:x+y,L)*/      若想取L列表中的偶数值filter(lambda x:if x%2 ==0,L)  无法运行,lambda中只能写表达式,不能写if语句
    return x +y                                                                      应该为filter(lambda x:x%2 ==0,L)
print reduce(rf,L)/*把L中的数据在rf中运算*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[('milo', 20, '133', 'x'), ('zou', 30, '156', 'y')]
[('milo', 20, '133', 'x'), ('zou', 30, '156', 'y'), ('tom', 40, '234', None)]
[(1, 2), (3, 3), (5, 4)]
[2, 9, 20]
5050

Process finished with exit code 0

 

 

正则表达式(保持字符串原义)
re模块正则表达式模块接口


[]/*指定一个字符集,用来匹配其中的任意一个字符*/
#coding:utf-8
import re
s = r'abc'/*定义时r不能丢,只是正则的一种形式,表示abc字符串*/                      元字符在字符集中不起作用[amk$]
print re.findall(s,"aaaaaaaaaaaaaaaaaaaaaa")
print re.findall(s,"aaaaaaabcaaaaaaaaaabcaa")
st="top tip tap twp tep"
res = r"top"
print re.findall(res,st)
res = r"t[io]p"/*表示取tip或者top都被取到[]表示或者的意思*/
print re.findall(res,st)
res=r"t[^io]p"/*表示除了tip,top都能被取到,^表示除了*/
print re.findall(res,st)

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[]
['abc', 'abc']
['top']
['top', 'tip']
['tap', 'twp', 'tep']

Process finished with exit code 0

 


#coding:utf-8
import re
s = "hello word, hello word"
r = r"hello"
print  re.findall(r,s)/*用r这个正则来匹配s这个字符串*/
r = r"^hello"/*只取开头单词hello*/
print  re.findall(r,s)
s = "word,hello word"
print re.findall(r,s)/*若开头不是hello,则输出一个空列表*/
r = r"word$"/*取最末尾的单词Word*/
print re.findall(r,s)
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['hello', 'hello']
['hello']
[]
['word']

Process finished with exit code 0


r = r"x[0-9]x"/*表示0到9这十个数字*/
print re.findall(r,'x1x x2x x9x')
r = "t[abc$]"/*在方括号中$和^只表示符号,无任何实际意义*/


#coding:utf-8
import re
r = r"^abc"
print re.findall(r,"abc")
print re.findall(r,"^abc")
r = r"\^abc"/*\把特殊字符当成普通字符处理*/
print re.findall(r,"^abc")
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['abc']
[]
['^abc']

Process finished with exit code 0
\d匹配任何十进制数,它相当于[0-9]
\D匹配任何非数字字符,它相当于类[^0-9]
\s匹配任何空白字符,它相当于类[ \t\n\r\f\v]
\S匹配任何非空白字符,它相当于类[^ \t\n\r\f\v]
\w匹配任何字母数字字符(包括下划线),它相当于类[a-zA-Z0-9_]
\W匹配任何非字母数字字符,它相当于类[^a-zA-Z0-9_]
  


#coding:utf-8
import re
r = r"^010-?\d{8}$"/*-?表示-可有可无,{x}表示\d重复x次,^表示开头数$表示结尾数*/
print re.findall(r,'010-2343245')
print re.findall(r,'010-23456789')

r = r"ab*"/*b的重复数字可以是零次一次或更多*/
print re.findall(r,"a")
print re.findall(r,"abbbbbb")
r = r"ab+"/*贪婪模式,做最大匹配即输出所有的字符*//*+表示重复数字至少有一次*/
print re.findall(r,"a")
print re.findall(r,"ab")
print re.findall(r,"abbbbbb")
r = r"ab+?"/*非贪婪模式,做最小匹配,即只输出重复数字一次*/
print re.findall(r,"abbbbbbbbbbbb")
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[]
['010-23456789']
['a']
['abbbbbb']
[]
['ab']
['abbbbbb']
['ab']

Process finished with exit code 0

#coding:utf-8
import re
r = r"a{1,3}"/*至少有1个重复,最多有3个重复*/
print re.findall(r,'a')                                                      {0,}等同于*  {1,}等同于+   {0,1}等同于?                                                  
print re.findall(r,'d')
print re.findall(r,'aaaaa')/*若大于3,则分成多个字符串出现*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['a']
[]
['aaa', 'aa']

Process finished with exit code 0

 

 

 

#coding:utf-8
import re
r1 =r"\d{3,4}-?\d{8}\\"       /*加r时后边\不会被特殊处理,不加r,打印\就会报错*/
print re.findall(r1,"010-12345677\\")
p_tel=re.compile(r1)      \*用re.compile编译正则表达式*\
print p_tel
print p_tel.findall('010-234455628\\')   \*缺点:输出字节只根据要求长度进行截取*\

csvt_re = re.compile(r'csvt',re.I)        \*后边的re.I使用后可以不用区分大小写*\
print csvt_re.findall('CSVT')
print csvt_re.findall('Csvt')
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['010-12345677\\']
<_sre.SRE_Pattern object at 0x0258E248>
[]
['CSVT']
['Csvt']

Process finished with exit code 0


#coding:utf-8
import re
r1 = r"csvt.net"\*点的含义是匹配除换行符以外的任意一个字符(包括点本身)*\
print re.findall(r1,'csvt.net')
print re.findall(r1,'csvtonet')
print re.findall(r1,'csvt\nnet')
print re.findall(r1,'csvt\nnet',re.S)\*大写的S和点搭配使用的含义是包括换行在内的所有字符*\
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled1/.idea/正则符号的使用.py
['csvt.net']
['csvtonet']
[]
['csvt\nnet']

Process finished with exit code 0

Process finished with exit code 0

 

#coding:utf-8
import re
s="""
hello csvt
csvt hello
hello csvt hello
csvt hehe
"""
r = "^csvt"
print re.findall(r,s)
print re.findall(r,s,re.M)   /*若输入为多行的时候re.M可以与$和^一起使用,使^和$发挥之前的作用*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/正则表达式.py
[]
['csvt', 'csvt']

Process finished with exit code 0

 


#coding:utf-8
import re
tel = r"""
\d{3,4}
-?
\d{8}                           X与verbose意思一样
"""
print re.findall(tel,'010-12345678',re.X)/*正则为多行的时候用re.X可以输出成一行*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/正则表达式.py
['010-12345678']

Process finished with exit code 0

#coding:utf-8
import re
email = r"\w{3}@\w+(\.com|\.cn)"               \*可以有分组,返回值优先返回分组中的数据,分组中可以出现或这种关系*\
print re.findall(email,'zzz@csvt.com')
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/字符.py
['.com']

Process finished with exit code 0

 

 

 

 

 

正则爬虫网页
链接网址是要把想要的括起来,还有单双引号的互用 reg = r'src="(http://.+\.jpg)"'网址必须用""外边用''引号
图片的形式有两种jpg和gif


#coding:utf-8
import re
import urllib
def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    # Download every .jpg image referenced in html, saving the files as
    # 0.jpg, 1.jpg, ... in the current working directory.
    #
    # The ".jpg" suffix must sit inside the capture group: with it outside,
    # the captured URL is cut off before the extension and the download
    # request targets a URL that is not the image.
    reg = r'src="(.*?\.jpg)" pic_ext'
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1

html = getHtml("http://tieba.baidu.com/p/3815851566")
getImg(html)
   

 

 

浅拷贝和深拷贝(原数据改变拷贝数据不变)
import copy
a = [1,2,3,['a','b','c']]
b = a
c = copy.copy(a)            /*浅拷贝*/            浅拷贝整体id改变局部id不变    
print a                         若a再增加值,b会改变但c不变
print b                       若a对内层列表增加值,c也增加
print c
print id(a)
print id(b)
print id(c)
a.append('f')
print a
print b
print c
d = copy.deepcopy(a)              /*深拷贝*/           深拷贝整体局部id都改变
print d                                           若a对内层列表增加值,d不增加
print id(a)
print id(d)

 


[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c']]
43979680
43979680
43988112
[1, 2, 3, ['a', 'b', 'c'], 'f']
[1, 2, 3, ['a', 'b', 'c'], 'f']
[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c'], 'f']
43979680
43925336

Process finished with exit code 0

 

爬虫

错误示例:
#coding:utf-8

import re
import urllib
import time
def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?)\.jpg" pic_ext='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1


Q = "pn=y"
M = "http://tieba.baidu.com/p/3815851566"
for y in range(1,41):
    print  Q+W
html = getHtml(M+Q)
getImg(html)

 


错误示例:
#coding:utf-8

import re
import urllib

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?)\.jpg" pic_ext='      \图片源代码的正确书写\
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
M = "http://tieba.baidu.com/p/3815851566?pn="
for y in range(1,41):
    print  M,y
    A=M,y                 \应把y当成一个字符串str(y)\
    html = getHtml(A)
    getImg(html)


正解
#coding:utf-8

import re
import urllib

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?\.jpg)" pic_ext='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
M = "http://tieba.baidu.com/p/3815851566?pn="
for y in range(1,41):
    print  M+str(y)
    A=M+str(y)
    html = getHtml(A)
    getImg(html)

 

 

 

 

 


正解:
#coding:utf-8

import re,os
import urllib
import time

def getHtml(url):                 # \获取网址\
    page = urllib.urlopen(url)
    html = page.read()
    return html


def getImg(html):          #\用正则查找符合图片条件的网址\


    reg = r'src="(http://imgsrc.*?)" pic_ext='   #  \多个网页的图片代码不同,要仔细查阅\ #括号里是你想的到的网址不包括引号
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:                         # \下载照片,并且附上文件名\
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1


for y in range(1,41):
    url = "http://tieba.baidu.com/p/3815851566?pn=" + str(y)             # \网页变化是网址的变化,并且要用字符串输出\ 
    print url
    html = getHtml(url)
    getHtml(html)

 


爬风景
# coding:utf-8

import re
import urllib
print 'a'

def  getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'data-src="(http://img.mp.itc.cn/.*?\.jpg)" style='

    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=116
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
html = getHtml("http://www.tuniu.com/trips/10111346")
getImg(html)

爬文本


def getImg(html):
    """Return the absolute ``.html`` links found in *html*.

    A link is matched when it appears as ``href="http://....html"`` and is
    immediately followed by a ``target=`` attribute. The result is a list of
    the captured URLs in document order (empty when nothing matches).
    """
    # The "=" after href is required; without it the pattern can never match
    # a real anchor tag.
    reg = r'href="(http://.*?\.html)" target='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    # Hand the matches back to the caller instead of discarding them.
    return imglist


  
  

 

 

 

 请问打开图像后会出现This image could not be loaded?(图片不能加载)? 5

 


 
有几种情况;
1、可能软件版本安装时丢失数据了!安装不成功!
2、可能图片有问题!
3、图片可能不是通用格式
4、你的电脑系统版本对新存的图片文件的数据是否有丢失?!!
第一种情况可能性最大!第四种次之。。。。。


将会输出gif,png,jpeg等图片类型

 

 

# coding:utf-8
import re
import urllib

url = "http://www.muyingzhijia.com/Shopping/subcategory.aspx?cateID=31"


def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html


def getDSJ(html):

    reg = r'a href="(.*?)" class="traceCount landingStats" target="_blank" title="(.*?)"'    target位置不能变且有一个空格的距离
    lsre = re.compile(reg)
    ls = re.finditer(lsre, html)

    fd = open('test.txt', 'a')
    for imgurl in ls:

        fd.write(imgurl.group(2) + '\t' + imgurl.group(1) + '\n')
    fd.close()

html = getHtml(url)

ls = getDSJ(html)
print 'ok'


对文件的读写操作
fo = open('/root/test.txt') #打开文件
fo.read() #读取文件
fo.close() #关闭文件  
f1 = file('/root/test.txt') #表示类
f1.close()  #一定要记得关闭文件
cat .txt   读取txt的文件
fnew = open('/root/new.txt','w')
fnew.write('hello  \n i am milo')
fnew.close() #必须关闭之后才会在文件中显示
fnew = open('/root/new.txt','r+')
fnew.read()
fnew.write("new con")
fnew.close()  # read first, then write: the file position is at the end after read(), so the new text is appended
fnew = open('/root/new.txt','r+')
fnew.write("nooooo")
fnew.close()  #直接写会出现在文件开头
r 只读    r+读写        w写入,先删除原文件,再重新写入,如果原文件没有则创建
w+ 读写,先删除原文件,再重新写入,如果原文件没有则创建(可以写入输出)
a  写入:在文件末尾追加新内容,文件不存在,创建之
a+  读写:在文件末尾追加新内容,文件不存在,创建之
b  打开二进制的文件 可以与r,w,a,+结合使用
U支持所有的换行符号  "\r""\n""\r\n"

 

 

f.readline()#一行一行的读取文件,超出后显示空字符串
f.readlines()#返回一个列表
f.next()#一行一行的读取,超出后停止迭代
f.writelines()#写入多行数据
f.seek(0,0)#指针在文件开头         选项=0表示文件指针指向从文件头部到“偏移量”字节处
f.seek(偏移量,选项)                选项=1表示指文件指针指向从文件的当前位置,向后移动“偏移量”的字节
                                      选项=2表示将文件指针指向文件的尾部,向前移动“偏移量”字节

偏移量有正负之分,正数向右,负数向左
f.flush()#提交更新,不用f.close()就可以直接查看新写入的文件
文件查找 
cat a.t
hello word
hello hello word  统计文件中hello的个数
import re
fp = file("a.t","r")
count = 0
for s in fp.readlines():
    li = re.findall("hello",s)
    li len(li)>o:
             count = count+len(li)
print "Search"+count+“hello”
fp.close()

把a.t中的hello替换成csvt,并保存到文件a2.t
# Copy a.t to a2.t, replacing every "hello" with "csvt" on the way.
fp1 = file("a.t","r")
fp2 = file("a2.t","w")
for s in fp1.readlines():
      fp2.write(s.replace("hello","csvt"))
fp1.close()
fp2.close()
把a.t中的hello替换成csvt
# Replace every "hello" with "csvt" inside a.t itself.
fp1 = file("a.t","r+")
s = fp1.read()
# Rewind so the rewritten text overwrites the file from the beginning.
fp1.seek(0,0)
fp1.write(s.replace("hello","csvt"))
# "csvt" is one character shorter than "hello", so the new content is shorter
# than the old one; truncate at the current position to drop the leftover
# bytes of the old content at the end of the file.
fp1.truncate()
fp1.close()

 

os模块
import os
os.mkdir('test')#创建了一个目录
os.makedirs('a/b/c')#如何查看tree a  a下创建 b,b下创建c
os.rmdir('test')#删除一个目录
os.removedirs('a')#删除多级目录
os.listdir('.')#当前目录的显示       os.listdir('jpg')#查看jpg的子目录      os.listdir('/')#查看根目录
os.getcwd()#查看当前路径
os.chdir()#切换目录

 

 

目录的遍历
os.path.isdir()判断路径是不是个目录
os.path.isdir(filepath)
os.walk函数每次迭代返回一个元组,该元组有三个元素,分别表示遍历的路径,目录列表和文件列表
os.path.join() 获取每个子文件的绝对路径


异常处理
IOError没有文件或目录    NameError名字错误
#coding:utf_8
filename = raw_input('请输入一个文件:')
try:
    open(filename)
    print hello
except IOError,msg:
    print "你指定的文件不存在"
except NameError,msg:
    print "内部变量调用错错误"
finally:              #上边代码是否正确,都执行finally     
    print "ok"

 

if filename== "hello":
    raise TypeError("nothing....")    #输入hello会报指定错误   


D:\Python安装程序\python.exe D:/学习资料/bijia.py
请输入一个文件:hello
你指定的文件不存在
Traceback (most recent call last):
ok
  File "D:/??????/bijia.py", line 16, in <module>
    raise TypeError("nothing....")  #自己定义的指定错误
TypeError: nothing.... #系统报的正确错误

Process finished with exit code 1

 

异常 描述
NameError 尝试访问一个没有申明的变量
ZeroDivisionError 除数为0
SyntaxError 语法错误
IndexError 索引超出序列范围
KeyError 请求一个不存在的字典关键字
IOError 输入输出错误(比如你要读的文件不存在)
AttributeError 尝试访问未知的对象属性
ValueError 传给函数的参数类型正确但值不合法,比如给int()函数传入无法转换成整数的字符串(如'abc')

 


 

 

 

 

 

 

 

                                                                                                                                                                       

 

 

 

 


 

 

 

 

 

 


 

 

 


0

 

 

 

0
)

函数   调用  abs   即求绝对值  只有一个参数
      比较函数  cmp(x,y) 有两个参数 x>y  1    x<y   -1    x=y  0 
      数据类型转化   int()
     定义函数 自定义求绝对值的my_abs函数   def my_abs(a):    (函数名中不能有"-",应使用下划线)
                                            if a>=0:  
                                              return  a


                                            else:
                                              return  -a

     def  power(x):         def power(x,n):     
      return  x*x                 s=1
                                  while  n>0:
                                       n=n-1
                                      s=s*x
                                 return s 
pycharm必须用英文半角  注意空格使用  
if 条件语句:  if expression:                                                          if 1>2:   (True:非零数、非空的量等   False:0,None,空的量等)
                   print                                                                    print "ok"         
           只能控制下边带缩进的代码,一定要缩进,一般用空格缩进四个,冒号不能丢         print "main ok"    \不受if控制\
           多个if同时使用应用:进行分隔,可以使用elif
           x = int(raw_input("please input x:"))
#coding:utf-8

x = int(raw_input("please input:"))
if x>=90:
    print "A"                           \三个if都执行且else只与第三个if有关系\                          

if x>=80:
    print "B"
if x>=70:
    print "C"
else:
    print "bad"

 

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
please input:90
A
B
C

Process finished with exit code 0

 

#coding:utf-8

x = int(raw_input("please input:"))                \if-elif分开来判断,互不相关且else是三个判断都不成立才执行的\
if x>=90:
    print "A"
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
please input:99
A

Process finished with exit code 0

嵌套模式
#coding:utf-8

x = int(raw_input("please input x:"))              \x>=90和y>90同时成立才会输出A\
y = int(raw_input("please input y:"))
if x>=90:
    if y>90:
        print "A"
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

 

#coding:utf-8

x = int(raw_input("please input x:"))
y = int(raw_input("please input y:"))
if x>=90:                                                    可以使用and,or,not
    if y>90:              \相当于if x>=90 and y>90:\
        print "A"    \x>=90且y>90同时成立则输出\
    print "y>=90"     \只要x>=90就会输出,与if y>90是否成立无关\
elif x>=80:
    print "B"
elif x>=70:
    print "C"
else:
    print "bad"

 

 


for循环(有次数的循环)
   循环语句:  for iterating_var in sequence:
                          print
             s=()元组print s[x]     s = "hello"               l=[]列表print l
                                      for x in range(len(s)):
                                          print  s[x]     
            迭代:range(i,j ,[进步值])类似于切片 显示具体的数字             xrange(10)只显示xrange(10)不显示具体数字
              range(10)输出是十次且是从零到九   range(1,11)输出是十次且是从一到十   range(1,11,2)输出1,3,5,7,9


#coding:utf-8
s = "hello"
l = [1,2,3,'a','b']
t = (7,8,89,'x','f')
d = {1:111,2:222,5:555,3:3333}
for  x in l:      #在列表中取值的格式
    if x>=2:
        print x
for x in range(len(s)):       #字符串取值的方法
    print  s[x]
for x in d:
    print x      #只取出的是k
    print d[x]     #只取出v的值                   \一个for下边有多个print,三个print会同时执行多次\
    print d.items()   #取出(k,v)的键值对
for k,v in d.items():
    print k
    print v

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
2
3
a
b
h
e
l
l
o
1
111
[(1, 111), (2, 222), (3, 3333), (5, 555)]
2
222
[(1, 111), (2, 222), (3, 3333), (5, 555)]
3
3333
[(1, 111), (2, 222), (3, 3333), (5, 555)]
5
555
[(1, 111), (2, 222), (3, 3333), (5, 555)]
1
111
2
222
3
3333
5
555

Process finished with exit code 0

字典的拆分  for k,v in d:                      for k,v in d.items():
                 print k                               print k
                 print v 类型错误                      print v类型正确

循环控制

#coding:utf-8
import time    #时间模块,两个元素之间出现的时间间隔
for  x in range(3):         #range(3)表示从0到2
    print x
    time.sleep(1)  #括号中数据表示间隔的时间
else:
    print "ending"   #只有正常结束才会ending      出现KeyboardInterrupt(终止信号)表示强制终止了,用Ctrl+c 此时不出现ending

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
0
1
2
ending

 


   
# encoding:utf-8                             
for x in range(35):                            
    print x
    if x==3:
        pass          #pass是代码桩,起到占位作用
    if x==2:
        print "hello22"
        continue           #跳过当次循环的余下语句
    if x==5:
        exit()             #exit()跳出整个程序
    if x ==6:
        break                #break跳出循环,不影响下边循环执行,也不执行else
    print"#"*50
else:
    print "ending"
for x in range(1,11):
    print "------>",x            # print "---->",x  逗号不可丢输出为---->x


加了exit()之后的运行结果
0
##################################################
1
##################################################
2
hello22
3
##################################################
4
##################################################
5

 

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
0
##################################################
1
##################################################
2
hello22
3
##################################################
4
##################################################
5
##################################################
6
------> 1
------> 2
------> 3
------> 4
------> 5
------> 6
------> 7
------> 8
------> 9
------> 10


while循环
while  True:
    print "hello"      #死循环


# encoding:utf-8
x=""  #设置一个全局变量,空字符
while x != "q":       #x!=q则为真
    print "hello"
    x = raw_input("please input something,q for quit:")
    if not x :    #逻辑否,即回车键
        break
    if x=="c":
        continue              #输入c则不会出现one more time...
    print "one more time...."
else:
    print "ending...."           #ending....出现是正常程序正常结束即输入q,不是按回车键或Ctrl+c


输入q的结果
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/if.py
hello
please input something,q for quit:q
one more time....
ending....

 

 


定义函数名是两个以上单词的,第二个单词首字母要大写
def add():    再次调用时只需输入add()
单独的一个def操作不会出现效果,跟定义一个变量没有差别
# encoding:utf-8
a = 100                                          def fun(x):
def fun():                                           print "i get a:",x    #x为变量
    if True:
        print "good"
        print a

if fun():
    print "ok"
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
good
100

Process finished with exit code 0
# encoding:utf-8
a = 100
def fun():
    if True:
        print "good"
        print a

fun()
fun()
fun()
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
good
100
good
100
good
100

Process finished with exit code 0
# encoding:utf-8
a = 100
def fun():
    if True:
        print "good"
print a

fun()
fun()
fun()
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
100
good
good
good

 

# coding:utf-8
a = 100
def fun():
    print "a"
fun()
if fun():
    print "ok"           #输出没有ok


D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled1/函数定义.py
a
a

Process finished with exit code 0


Process finished with exit code 0

形参和实参
#coding:utf-8                                     
def mashine(x,y='奶油'):                            
    print "制作一个",x,'元',y,'口味的冰淇淋!'                
                                                  
mashine( 3,'巧克力')                                 
                   
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/编程.py
制作一个 3 元 巧克力 口味的冰淇淋!

Process finished with exit code 0
x和y要一一对应;x没有默认值必须传入,若只输入x的值,mashine(3),此时y取默认值'奶油';若要单独指定y,mashine(3,y='巧克力')
输出一句话要用 "制作一个",x,'元',y,'口味的冰淇淋!'注意符号的使用

 

局部变量和全局变量       #若局部变量与全局变量重名,函数内的赋值只改变局部变量,全局变量的值不变
#coding:utf-8
a= 'i am global var'/*全局变量*/
def fun():
    a=100/*局部变量,不可以被当作全局变量使用*/
    global x/*用global声明后x是全局变量,只要函数fun()被调用过,x就可以在函数外使用*/
    x=200
    print a
fun()
print x

 


函数返回值    sum([1,2,3,4,5])求和
#coding:utf-8
def f(x,y):
    print  x+y/*x+y即计算x+y若为"x+y"则直接输出x+y*/
f(97,98)
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
195

Process finished with exit code 0
#coding:utf-8
def f(x,y):
    print  "welcome"
    print x+y
f(2,3)
z=f(3,4)
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
welcome
5
welcome
7
None

Process finished with exit code 0
#coding:utf-8
def f():
    return "hello"
z= f()/*函数被调用后必须返回一个指定的值,否则无法输出*/
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
hello

Process finished with exit code 0
#coding:utf-8
def f():
    return "hello"/*同时输入两个return,只有第一个执行即return执行后函数终止*/
    return "rr"
z= f()
print z
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
hello
Process finished with exit code 0
向函数传元组和字典
#coding:utf-8
t=('name','milo')
def f(x,y):
    print " % s: % s" % ( x,y)/*注意这种写法*/
f(*t)/*输出t元组对应的两个值*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
 name: milo

Process finished with exit code 0
#coding:utf-8
def f(name='name',age=0):
    print "name:%s"% name
    print "age: %d"% age
f("milo",30)
d={"name":'milo',"age":20}/*字典的无序性*/
f(**d)/*定义字典调用的方法*/
d={'a':9,'n':'milo'}
f(d['n'],d['a'])/*两个值要一一对应*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
name:milo
age: 30
name:milo
age: 20
name:milo
age: 9
处理多余的实参
#coding:utf-8
def f(x,*args):
    print x
    print args
f(1,2,3)
def f(x,*args,**kwargs):/*一个*表示元组,两个**表示字典*/
    print x
    print args
    print kwargs
f(x=1,y=3,z=4)/*y=3,z=4根元组形式一样*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
1
(2, 3)
1
()
{'y': 3, 'z': 4}

Process finished with exit code 0
Process finished with exit code 0
lambda函数的使用
#coding:utf-8
g = lambda x,y:x*y/*必须亦g=*/
print g(2,3)/*print必须有*/
switch函数使用(Python没有switch语句,可以用字典来实现)

def jia(x,y):
    """Return x plus y."""
    return x+y
def jian(x,y):
    """Return x minus y."""
    return x-y
def cheng(x,y):
    """Return x multiplied by y."""
    return x*y
def chu(x,y):
    """Return x divided by y using the `/` operator."""
    return x/y
# Dispatch table standing in for a switch statement: operator symbol -> function.
operator = {"+":jia,"-":jian,"*":cheng,"/":chu}
def f(x,o,y):
    """Print the result of applying the operation named by symbol o to x and y."""
    # The dictionary lookup selects the function, which is then called with (x, y).
    print(operator.get(o)(x,y))
f(3,"+",3)
常用函数
pow(x,y)x的y次幂 pow(x,y,z)x的y次幂再除以z求模   divmod(x,y)返回由商和余数组成的元组(x//y, x%y)   round()四舍五入,返回一个浮点数
callable()测试一个对象可不可以被调用   isinstance(l,list)判断对象的类型   cmp(x,y)比较两个对象的大小(不限于字符串)
range()快速生成一个序列   xrange()生成器,不出现序列,效率比range高很多 
类型转化函数
type()类型  int()整数型  long()长整形  float()浮点型  complex()复数型
str(x )                      将对象 x 转换为字符串
tuple(s )                    将序列 s 转换为一个元组
list(s )                     将序列 s 转换为一个列表
chr(x )                      将一个整数转换为一个字符
ord(x )                      将一个字符转换为它的整数值
hex(x )                      将一个整数转换为一个十六进制字符串
oct(x )                      将一个整数转换为一个八进制字符串
       

 


内置函数
string函数的应用/*必须是针对字符串使用的*/
用help查询的时候用法help(str.replace)
#coding:utf-8
s='13234443213223'
print(s.replace('2','d'))  # replace is a string method, so it is called as s.replace(...)
print(s.replace('2','d',1))  # the trailing 1 limits the replacement to the first '2'

 

str.capitalize()把字符串首字母转化为大写
str.split('x',y)做切割用的,x表示分隔符(在每个x出现的位置切开),y表示最多切割的次数
string.replace(s,'hello','good')
序列处理函数
len()求长度
max()求最大值     filter(f,L)主要用于序列的操作    f表示取大于5的数,L表示一个列表,表示取出在L中符合f的值
#coding:utf-8

name=['milo','zou','tom']
age=[20,30,40]
tel=['133','156','234']
t=['x','y']
print zip(name,age,tel,t)          /*zip和map的区别*/
print map(None,name,age,tel,t)

a=[1,3,5]
b=[2,3,4]
def mf(x,y):
    return x*y
print map(None,a,b)/*输出a,b对应为位置的成对列表*/
print map(mf,a,b)/*把a,b列表的值在mf语句中实现*/

from functools import reduce  # a builtin in Python 2; this import also works there (2.6+)

L=range(1,101)
# rf could be replaced by the inline form reduce(lambda x,y:x+y,L).
# To keep only the even values of L use filter(lambda x:x%2==0,L); a lambda
# body must be a single expression, so "lambda x:if x%2==0" cannot run.
def rf(x,y):
    """Return x + y; used as the folding function passed to reduce()."""
    return x +y
print(reduce(rf,L))  # folds every value of L through rf
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[('milo', 20, '133', 'x'), ('zou', 30, '156', 'y')]
[('milo', 20, '133', 'x'), ('zou', 30, '156', 'y'), ('tom', 40, '234', None)]
[(1, 2), (3, 3), (5, 4)]
[2, 9, 20]
5050

Process finished with exit code 0

 

 

正则表达式(保持字符串原义)
re模块正则表达式模块接口


[]/*指定一个字符集,用来匹配其中的任意一个字符*/
#coding:utf-8
import re
s = r'abc'/*定义时r不能丢,只是正则的一种形式,表示abc字符串*/                      元字符在字符集中不起作用[amk$]
print re.findall(s,"aaaaaaaaaaaaaaaaaaaaaa")
print re.findall(s,"aaaaaaabcaaaaaaaaaabcaa")
st="top tip tap twp tep"
res = r"top"
print re.findall(res,st)
res = r"t[io]p"/*表示取tip或者top都被取到[]表示或者的意思*/
print re.findall(res,st)
res=r"t[^io]p"/*表示除了tip,top都能被取到,^表示除了*/
print re.findall(res,st)

D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[]
['abc', 'abc']
['top']
['top', 'tip']
['tap', 'twp', 'tep']

Process finished with exit code 0

 


#coding:utf-8
import re
s = "hello word, hello word"
r = r"hello"
print  re.findall(r,s)/*用r这个正则来匹配s这个字符串*/
r = r"^hello"/*只取开头单词hello*/
print  re.findall(r,s)
s = "word,hello word"
print re.findall(r,s)/*若开头不是hello,则输出一个空列表*/
r = r"word$"/*取最末尾的单词Word*/
print re.findall(r,s)
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['hello', 'hello']
['hello']
[]
['word']

Process finished with exit code 0


r = r"x[0-9]x"  # [0-9] stands for any one of the ten digits 0 through 9
print(re.findall(r,'x1x x2x x9x'))
r = "t[abc$]"  # inside square brackets $ is a literal character (so is ^ unless it comes first)


#coding:utf-8
import re
r = r"^abc"
print re.findall(r,"abc")
print re.findall(r,"^abc")
r = r"\^abc"/*\把特殊字符当成普通字符处理*/
print re.findall(r,"^abc")
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['abc']
[]
['^abc']

Process finished with exit code 0
\d匹配任何十进制数,它相当于[0-9]
\D匹配任何非数字字符,它相当于类[^0-9]
\s匹配任何空白字符,它相当于类[ \t\n\r\f\v]
\S匹配任何非空白字符,它相当于类[^ \t\n\r\f\v]
\w匹配任何字母数字字符(包括下划线),它相当于类[a-zA-Z0-9_]
\W匹配任何非字母数字字符,它相当于类[^a-zA-Z0-9_]
  


#coding:utf-8
import re
r = r"^010-?\d{8}$"/*-?表示-可有可无,{x}表示\d重复x次,^表示开头数$表示结尾数*/
print re.findall(r,'010-2343245')
print re.findall(r,'010-23456789')

r = r"ab*"/*b的重复数字可以是零次一次或更多*/
print re.findall(r,"a")
print re.findall(r,"abbbbbb")
r = r"ab+"/*贪婪模式,做最大匹配即输出所有的字符*//*+表示重复数字至少有一次*/
print re.findall(r,"a")
print re.findall(r,"ab")
print re.findall(r,"abbbbbb")
r = r"ab+?"/*非贪婪模式,做最小匹配,即只输出重复数字一次*/
print re.findall(r,"abbbbbbbbbbbb")
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
[]
['010-23456789']
['a']
['abbbbbb']
[]
['ab']
['abbbbbb']
['ab']

Process finished with exit code 0

#coding:utf-8
import re
r = r"a{1,3}"/*至少有1个重复,最多有3个重复*/
print re.findall(r,'a')                                                      {0,}等同于*  {1,}等同于+   {0,1}等同于?                                                  
print re.findall(r,'d')
print re.findall(r,'aaaaa')/*若大于3,则分成多个字符串出现*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['a']
[]
['aaa', 'aa']

Process finished with exit code 0

 

 

 

#coding:utf-8
import re
r1 =r"\d{3,4}-?\d{8}\\"       /*加r时后边\不会被特殊处理,不加r,打印\就会报错*/
print re.findall(r1,"010-12345677\\")
p_tel=re.compile(r1)      \*用re.compile编译正则表达式*\
print p_tel
print p_tel.findall('010-234455628\\')   \*缺点:输出字节只根据要求长度进行截取*\

csvt_re = re.compile(r'csvt',re.I)        \*后边的re.I使用后可以不用区分大小写*\
print csvt_re.findall('CSVT')
print csvt_re.findall('Csvt')
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled/ll.py
['010-12345677\\']
<_sre.SRE_Pattern object at 0x0258E248>
[]
['CSVT']
['Csvt']

Process finished with exit code 0


#coding:utf-8
import re
r1 = r"csvt.net"\*点的含义是点或者一个字母*\
print re.findall(r1,'csvt.net')
print re.findall(r1,'csvtonet')
print re.findall(r1,'csvt\nnet')
print re.findall(r1,'csvt\nnet',re.S)\*大写的S和点搭配使用的含义是包括换行在内的所有字符*\
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled1/.idea/正则符号的使用.py
['csvt.net']
['csvtonet']
[]
['csvt\nnet']

Process finished with exit code 0

Process finished with exit code 0

 

#coding:utf-8
import re
s="""
hello csvt
csvt hello
hello csvt hello
csvt hehe
"""
r = "^csvt"
print re.findall(r,s)
print re.findall(r,s,re.M)   /*若输入为多行的时候re.M可以与$和^一起使用,使^和$发挥之前的作用*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/正则表达式.py
[]
['csvt', 'csvt']

Process finished with exit code 0

 


#coding:utf-8
import re
tel = r"""
\d{3,4}
-?
\d{8}                           X与verbose意思一样
"""
print re.findall(tel,'010-12345678',re.X)/*正则为多行的时候用re.X可以输出成一行*/
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/正则表达式.py
['010-12345678']

Process finished with exit code 0

#coding:utf-8
import re
email = r"\w{3}@\w+(\.com|\.cn)"               \*可以有分组,返回值优先返回分组中的数据,分组中可以出现或这种关系*\
print re.findall(email,'zzz@csvt.com')
D:\Python安装程序\python.exe C:/Users/欢/PycharmProjects/untitled2/字符.py
['.com']

Process finished with exit code 0

 

 

 

 

 

正则爬虫网页
链接网址是要把想要的括起来,还有单双引号的互用 reg = r'src="(http://.+\.jpg)"'网址必须用""外边用''引号
图片的形式有两种jpg和gif


#coding:utf-8
import re
import urllib
def getHtml(url):
    """Fetch url and return the raw body of the response.

    The response object is closed before returning, including when read() raises.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()
def getImg(html):
    """Download every .jpg referenced in html into the working directory.

    Images are saved as 0.jpg, 1.jpg, ... in the order their URLs appear.
    """
    # The capture group includes the ".jpg" suffix so each match is a
    # complete, downloadable URL.
    reg = r'src="(.*?\.jpg)" pic_ext'
    imger = re.compile(reg)
    for x, imgurl in enumerate(imger.findall(html)):
        urllib.urlretrieve(imgurl,'%s.jpg'% x)

html = getHtml("http://tieba.baidu.com/p/3815851566")
getImg(html)
   

 

 

浅拷贝和深拷贝(深拷贝:原数据改变拷贝数据不变;浅拷贝:只复制最外层,内层对象仍与原数据共享)
import copy
a = [1,2,3,['a','b','c']]
b = a                       # plain assignment: b is another name for the same list
c = copy.copy(a)            # shallow copy: new outer list (new id), inner list still shared
print(a)
print(b)
print(c)
print(id(a))
print(id(b))
print(id(c))
a.append('f')               # outer-level change: visible through b, not through c
print(a)
print(b)
print(c)
# Appending to the inner list a[3] instead would show up in c as well,
# because the shallow copy shares that inner list.
d = copy.deepcopy(a)        # deep copy: outer and inner lists are all new objects
print(d)
print(id(a))
print(id(d))
# A later change to the inner list a[3] does not show up in d.

 


[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c']]
43979680
43979680
43988112
[1, 2, 3, ['a', 'b', 'c'], 'f']
[1, 2, 3, ['a', 'b', 'c'], 'f']
[1, 2, 3, ['a', 'b', 'c']]
[1, 2, 3, ['a', 'b', 'c'], 'f']
43979680
43925336

Process finished with exit code 0

 

爬虫

错误示例:
#coding:utf-8

import re
import urllib
import time
def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?)\.jpg" pic_ext='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1


Q = "pn=y"
M = "http://tieba.baidu.com/p/3815851566"
for y in range(1,41):
    print  Q+W
html = getHtml(M+Q)
getImg(html)

 


错误示例:
#coding:utf-8

import re
import urllib

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?)\.jpg" pic_ext='      \图片源代码的正确书写\
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
M = "http://tieba.baidu.com/p/3815851566?pn="
for y in range(1,41):
    print  M,y
    A=M,y                 \应把y当成一个字符串str(y)\
    html = getHtml(A)
    getImg(html)


正解
#coding:utf-8

import re
import urllib

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'src="(.*?\.jpg)" pic_ext='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
M = "http://tieba.baidu.com/p/3815851566?pn="
for y in range(1,41):
    print  M+str(y)
    A=M+str(y)
    html = getHtml(A)
    getImg(html)

 

 

 

 

 


正解:
#coding:utf-8

import re,os
import urllib
import time

def getHtml(url):                 # \获取网址\
    page = urllib.urlopen(url)
    html = page.read()
    return html


def getImg(html):          #\用正则查找符合图片条件的网址\


    reg = r'src="(http://imgsrc.*?)" pic_ext='   #  \多个网页的图片代码不同,要仔细查阅\ #括号里是你想的到的网址不包括引号
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=0
    for imgurl in imglist:                         # \下载照片,并且附上文件名\
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1


for y in range(1,41):
    # Pages of the thread differ only by the pn= value, which must be appended as a string.
    url = "http://tieba.baidu.com/p/3815851566?pn=" + str(y)
    print(url)
    html = getHtml(url)
    getImg(html)  # hand the page content to the downloader; getHtml expects a URL, not page content

 


爬风景
# coding:utf-8

import re
import urllib
print 'a'

def  getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html
def getImg(html):
    reg = r'data-src="(http://img.mp.itc.cn/.*?\.jpg)" style='

    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    x=116
    for imgurl in imglist:
        urllib.urlretrieve(imgurl,'%s.jpg'% x)
        x+=1
html = getHtml("http://www.tuniu.com/trips/10111346")
getImg(html)

爬文本


def getImg(html):
    """Return every http://...html link target found in html.

    A link is matched only when written as href="<url>" immediately
    followed by ` target=`. The list is empty when nothing matches.
    """
    reg = r'href="(http://.*?\.html)" target='
    imger = re.compile(reg)
    imglist = re.findall(imger,html)
    return imglist


  
  

 

 

 

 请问打开图像后会出现This image could not be loaded?(图片不能加载)? 5

 


 
有几种情况;
1、可能软件版本安装时丢失数据了!安装不成功!
2、可能图片有问题!
3、图片可能不是通用格式
4、你的电脑系统版本对新存的图片文件的数据是否有丢失?!!
第一种情况可能性最大!第四种次之。。。。。


将会输出gif,png,jpeg等图片类型

 

 

# coding:utf-8
import re
import urllib

url = "http://www.muyingzhijia.com/Shopping/subcategory.aspx?cateID=31"


def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html


def getDSJ(html):
    """Append every matching link in html to test.txt as "title<TAB>url" lines.

    test.txt is opened in append mode in the current working directory.
    Returns None.
    """
    # The attribute order and the single spaces between attributes (notably
    # the one before target=) must match the page source exactly.
    reg = r'a href="(.*?)" class="traceCount landingStats" target="_blank" title="(.*?)"'
    lsre = re.compile(reg)
    # "with" closes the file even if a write fails part-way through.
    with open('test.txt', 'a') as fd:
        for imgurl in lsre.finditer(html):
            fd.write(imgurl.group(2) + '\t' + imgurl.group(1) + '\n')

html = getHtml(url)

ls = getDSJ(html)
print 'ok'


对文件的读写操作
fo = open('/root/test.txt') #打开文件
fo.read() #读取文件
fo.close() #关闭文件  
f1 = file('/root/test.txt') #表示类
f1.close()  #一定要记得关闭文件
cat .txt   读取txt的文件
fnew = open('/root/new.txt','w')
fnew.write('hello  \n i am milo')
fnew.close() #必须关闭之后才会在文件中显示
fnew = open('/root/new.txt','r+')
fnew.read()
fnew.write("new con")
fnew.close()  # reading first leaves the position at the end, so the new text lands after the existing content
fnew = open('/root/new.txt','r+')
fnew.write("nooooo")
fnew.close()  #直接写会出现在文件开头
r 只读    r+读写        w写入,先删除原文件,再重新写入,如果原文件没有则创建
w+ 读写,先删除原文件,再重新写入,如果原文件没有则创建(可以写入输出)
a  写入:在文件末尾追加新内容,文件不存在,创建之
a+  读写:在文件末尾追加新内容,文件不存在,创建之
b  打开二进制的文件 可以与r,w,a,+结合使用
U支持所有的换行符号  "\r""\n""\r\n"

 

 

f.readline()#一行一行的读取文件,超出后显示空字符串
f.readlines()#返回一个列表
f.next()#一行一行的读取,超出后停止迭代
f.writelines()#写入多行数据
f.seek(0,0)#指针在文件开头         选项=0表示文件指针指向从文件头部到“偏移量”字节处
f.seek(偏移量,选项)                选项=1表示指文件指针指向从文件的当前位置,向后移动“偏移量”的字节
                                      选项=2表示将文件指针指向文件的尾部,向前移动“偏移量”字节

偏移量有正负之分,正数向右,负数向左
f.flush()#提交更新,不用f.close()就可以直接查看新写入的文件
文件查找 
cat a.t
hello word
hello hello word  统计文件中hello的个数
import re
count = 0
with open("a.t","r") as fp:  # the file is closed automatically when the block ends
    for s in fp:
        # findall returns one entry per occurrence, so its length is this line's count
        count = count+len(re.findall("hello",s))
# count is an int, so it is converted before being joined to the other strings
print("Search"+str(count)+"hello")

把a.t中的hello替换成csvt,并保存到文件a2.t
# Copy a.t to a2.t with every "hello" replaced by "csvt"; a.t itself is left unchanged.
with open("a.t","r") as fp1:
    with open("a2.t","w") as fp2:
        for s in fp1:
            fp2.write(s.replace("hello","csvt"))
把a.t中的hello替换成csvt
# Replace every "hello" in a.t with "csvt", rewriting the file in place.
with open("a.t","r+") as fp1:
    s = fp1.read()
    fp1.seek(0,0)  # move back to the start so the new text overwrites the old
    fp1.write(s.replace("hello","csvt"))
    # "csvt" is one character shorter than "hello", so the new text is shorter
    # than the old; truncate() cuts off the stale bytes left after it.
    fp1.truncate()

 

os模块
import os
os.mkdir('test')#创建了一个目录
os.makedirs('a/b/c')#如何查看tree a  a下创建 b,b下创建c
os.rmdir('test')#删除一个目录
os.removedirs('a')#删除多级目录
os.listdir('.')#当前目录的显示       os.listdir('jpg')#查看jpg目录下的内容      os.listdir('/')#查看根目录
os.getcwd()#查看当前路径
os.chdir()#切换目录

 

 

目录的遍历
os.path.isdir()判断路径是不是一个目录(注意是os.path.isdir,os模块本身没有isdir)
os.path.isdir(filepath)
os.walk函数返回一个生成器,每次迭代得到一个元组,该元组有三个元素,分别表示遍历的路径,目录列表和文件列表
os.path.join() 拼接路径,例如把os.walk得到的路径和文件名拼成每个子文件的完整路径


异常处理
IOError没有文件或目录    NameError名字错误
#coding:utf_8
filename = raw_input('请输入一个文件:')
try:
    open(filename)
    print hello
except IOError,msg:
    print "你指定的文件不存在"
except NameError,msg:
    print "内部变量调用错错误"
finally:              #上边代码是否正确,都执行finally     
    print "ok"

 

if filename== "hello":
    raise TypeError("nothing....")    #输入hello会报指定错误   


D:\Python安装程序\python.exe D:/学习资料/bijia.py
请输入一个文件:hello
你指定的文件不存在
Traceback (most recent call last):
ok
  File "D:/??????/bijia.py", line 16, in <module>
    raise TypeError("nothing....")  #自己定义的指定错误
TypeError: nothing.... #系统报的正确错误

Process finished with exit code 1

 

异常 描述
NameError 尝试访问一个没有申明的变量
ZeroDivisionError 除数为0
SyntaxError 语法错误
IndexError 索引超出序列范围
KeyError 请求一个不存在的字典关键字
IOError 输入输出错误(比如你要读的文件不存在)
AttributeError 尝试访问未知的对象属性
ValueError 传给函数的参数类型正确但值不合适,比如给int()函数传入无法转换成整数的字符串(如int('abc'))

 


 

 

 

 

 

 

 

                                                                                                                                                                       

 

 

 

 


 

 

 

 

 

 


 

 

 


0

 

 

 

0
)