python 代码脚本集

if / else 三目运算

age = 19
# Long form: a full if/else statement just to choose between two values.
if age > 18:
    ji = "adult"
else:
    ji = "child"
# Conditional expression: same result on one line, more direct to read.
ji = 'adult' if age > 18 else 'child'
# Parenthesised print works on both Python 2 and Python 3.
print(ji)

 

真值判断

# Verbose form: compares the value against True explicitly.
if attr == True:
    do_something()

# Verbose form: inspects the length to decide whether there is anything in it.
if len(values) != 0: # check whether the list is empty
    do_something()

可以直接这样写

# Idiomatic form: rely on the truthiness of the value itself.
# NOTE: this accepts any truthy value, not only values equal to True.
if attr:
    do_something()

# An empty container is falsy, so this branch runs only when values is non-empty.
if values:
    do_something()

 

for / else:循环中 break 语句没有被执行(循环正常结束)时,就会执行 else 语句

age = [3,6,8,2,7,8,4,67,3]
# Flag-based search: remember whether any element is greater than 60.
res = False
for i in age:
    if i > 60:
        res = True
        break

if not res:
    print("noooo")
else:
    print("yessssss")

# Same check with for / else: the else branch runs only when the loop
# finishes without executing break, so no flag variable is needed.
for i in age:
    if i > 60:
        print("yesssssss")
        break
else:
    print("nooooooooo")

获取字典元素

dic = {"name":"jk","age":23}

# Explicit membership test. dict.has_key() was removed in Python 3;
# the `in` operator works on both Python 2 and Python 3.
if "name" in dic:
    print(dic['name'])
else:
    print("no name attr")

# dict.get() performs the lookup and supplies the fallback in a single call.
print(dic.get("name","no name attr"))

 文件内容的对比

符号含义的说明
'-' 包含在第一个序列行中,但不包含在第二个序列行
'+' 包含在第二个序列行中,但不包含在第一个序列行
' '(空格)两个序列行一致
'?'标志两个序列行存在增量差异
'^' 标志出两个 序列行存在的差异字符

import difflib


test1="""
helloworld232323
33333333
6666
444444444
"""

test2="""
hellowerqerqererq
33333333
helli
44444444445
"""

# Differ and HtmlDiff both compare sequences of lines, so split the texts first.
test1_t=test1.splitlines()
test2_t=test2.splitlines()

# Print a line-by-line textual delta of the two sequences.
diff=difflib.Differ()
diff_cont=diff.compare(test1_t,test2_t)
# str.join consumes the generator directly; no intermediate list is needed.
print("\n".join(diff_cont))
# Generate a complete HTML document showing the comparison.
diff=difflib.HtmlDiff()
print(diff.make_file(test1_t,test2_t))

遍历路径下的文件/目录

1  使用 os.listdir 递归
# This snippet uses os.listdir / os.path, so it needs the os module in scope.
import os

dirlist=[]
filelist=[]
def listall(dir):
    """Recursively collect every entry found under ``dir``.

    Directory paths are appended to the module-level ``dirlist`` and all
    other paths to the module-level ``filelist``; nothing is returned.

    Args:
        dir: path of the directory to scan. The name shadows the builtin
            ``dir`` but is kept so existing keyword callers keep working.
    """
    for name in os.listdir(dir):
        path = os.path.join(dir, name)
        if os.path.isdir(path):
            dirlist.append(path)
            # Descend into the sub-directory before moving to the next entry.
            listall(path)
        else:
            filelist.append(path)

listall('./')

print(dirlist)
print(filelist)

2 使用os.walk()
# This snippet uses os.walk / os.path, so it needs the os module in scope.
import os

dirlist=[]
filelist=[]
# os.walk yields (root, dirs, files) for every directory in the tree,
# so no explicit recursion is required.
for root, dirs, files in os.walk('./'):
    # Loop variables are named d / f to avoid shadowing the builtins dir and file.
    dirlist.extend(os.path.join(root, d) for d in dirs)
    filelist.extend(os.path.join(root, f) for f in files)

print(dirlist)
print(filelist)

requests / urllib2 两种方法http请求

import requests
import urllib2

url = "http://www.nipic.com/"

# Fetch the page with requests and print the raw response body.
requests_body = requests.get(url).content
print(requests_body)

# Fetch the same page with urllib2 and print what was read from it.
urllib2_body = urllib2.urlopen(url).read()
print(urllib2_body)

使用lxml 模块解析html页面中的所有img元素下载到本地

import urllib.request
import requests
import os
from lxml import html

def main():
    """Download every image referenced on the nipic.com front page.

    Fetches the page, extracts the ``src`` attribute of all ``img``
    elements with XPath, and saves each image as ``<n>.jpg``
    (n = 0, 1, 2, ...) in a ``comics`` directory under the current
    working directory. Each source URL and target path is printed.

    Raises:
        SystemExit: if the page request fails.
    """
    # Local import keeps this snippet self-contained.
    from urllib.parse import urljoin

    # Fetch the page; any requests-level failure ends the script.
    try:
        page = requests.get("http://www.nipic.com/")
    except requests.exceptions.RequestException as e:
        print(e)
        # Non-zero status signals the failure and avoids relying on the
        # site-provided exit() helper.
        raise SystemExit(1)

    # Parse the HTML and collect the src attribute of every img element.
    # (For comparison, the XPath //B[@id] selects all B elements that
    # have an id attribute.)
    tree = html.fromstring(page.content)
    image_src = tree.xpath("//img/@src")

    comic_location_dir = os.path.join(os.getcwd(), 'comics')
    # urlretrieve does not create missing directories, so make sure the
    # target directory exists before downloading.
    os.makedirs(comic_location_dir, exist_ok=True)

    for num, src in enumerate(image_src):
        comic_location = os.path.join(comic_location_dir, str(num) + ".jpg")
        # src may be relative or protocol-relative ("//host/a.jpg");
        # resolve it against the URL of the fetched page.
        image_url = urljoin(page.url, src)
        print(image_url)
        print(comic_location)
        urllib.request.urlretrieve(image_url, comic_location)


if __name__ == "__main__":
    main()
urllib.request.urlretrieve方法只适用于python3

 还有个 requests-html 库,只适用于 Python 3,用它访问和解析 HTML 功能更加强大

https://requests-html.kennethreitz.org/

posted on 2017-09-11 16:38  思此狂  阅读(453)  评论(0编辑  收藏  举报

导航