python 代码脚本集

if / else 三目运算

age = 19

# Verbose form: a full if/else statement just to choose between two values.
if age > 18:
    ji = "adult"
else:
    ji = "child"

# Equivalent conditional expression: shorter and more direct.
ji = 'adult' if age > 18 else 'child'
print(ji)

真值判断

# Verbose truth test: compares attr against the literal True.
# Note: this matches only values equal to True (such as True or 1),
# not every truthy value.
if attr == True:
    do_something()

# Verbose emptiness test: inspects the length explicitly.
if len(values) != 0: # check whether the list is empty (body runs when it is not)
    do_something()

可以直接这样写

# Idiomatic form: rely on truthiness instead of comparing against True.
if attr:
    do_something()

# An empty container is falsy, so the body runs only when values has elements.
if values:
    do_something()

for / else:循环没有被 break 语句中断(即正常遍历结束)时,才会执行 else 语句

age = [3,6,8,2,7,8,4,67,3]

# Flag-based approach: remember in `res` whether any element exceeded 60.
res = False
for i in age:
    if i > 60:
        res = True
        break

if not res:
    print("noooo")
else:
    print("yessssss")

# Same check written with for / else: the else clause runs only when the
# loop finishes without hitting break, so no flag variable is needed.
for i in age:
    if i > 60:
        print("yesssssss")
        break
else:
    print("nooooooooo")

获取字典元素

dic = {"name":"jk","age":23}

# Verbose lookup: test for the key first, then index.
# dict.has_key() was removed in Python 3; the `in` operator works on
# both Python 2 and Python 3.
if "name" in dic:
    print(dic['name'])
else:
    print("no name attr")

# Concise lookup: dict.get() returns the fallback when the key is absent.
print(dic.get("name","no name attr"))

文件内容的对比

符号含义的说明
'-' 包含在第一个序列行中,但不包含在第二个序列行
'+' 包含在第二个序列行中,但不包含在第一个序列行
' ' 两个序列行一致
'?' 标志两个序列行存在增量差异
'^' 标志出两个序列行存在的差异字符

import difflib


test1="""
helloworld232323
33333333
6666
444444444
"""

test2="""
hellowerqerqererq
33333333
helli
44444444445
"""

# Differ and HtmlDiff both work on sequences of lines, so split the texts first.
test1_t=test1.splitlines()
test2_t=test2.splitlines()

# Print a human-readable, line-by-line delta of the two texts.
diff=difflib.Differ()
diff_cont=diff.compare(test1_t,test2_t)
print("\n".join(diff_cont))
# Render the same comparison as a complete HTML document.
diff=difflib.HtmlDiff()
print(diff.make_file(test1_t,test2_t))

遍历路径下的文件/目录

1  使用os.listdir递归
import os

# Module-level accumulators filled by listall().
dirlist=[]
filelist=[]


def listall(dir):
    """Recursively collect every directory and file path below ``dir``.

    Directory paths are appended to the module-level ``dirlist``; every
    other entry is appended to ``filelist``. Nothing is returned.
    """
    # NOTE: the parameter name shadows the builtin dir(); it is kept so that
    # existing callers passing it by keyword keep working.
    for files in os.listdir(dir):
        dir_file_path = os.path.join(dir,files)
        if os.path.isdir(dir_file_path):
            dirlist.append(dir_file_path)
            # Descend into the sub-directory before moving on to its siblings.
            listall(dir_file_path)
        else:
            filelist.append(dir_file_path)


listall('./')

print(dirlist)
print(filelist)

2 使用os.walk()
import os

dirlist=[]
filelist=[]
# os.walk yields (root, sub-directory names, file names) for every directory
# in the tree, so no explicit recursion is needed.
for root, dirs, files in os.walk('./'):
    dirlist.extend(os.path.join(root, name) for name in dirs)
    filelist.extend(os.path.join(root, name) for name in files)

print(dirlist)
print(filelist)

requests / urllib2 两种方法http请求

import requests
import urllib2

url="http://www.nipic.com/"
# Fetch the page body with requests (third-party package).
print(requests.get(url).content)
# Fetch the same page with urllib2. urllib2 exists only in the Python 2
# standard library; on Python 3 the equivalent call is urllib.request.urlopen.
print(urllib2.urlopen(url).read())

使用lxml 模块解析html页面中的所有img元素下载到本地

import urllib.request
import requests
import os
from lxml import html

def main():
    """Download every <img> found on the nipic.com front page into ./comics/.

    Images are saved as 0.jpg, 1.jpg, ... in the order their src attributes
    appear in the page. Exits with status 1 if the page cannot be fetched.
    """
    # Local import: urljoin resolves relative and protocol-relative src
    # values against the page URL so urlretrieve always gets an absolute URL.
    from urllib.parse import urljoin

    try:
        page = requests.get("http://www.nipic.com/")
    except requests.exceptions.RequestException as e:
        print(e)
        # Signal failure to the shell instead of exiting with status 0.
        raise SystemExit(1)

    # Parse the HTML and collect the src attribute of every <img> element.
    # (XPath reminder: //B[@id] selects all B elements that have an id attribute.)
    tree = html.fromstring(page.content)
    image_src = tree.xpath("//img/@src")

    comic_location_dir = os.path.join(os.getcwd(), 'comics')
    # urlretrieve does not create missing directories, so make sure the
    # target directory exists before downloading.
    os.makedirs(comic_location_dir, exist_ok=True)

    for num, x in enumerate(image_src):
        comic_location = os.path.join(comic_location_dir, str(num) + ".jpg")
        print(x)
        print(comic_location)
        urllib.request.urlretrieve(urljoin(page.url, x), comic_location)


if __name__ == "__main__":
    main()

urllib.request.urlretrieve 方法只适用于 Python 3(Python 2 中对应的是 urllib.urlretrieve)

还有一个 requests-html 库,只适用于 Python 3,用来访问和解析 HTML 功能更加强大。

https://requests-html.kennethreitz.org/

posted on 2017-09-11 16:38 思此狂阅读(454) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

阅读排行：
· 震惊！C++程序真的从main开始吗？99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码？零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾（3.3-3.9）
· Vue3状态管理终极指南：Pinia保姆级教程

思此狂

python 代码脚本集

导航

公告

搜索

常用链接

随笔分类

随笔档案

阅读排行榜

推荐排行榜