小学生学python(七)练习
我天天练习,天天都在熟悉
1. 生成一个文件,文件名为pack.dat,大小为1G
import time  # kept from the original snippet (it was used for timestamped file names)


def createbigfile(n, file_path='pack.dat'):
    """Create a file whose size is exactly ``n`` GiB.

    The file is sized with ``truncate`` instead of seeking and writing a
    marker string, so the result is exactly ``n * 1024**3`` bytes (the
    seek-and-write approach leaves the file a few bytes larger than asked).

    Args:
        n: Number of GiB to allocate. Anything accepted by ``int()`` works,
            including the string returned by ``input()``.
        file_path: Where to create the file. Defaults to ``'pack.dat'``.

    Raises:
        ValueError: If ``n`` cannot be converted to an integer or is negative.
    """
    gib_count = int(n)
    if gib_count < 0:
        # Reject before opening so a bad request does not leave an empty file.
        raise ValueError('file size must not be negative: %r' % (n,))

    size_in_bytes = gib_count * 1024 * 1024 * 1024
    # Binary mode: no text encoding is involved, and the context manager
    # closes the handle even if truncate() fails (e.g. disk full).
    with open(file_path, 'wb') as f:
        f.truncate(size_in_bytes)


if __name__ == '__main__':
    n = input('请输入需要生成几个G大小的文件:')
    createbigfile(n)
2.读取一个xml文件
假设有一个名为 country_data.xml 的文件,内容如下:
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank>1</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank>4</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank>68</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data>
对其进行操作
from xml.etree import ElementTree as ET

# Parse the sample document and grab its root element.
tree = ET.parse('country_data.xml')
root = tree.getroot()
print(root.tag)

# List every direct child of the root with its attributes.
for node in root:
    print(node.tag, node.attrib)

# Positional access: the first two children of the first child of the root.
first_country = root[0]
print(first_country[0].text)  # value of <rank>
print(first_country[1].text)  # value of <year>

# Pick out only the elements of interest, at any depth.
for nb in root.iter('neighbor'):
    print(nb.attrib)

# Print each country's name attribute together with its rank text.
for entry in root.findall('country'):
    print(entry.get('name'), entry.find('rank').text)

# Modify the tree: bump every rank by one, flag it as updated, and save.
for rank_el in root.iter('rank'):
    rank_el.text = str(int(rank_el.text) + 1)
    rank_el.set('updated', 'y')
tree.write('output.xml')

# Drop every country whose (already incremented) rank exceeds 60, then
# save again to the same file. findall() returns a list, so removing
# elements from root while looping over it is safe.
for entry in root.findall('country'):
    if int(entry.find('rank').text) > 60:
        root.remove(entry)
tree.write('output.xml')
但是,当 XML 文件声明的编码是 GB2312 这类多字节编码时,用 xml.etree.ElementTree 解析就会报如下错误:
ValueError: multi-byte encodings are not supported
比如
<?xml version="1.0" encoding="GB2312"?>
<date>
</date>
这时可以改用第三方库 lxml(需要先用 pip install lxml 安装),比如:
from lxml import etree

# Parse the document with lxml and dump the serialized tree.
xml = etree.parse("http_cpack.xml")
print(etree.tostring(xml))

# Run a few XPath queries: every element, every <conds> element, and the
# <cond> elements under <conds> whose value attribute is "32220401".
for query in ('//*', '//conds', '//conds//cond[@value="32220401"]'):
    print(xml.xpath(query))