Python 解析 XML 文件
一种比较高效的用 Python 解析 XML 文件的方法
参考:http://codingpy.com/article/parsing-xml-using-python/
# Helpers that count POI / street records in XML export files with
# ElementTree, plus two timing variants (full-tree load vs. streaming
# iterparse) for comparing parse speed.

# Prefer the C accelerator where it still exists (Python 2 / early Python 3);
# fall back to the pure-Python module name otherwise.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import os
import sys
import time

from statistic import StatisticItem, XML_STREET, XML_POI


def _count_elements(xml_path, item_tag, container_tag):
    """Stream *xml_path* once and count its records.

    Args:
        xml_path: Path of the XML file to parse.
        item_tag: Tag of the elements to count (e.g. ``'Poi'``).
        container_tag: Tag of the element whose ``Num`` attribute holds the
            declared record count (e.g. ``'Pois'``).

    Returns:
        A ``(declared, actual)`` tuple: ``declared`` is ``int(Num)`` of the
        last *container_tag* element seen (0 if there is none) and ``actual``
        is the number of *item_tag* elements encountered.

    Raises:
        TypeError: If a *container_tag* element has no ``Num`` attribute.
        ValueError: If ``Num`` is not an integer literal.
    """
    declared = 0
    actual = 0
    # iterparse yields only 'end' events by default, so every element is
    # complete (attributes included) when it is inspected here.
    for _event, elem in ET.iterparse(xml_path):
        if elem.tag == item_tag:
            actual += 1
        if elem.tag == container_tag:
            declared = int(elem.get('Num'))
        # Drop the element's content right away so memory use stays flat
        # on large files.
        elem.clear()
    return declared, actual


def parse_poi_by_elementTree(filepath):
    """Count POIs by loading the whole document into an ElementTree.

    Prints the elapsed wall-clock time.

    Args:
        filepath: Path of the POI XML file.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple. ``pois_element_num`` is the
        raw ``Num`` attribute of the first ``Pois`` element (a string, not
        converted to int, unlike ``parse_poi_by_iterparse``), or 0 when the
        document has no ``Pois`` element. ``vde_poi`` is the number of
        ``Poi`` elements found.
    """
    t0 = time.time()
    tree = ET.ElementTree(file=filepath)

    pois_element_num = 0
    # next(..., None) instead of .next(): works on Python 2 and 3 and does
    # not raise StopIteration when the container element is absent.
    pois_elem = next(tree.iter(tag='Pois'), None)
    if pois_elem is not None:
        pois_element_num = pois_elem.get('Num')

    # Count lazily instead of materialising a list of every Poi element.
    vde_poi = sum(1 for _ in tree.iter(tag='Poi'))

    cost_time = time.time() - t0
    print('parse_poi_by_elementTree time cost is %s' % cost_time)
    return pois_element_num, vde_poi


def parse_poi_by_iterparse(filepath):
    """Count POIs by streaming the document with ``iterparse``.

    Prints the elapsed wall-clock time.

    Args:
        filepath: Path of the POI XML file.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple: the ``Num`` attribute of the
        ``Pois`` element as an int (0 if absent) and the number of ``Poi``
        elements found.
    """
    t0 = time.time()
    pois_element_num, vde_poi = _count_elements(filepath, 'Poi', 'Pois')
    cost_time = time.time() - t0
    print('parse_poi_by_iterparse time cost is %s' % cost_time)
    return pois_element_num, vde_poi


def parse_street_xml_by_ET(street_file):
    """Build the street statistic for *street_file*.

    Args:
        street_file: Path of the street XML file.

    Returns:
        ``StatisticItem(XML_STREET, [actual, declared])`` where ``actual`` is
        the number of ``Street`` elements and ``declared`` is the ``Num``
        attribute of ``Streets``; ``[0, 0]`` when the file does not exist.
    """
    if not os.path.exists(street_file):
        return StatisticItem(XML_STREET, [0, 0])
    street_num, vde_streets = _count_elements(street_file, 'Street', 'Streets')
    return StatisticItem(XML_STREET, [vde_streets, street_num])


def parse_poi_xml_by_ET(poi_file):
    """Build the POI statistic for *poi_file*.

    Args:
        poi_file: Path of the POI XML file.

    Returns:
        ``StatisticItem(XML_POI, [actual, declared])`` where ``actual`` is
        the number of ``Poi`` elements and ``declared`` is the ``Num``
        attribute of ``Pois``; ``[0, 0]`` when the file does not exist.
    """
    if not os.path.exists(poi_file):
        return StatisticItem(XML_POI, [0, 0])
    poi_num, vde_pois = _count_elements(poi_file, 'Poi', 'Pois')
    return StatisticItem(XML_POI, [vde_pois, poi_num])


if __name__ == '__main__':
    # Sample inputs used during development:
    #   C:\Users\shchshan\Desktop\vde\State_14120002\POI_1414000018.xml
    #   C:\Users\shchshan\Desktop\vde\State_14120001\POI_1414000001.xml
    # An optional first command-line argument overrides the default sample.
    if len(sys.argv) > 1:
        sample_path = sys.argv[1]
    else:
        sample_path = r'C:\Users\shchshan\Desktop\vde\State_14120001\POI_1414000001.xml'
    print(parse_poi_by_elementTree(sample_path))
    print(parse_poi_by_iterparse(sample_path))