python开发_xml.dom_解析XML文档_完整版_博主推荐
在阅读之前,你需要了解xml.dom的一些理论知识;阅读完相关资料之后,你会对xml.dom有一定的了解。
下面是我做的demo
运行效果:
解析的XML文件位置:C:\test\hongten.xml
<?xml version="1.0" encoding="UTF-8"?>
<students>
    <student no="2009081097">
        <name>Hongten</name>
        <gender>M</gender>
        <age>20</age>
        <score subject="math">97</score>
        <score subject="chinese">90</score>
    </student>
    <student no="2009081098">
        <name>DuDu</name>
        <gender>W</gender>
        <age>21</age>
        <score subject="math">87</score>
        <score subject="chinese">96</score>
    </student>
    <student no="2009081099">
        <name>Sum</name>
        <gender>M</gender>
        <age>19</age>
        <score subject="math">64</score>
        <score subject="chinese">98</score>
    </student>
</students>
====================================================
代码部分:
====================================================
#python xml.dom

#Author   :   Hongten
#Mailto   :   hongtenzone@foxmail.com
#Blog     :   http://www.cnblogs.com/hongten
#QQ       :   648719819
#Version  :   1.0
#Create   :   2013-09-03

import os
from xml.dom import minidom

# Module-level settings; both are (re)assigned by init().
SHOW_LOG = True
XML_PATH = None


def get_dom_by_parse(path):
    """Parse the XML file at *path* and return its DOM document.

    Prints a message and returns None when *path* does not exist.
    """
    if not os.path.exists(path):
        print('the path [{}] dose not exist!'.format(path))
        return None
    if SHOW_LOG:
        print('开始解析XML文件:[{}]'.format(path))
    return minidom.parse(path)


def get_dom_by_file(path):
    """Open the XML file at *path* and parse it from the file object.

    Prints a message and returns None when *path* does not exist.
    """
    if not os.path.exists(path):
        print('the path [{}] dose not exist!'.format(path))
        return None
    if SHOW_LOG:
        print('开始打开XML文件:[{}]'.format(path))
    # Binary mode: the parser then decodes the bytes according to the XML
    # declaration instead of the platform's default text encoding.
    with open(path, 'rb') as pf:
        if SHOW_LOG:
            print('开始解析XML文件:[{}]'.format(path))
        return minidom.parse(pf)


def get_dom_by_string(s):
    """Parse XML given as a string and return its DOM document.

    Prints a message and returns None when *s* is None or ''.
    """
    if s is None or s == '':
        print('the input string is None or equals \'\'.')
        return None
    if SHOW_LOG:
        print('开始解析字符串形式的XML数据:[{}]'.format(s))
    return minidom.parseString(s)


def get_root(dom):
    """Return the root element of *dom*, or None (with a message) if dom is None."""
    if dom is None:
        print('the dom is None!')
        return None
    return dom.documentElement


def get_element_children(fatherElement, subNodeName):
    """Return the elements named *subNodeName* found under *fatherElement*.

    Uses getElementsByTagName, so matches at any depth below the parent are
    returned. Prints a message and returns None when either argument is
    missing.
    """
    if fatherElement is None:
        print('the father node is None!')
        return None
    if subNodeName is None or subNodeName == '':
        print('the sub node name is None or equals \'\'.')
        return None
    return fatherElement.getElementsByTagName(subNodeName)


def get_element_value(element, index=0):
    """Return the nodeValue of the child node of *element* at *index*.

    Returns None when *element* is None (after printing a message) or when
    the element has no child at *index*, e.g. an empty tag such as <name/>.
    """
    if element is None:
        print('the element is None!')
        return None
    try:
        return element.childNodes[index].nodeValue
    except IndexError:
        # Empty element (or index past the last child): there is no value.
        return None


def get_element_attrib_value(element, name):
    """Return the value of attribute *name* on *element*.

    Prints a message and returns None when either argument is missing.
    """
    if element is None:
        print('the element is None!')
        return None
    if name is None or name == '':
        print('the name is None or equals \'\'.')
        return None
    return element.getAttribute(name)


def _first_value(elements):
    """Return the value of the first element in *elements*, or None if there is none."""
    if not elements:
        return None
    return get_element_value(elements[0])


def get_info(root_children):
    """Extract the student data from the given <student> elements.

    Returns a flat list holding, for each student in order: name, gender,
    age, and a dict mapping each score's ``subject`` attribute to the score
    text. All values are the strings found in the document.
    """
    info = []
    for item in root_children or []:
        i_score = get_element_children(item, 'score') or []
        # Map subject -> score text (previously the loop index was stored
        # instead of the actual score).
        v_score = {
            get_element_attrib_value(score, 'subject'): get_element_value(score)
            for score in i_score
        }
        info.append(_first_value(get_element_children(item, 'name')))
        info.append(_first_value(get_element_children(item, 'gender')))
        info.append(_first_value(get_element_children(item, 'age')))
        info.append(v_score)
    return info


def init():
    """Set the module-level settings used by main()."""
    global SHOW_LOG
    SHOW_LOG = True
    global XML_PATH
    XML_PATH = 'C:\\test\\hongten.xml'


def main():
    """Parse XML_PATH and print the root, the student nodes and the extracted info."""
    init()
    dom = get_dom_by_parse(XML_PATH)
    root = get_root(dom)
    if root is None:
        # The file is missing; the helpers above have already reported it.
        return
    print(root)
    root_children = get_element_children(root, 'student')
    print(root_children)
    info = get_info(root_children)
    print(info)


if __name__ == '__main__':
    main()