python开发_xml.dom_解析XML文档_完整版_博主推荐

在阅读之前,你需要了解xml.dom的一些理论知识。在这里你可以找到相关资料,阅读完之后,你会对xml.dom有一定的了解。

下面是我做的demo

运行效果:

解析的XML文件位置:C:\test\hongten.xml

<?xml version="1.0" encoding="UTF-8"?>
<students>
    <student no="2009081097">
        <name>Hongten</name>
        <gender>M</gender>
        <age>20</age>
        <score subject="math">97</score>
        <score subject="chinese">90</score>
    </student>
    <student no="2009081098">
        <name>DuDu</name>
        <gender>W</gender>
        <age>21</age>
        <score subject="math">87</score>
        <score subject="chinese">96</score>
    </student>
    <student no="2009081099">
        <name>Sum</name>
        <gender>M</gender>
        <age>19</age>
        <score subject="math">64</score>
        <score subject="chinese">98</score>
    </student>
</students>

====================================================

代码部分:

====================================================

  1 #python xml.dom
  2 
  3 #Author   :   Hongten
  4 #Mailto   :   hongtenzone@foxmail.com
  5 #Blog     :   http://www.cnblogs.com/hongten
  6 #QQ       :   648719819
  7 #Version  :   1.0
  8 #Create   :   2013-09-03
  9 
 10 import os
 11 from xml.dom import minidom
 12 
 13 #global var
 14 SHOW_LOG = True
 15 XML_PATH = None
 16 
 17 def get_dom_by_parse(path):
 18     '''根据XML文件地址解析XML文件,返回dom对象'''
 19     if os.path.exists(path):
 20         if SHOW_LOG:
 21             print('开始解析XML文件:[{}]'.format(path))
 22         return minidom.parse(path)
 23     else:
 24         print('the path [{}] dose not exist!'.format(path))
 25 
 26 def get_dom_by_file(path):
 27     '''解析作为文档打开的XML文件'''
 28     if os.path.exists(path):
 29         if SHOW_LOG:
 30             print('开始打开XML文件:[{}]'.format(path))
 31         with open(path) as pf:
 32             if SHOW_LOG:
 33                 print('开始解析XML文件:[{}]'.format(path))
 34             return minidom.parse(pf)
 35     else:
 36         print('the path [{}] dose not exist!'.format(path))
 37 
 38 def get_dom_by_string(s):
 39     '''解析以字符串形式的XML数据格式'''
 40     if s is not None and s != '':
 41         if SHOW_LOG:
 42             print('开始解析字符串形式的XML数据:[{}]'.format(s))
 43         return minidom.parseString(s)
 44     else:
 45         print('the input string is None or equals \'\'.')
 46         
 47 def get_root(dom):
 48     '''返回XML文件的根节点'''
 49     if dom is not None:
 50         return dom.documentElement
 51     else:
 52         print('the dom is None!')
 53 
 54 def get_element_children(fatherElement, subNodeName):
 55     '''根据父节点fatherElement获取子节点subNodeName'''
 56     if fatherElement is not None:
 57         if subNodeName is not None and subNodeName != '':
 58             return fatherElement.getElementsByTagName(subNodeName)
 59         else:
 60             print('the sub node name is None or equals \'\'.')
 61     else:
 62         print('the father node is None!')
 63         
 64 def get_element_value(element, index=0):
 65     '''获取节点的值'''
 66     if element is not None:
 67         return element.childNodes[index].nodeValue
 68     else:
 69         print('the element is None!')
 70 
 71 def get_element_attrib_value(element, name):
 72     '''根据节点element的属性名称name获取属性名称的值'''
 73     if element is not None:
 74         if name is not None and name != '':
 75             return element.getAttribute(name)
 76         else:
 77             print('the name is None or equals \'\'.')
 78     else:
 79         print('the element is None!')
 80 
 81 def get_info(root_children):
 82     '''解析XML内容'''
 83     info = []
 84     for item in root_children:
 85         subs = []
 86         score_value = []
 87         i_no = get_element_attrib_value(item, 'no')
 88         i_name = get_element_children(item, 'name')
 89         i_gender = get_element_children(item, 'gender')
 90         i_age = get_element_children(item, 'age')
 91         i_score = get_element_children(item, 'score')
 92         for sub in i_score:
 93             i_sub = get_element_attrib_value(sub, 'subject')
 94             subs.append(i_sub)
 95         
 96         v_name = get_element_value(i_name[0])
 97         v_gender = get_element_value(i_gender[0])
 98         v_age = get_element_value(i_age[0])
 99         for s in range(len(i_score)):
100             score_value.append(s)
101         v_score = dict(zip(subs, score_value))
102         info.append(v_name)
103         info.append(v_gender)
104         info.append(v_age)
105         info.append(v_score)
106     return info
107         
108 
def init():
    '''Assign the module-level SHOW_LOG and XML_PATH used by the demo.'''
    global SHOW_LOG, XML_PATH
    SHOW_LOG = True
    XML_PATH = 'C:\\test\\hongten.xml'
114 
def main():
    '''Run the demo: parse XML_PATH and print the root, students and info.'''
    init()
    dom = get_dom_by_parse(XML_PATH)
    if dom is None:
        # get_dom_by_parse has already reported the missing file; there is
        # nothing to walk, so stop instead of dereferencing None.
        return
    root = get_root(dom)
    print(root)
    root_children = get_element_children(root, 'student')
    print(root_children)
    info = get_info(root_children)
    print(info)


if __name__ == '__main__':
    main()

 

posted @ 2013-09-03 19:37  Hongten  阅读(1602)  评论(0)  编辑  收藏  举报
Fork me on GitHub