Fork me on GitHub

Python解析XML文件

 

XML与JSON的互相转化详见:XML模块

https://www.cnblogs.com/shengyang17/p/8606223.html

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

 

# coding=utf-8
import xml.etree.ElementTree as ET

# Parse the sample file from disk and take its top-level element (<data>).
tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root.tag)  # -> data (the root element's tag)

# Walk every direct child of the root, then each of that child's children.
for country in root:
    # e.g. ------------- country {'name': 'Liechtenstein'}
    print('-------------', country.tag, country.attrib)
    for field in country:
        # Prints the tag and text, e.g. "rank 2" for <rank updated="yes">2</rank>.
        # Self-closing elements such as <neighbor .../> carry no text, so None is printed.
        print(field.tag, field.text)
# Expected output for the sample document:
#   ------------- country {'name': 'Liechtenstein'}
#   rank 2
#   year 2008
#   gdppc 141100
#   neighbor None
#   neighbor None
#   ------------- country {'name': 'Singapore'}
#   rank 5
#   year 2011
#   gdppc 59900
#   neighbor None
#   ------------- country {'name': 'Panama'}
#   rank 69
#   year 2011
#   gdppc 13600
#   neighbor None
#   neighbor None

# Visit only the <year> elements found anywhere below the root.
for year_node in root.iter('year'):
    print(year_node.tag, year_node.text)
# Expected output for the sample document:
#   year 2008
#   year 2011
#   year 2011

event.xml

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<Events>
    <Event xmlns1='http://schemas.microsoft.com/win/2004/08/events/event'>
        <System>
            <Provider Name='Microsoft-Windows-Security-SPP' Guid='{E23B3380-C8C9-472C-F28DFEA0F156}' EventSourceName='Software Protection Platform Server'></Provider>
            <EventID Qualifiers='16384'>902</EventID>
            <Version>0</Version>
            <Level>0</Level>
            <Task>0</Task>
            <Opcode>0</Opcode>
            <Keywords>0x80000000000</Keywords>
            <TimeCreated SystemTime='2018-12-25T09:08:59.00000000000Z'/>
            <EventRecordID>8634</EventRecordID>
            <Correlation/>
            <Execution ProcessID='0' ThreadID='0'/>
            <Channel>Application</Channel>
            <Computer>WIN-CANDBPBBOCN</Computer>
            <Security/>
        </System>
        <EventData>
            <Date>6.1.7601.17514</Date>
        </EventData>
        <RenderingInfo Culture='zh-CN'>
            <Message>软件保护服务已启动。6.1.7601.17514</Message>
            <Level>信息</Level>
            <Task></Task>
            <Opcode></Opcode>
            <Channel></Channel>
            <Provider>Microsoft-Window-Security-SPP</Provider>
            <keywords>
                <keyword>经典</keyword>
            </keywords>
        </RenderingInfo>
    </Event>
    
    <Event xmlns1='http://schemas.microsoft.com/win/2004/08/events/event'>
        <System>
            <Provider Name='Microsoft-Windows-Security-SPP' Guid='{E23B3380-C8C9-472C-F28DFEA0F156}' EventSourceName='Software Protection Platform Server'></Provider>
            <EventID Qualifiers='16384'>902</EventID>
            <Version>0</Version>
            <Level>0</Level>
            <Task>0</Task>
            <Opcode>0</Opcode>
            <Keywords>0x80000000000</Keywords>
            <TimeCreated SystemTime='2018-12-25T09:08:59.00000000000Z'/>
            <EventRecordID>8634</EventRecordID>
            <Correlation/>
            <Execution ProcessID='0' ThreadID='0'/>
            <Channel>Application</Channel>
            <Computer>WIN-CANDBPBBOCN</Computer>
            <Security/>
        </System>
        <EventData>
            <Date>6.1.7601.17514</Date>
        </EventData>
        <RenderingInfo Culture='zh-CN'>
            <Message>软件保护服务已启动。6.1.7601.17514</Message>
            <Level>信息</Level>
            <Task></Task>
            <Opcode></Opcode>
            <Channel></Channel>
            <Provider>Microsoft-Window-Security-SPP</Provider>
            <keywords>
                <keyword>经典</keyword>
            </keywords>
        </RenderingInfo>
    </Event>
    
    <Event xmlns1='http://schemas.microsoft.com/win/2004/08/events/event'>
        <System>
            <Provider Name='Microsoft-Windows-Security-SPP' Guid='{E23B3380-C8C9-472C-F28DFEA0F156}' EventSourceName='Software Protection Platform Server'></Provider>
            <EventID Qualifiers='16384'>902</EventID>
            <Version>0</Version>
            <Level>0</Level>
            <Task>0</Task>
            <Opcode>0</Opcode>
            <Keywords>0x80000000000</Keywords>
            <TimeCreated SystemTime='2018-12-25T09:08:59.00000000000Z'/>
            <EventRecordID>8634</EventRecordID>
            <Correlation/>
            <Execution ProcessID='0' ThreadID='0'/>
            <Channel>Application</Channel>
            <Computer>WIN-CANDBPBBOCN</Computer>
            <Security/>
        </System>
        <EventData>
            <Date>6.1.7601.17514</Date>
        </EventData>
        <RenderingInfo Culture='zh-CN'>
            <Message>软件保护服务已启动。6.1.7601.17514</Message>
            <Level>信息</Level>
            <Task></Task>
            <Opcode></Opcode>
            <Channel></Channel>
            <Provider>Microsoft-Window-Security-SPP</Provider>
            <keywords>
                <keyword>经典</keyword>
            </keywords>
        </RenderingInfo>
    </Event>
    
    <Event xmlns1='http://schemas.microsoft.com/win/2004/08/events/event'>
        <System>
            <Provider Name='Microsoft-Windows-Security-SPP' Guid='{E23B3380-C8C9-472C-F28DFEA0F156}' EventSourceName='Software Protection Platform Server'></Provider>
            <EventID Qualifiers='16384'>902</EventID>
            <Version>0</Version>
            <Level>0</Level>
            <Task>0</Task>
            <Opcode>0</Opcode>
            <Keywords>0x80000000000</Keywords>
            <TimeCreated SystemTime='2018-12-25T09:08:59.00000000000Z'/>
            <EventRecordID>8634</EventRecordID>
            <Correlation/>
            <Execution ProcessID='0' ThreadID='0'/>
            <Channel>Application</Channel>
            <Computer>WIN-CANDBPBBOCN</Computer>
            <Security/>
        </System>
        <EventData>
            <Date>6.1.7601.17514</Date>
        </EventData>
        <RenderingInfo Culture='zh-CN'>
            <Message>软件保护服务已启动。6.1.7601.17514</Message>
            <Level>信息</Level>
            <Task></Task>
            <Opcode></Opcode>
            <Channel></Channel>
            <Provider>Microsoft-Window-Security-SPP</Provider>
            <keywords>
                <keyword>经典</keyword>
            </keywords>
        </RenderingInfo>
    </Event>

</Events>
View Code
import xml.etree.ElementTree as ET

def es(mvalue):
    """Placeholder hook that accepts a value and does nothing.

    NOTE(review): presumably intended to forward the packed data somewhere
    (the name suggests Elasticsearch) -- confirm before relying on it.

    :param mvalue: ignored by the current implementation.
    :return: always ``None``.
    """
    return None

def packNodes(nodes,m):
    """Recursively copy an XML element and its descendants into nested dicts.

    A new dict is stored under ``m[nodes.tag]``. It is filled with the
    element's attributes, then with a ``"value"`` key holding the element's
    text (only when the text contains something other than whitespace), and
    finally with one nested entry per child element, keyed by the child's tag.

    Things to be aware of:

    * Siblings that share a tag overwrite each other, so only the last one
      survives in the resulting dict.
    * The text is stored exactly as found (not stripped); stripping is used
      only to decide whether the text is worth storing.
    * The ``"value"`` key and child tags are written after the attributes, so
      they replace an attribute of the same name.
    * Elements without attributes, text or children are kept as empty dicts.

    :param nodes: the element to convert; it must expose ``tag``, ``attrib``
        and ``text`` and be iterable over its children
        (e.g. ``xml.etree.ElementTree.Element``).
    :param m: the dict that receives the converted element; mutated in place.
    :return: ``None``.
    """
    # The former ``len(nodes) < 0`` guard was removed: a length is never
    # negative, so that branch could not run.
    entry = {}
    m[nodes.tag] = entry
    entry.update(nodes.attrib)

    text = nodes.text
    # Skip missing text and pure-whitespace text (indentation between tags).
    if text is not None and text.strip() != "":
        entry["value"] = text

    for node in nodes:
        packNodes(node, entry)

def main():
    """Load ``event.xml``, pack it into nested dicts and print the result.

    Prints the root element first, then a list holding one packed dict per
    direct child of the root.

    :return: ``None``.
    """
    # Build the document tree from the file and fetch its root element.
    document = ET.ElementTree(file="event.xml")
    top = document.getroot()
    print(top)  # e.g. <Element 'Events' at 0x01EAAB40>

    # Reminder: element.attrib is a dict; element.tag and element.text are strings.
    packed = []
    # NOTE(review): every pass packs the *root* rather than the child being
    # visited, so the list holds one identical copy of the whole tree per
    # child. If one dict per child was intended, pass the child instead --
    # confirm before changing, since the printed output would change.
    for _ in top:
        record = {}
        packNodes(top, record)
        packed.append(record)

    # To pack the document just once instead:
    #     m = {}
    #     packNodes(root, m)
    print(packed)
    # Handing the result to es() is currently disabled.


if __name__ == '__main__':
    main()

结果如下:

[{'Events': 
    {'Event': {'xmlns1': 'http://schemas.microsoft.com/win/2004/08/events/event', 
    'System': 
        {'Provider': {'Name': 'Microsoft-Windows-Security-SPP', 'Guid': '{E23B3380-C8C9-472C-F28DFEA0F156}', 'EventSourceName': 'Software Protection Platform Server'}, 
        'EventID': {'Qualifiers': '16384', 'value': '902'}, 
        'Version': {'value': '0'}, 'Level': {'value': '0'}, 
        'Task': {'value': '0'}, 
        'Opcode': {'value': '0'}, 
        'Keywords': {'value': '0x80000000000'}, 
        'TimeCreated': {'SystemTime': '2018-12-25T09:08:59.00000000000Z'}, 
        'EventRecordID': {'value': '8634'}, 
        'Correlation': {}, 
        'Execution': {'ProcessID': '0', 'ThreadID': '0'}, 
        'Channel': {'value': 'Application'}, 
        'Computer': {'value': 'WIN-CANDBPBBOCN'}, 
        'Security': {}}, 
    'EventData': {'Date': {'value': '6.1.7601.17514'}}, 
    'RenderingInfo': {'Culture': 'zh-CN', 'Message': {'value': '软件保护服务已启动。6.1.7601.17514'}, 'Level':{'value': '信息'}, 'Task': {}, 'Opcode': {}, 'Channel': {}, 'Provider': {'value': 'Microsoft-Window-Security-SPP'}, 'keywords': {'keyword': {'value': '经典'}}}
}}}, 
{'Events': 
    {'Event': {'xmlns1': 'http://schemas.microsoft.com/win/2004/08/events/event', 'System': {'Provider': {'Name': 'Microsoft-Windows-Security-SPP',
'Guid': '{E23B3380-C8C9-472C-F28DFEA0F156}', 'EventSourceName': 'Software Protec
tion Platform Server'}, 'EventID': {'Qualifiers': '16384', 'value': '902'}, 'Ver
sion': {'value': '0'}, 'Level': {'value': '0'}, 'Task': {'value': '0'}, 'Opcode'
: {'value': '0'}, 'Keywords': {'value': '0x80000000000'}, 'TimeCreated': {'Syste
mTime': '2018-12-25T09:08:59.00000000000Z'}, 'EventRecordID': {'value': '8634'},
 'Correlation': {}, 'Execution': {'ProcessID': '0', 'ThreadID': '0'}, 'Channel':
 {'value': 'Application'}, 'Computer': {'value': 'WIN-CANDBPBBOCN'}, 'Security':
 {}}, 'EventData': {'Date': {'value': '6.1.7601.17514'}}, 'RenderingInfo': {'Cul
ture': 'zh-CN', 'Message': {'value': '软件保护服务已启动。6.1.7601.17514'}, 'Lev
el': {'value': '信息'}, 'Task': {}, 'Opcode': {}, 'Channel': {}, 'Provider': {'v
alue': 'Microsoft-Window-Security-SPP'}, 'keywords': {'keyword': {'value': '经典
'}}}}}}, 
...]

 

posted @ 2019-04-26 21:06  kris12  阅读(2728)  评论(0)  编辑  收藏  举报
levels of contents