nodeName,nodeValue未知 xml 入库方案 The ElementTree iterparse Function

 

 

 

 

 

import xml.etree.ElementTree as ET
from lxml.html import *
from  xmljson import badgerfish as bf
from pymongo import *

# Module-level accumulator: RecursionDict() writes every flattened
# name -> value pair it finds into this dict.
xmlDict = {}


def RecursionDict(dict_a):
    """Collect single-valued leaf nodes of a nested dict into ``xmlDict``.

    Walks ``dict_a`` recursively. Whenever a key maps to a dict that holds
    exactly one entry and that entry's value is not itself a dict, the pair
    ``key -> value`` is stored in the module-level ``xmlDict``. A key that
    occurs more than once in the tree overwrites the earlier entry.

    Non-dict values (including lists) are not descended into.

    :param dict_a: the nested mapping to walk; any non-dict argument is
        ignored.
    :returns: ``None``; results are accumulated in ``xmlDict``.
    """
    if not isinstance(dict_a, dict):
        return
    # Iterate the items directly: indexing ``dict_a.keys()`` only works on
    # Python 2 and rebuilt the key list on every iteration.
    for temp_key, temp_value in dict_a.items():
        if isinstance(temp_value, dict) and len(temp_value) == 1:
            # The wrapper dict has a single entry; take its only value.
            w = next(iter(temp_value.values()))
            if not isinstance(w, dict):
                xmlDict[temp_key] = w
        # Always recurse so deeper leaves are collected as well.
        RecursionDict(temp_value)


# Load the sample response from disk and re-serialise the root element
# to a string.
tree = ET.parse('listorderitems1493779131.xml')
root = tree.getroot()
xmlstr = ET.tostring(root, "us-ascii", "xml")
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(xmlstr)

# Convert the markup to a nested dict using the BadgerFish convention.
# NOTE(review): `fromstring` comes from the `lxml.html` star import, so the
# XML is parsed as HTML; presumably this is why the stored keys are
# lower-cased - confirm before switching parsers.
res = bf.data(fromstring(xmlstr))
print(res)
# Flatten the nested dict into the module-level xmlDict.
RecursionDict(res)
print(xmlDict)

# Store the flattened document in the `col` collection of the
# `apixmldict` database on the default MongoClient() connection.
client = MongoClient()
db = client.apixmldict
collection = db.col
# Collection.save() is deprecated since PyMongo 3.0 and removed in 4.0;
# xmlDict carries no _id, so insert_one() performs the same insert.
collection.insert_one(xmlDict)

 

 

wuser@ubuntu:~/apiamzpy$ cat  wxmljsondict.py
import xml.etree.ElementTree as ET
from lxml.html import *
from  xmljson import badgerfish as bf
from pymongo import *

# Module-level accumulator: RecursionDict() writes every flattened
# name -> value pair it finds into this dict.
xmlDict = {}


def RecursionDict(dict_a):
    """Collect single-valued leaf nodes of a nested dict into ``xmlDict``.

    Walks ``dict_a`` recursively. Whenever a key maps to a dict that holds
    exactly one entry and that entry's value is not itself a dict, the pair
    ``key -> value`` is stored in the module-level ``xmlDict``. A key that
    occurs more than once in the tree overwrites the earlier entry.

    Non-dict values (including lists) are not descended into.

    :param dict_a: the nested mapping to walk; any non-dict argument is
        ignored.
    :returns: ``None``; results are accumulated in ``xmlDict``.
    """
    if not isinstance(dict_a, dict):
        return
    # Iterate the items directly: indexing ``dict_a.keys()`` only works on
    # Python 2 and rebuilt the key list on every iteration.
    for temp_key, temp_value in dict_a.items():
        if isinstance(temp_value, dict) and len(temp_value) == 1:
            # The wrapper dict has a single entry; take its only value.
            w = next(iter(temp_value.values()))
            if not isinstance(w, dict):
                xmlDict[temp_key] = w
        # Always recurse so deeper leaves are collected as well.
        RecursionDict(temp_value)


# Load the sample response from disk and re-serialise the root element
# to a string.
tree = ET.parse('listorderitems1493779131.xml')
root = tree.getroot()
xmlstr = ET.tostring(root, "us-ascii", "xml")
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(xmlstr)

# Convert the markup to a nested dict using the BadgerFish convention.
# NOTE(review): `fromstring` comes from the `lxml.html` star import, so the
# XML is parsed as HTML; presumably this is why the stored keys are
# lower-cased - confirm before switching parsers.
res = bf.data(fromstring(xmlstr))
print(res)
# Flatten the nested dict into the module-level xmlDict.
RecursionDict(res)
print(xmlDict)

# Store the flattened document in the `col` collection of the
# `apixmldict` database on the default MongoClient() connection.
client = MongoClient()
db = client.apixmldict
collection = db.col
# Collection.save() is deprecated since PyMongo 3.0 and removed in 4.0;
# xmlDict carries no _id, so insert_one() performs the same insert.
collection.insert_one(xmlDict)
wuser@ubuntu:~/apiamzpy$ cat listorderitems1493779131.xml
<?xml version="1.0"?>
<ListOrderItemsResponse xmlns="https://mws.amazonservices.com/Orders/2013-09-01">
  <ListOrderItemsResult>
    <AmazonOrderId>123-1239963-8862642</AmazonOrderId>
    <OrderItems>
      <OrderItem>
        <ASIN>B01M123ABC</ASIN>
        <SellerSKU>ABCEHM054AWUS-USAS2</SellerSKU>
        <OrderItemId>12325810562154</OrderItemId>
        <Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</Title>
        <QuantityOrdered>1</QuantityOrdered>
        <QuantityShipped>0</QuantityShipped>
        <PromotionIds/>
      </OrderItem>
    </OrderItems>
  </ListOrderItemsResult>
  <ResponseMetadata>
    <RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</RequestId>
  </ResponseMetadata>
</ListOrderItemsResponse>
wuser@ubuntu:~/apiamzpy$ 

 

 

> db.col.save({'w':123})
WriteResult({ "nInserted" : 1 })
> db.col.find().pretty()
{ "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
> db.col.find().pretty()
{ "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
        "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0=\"https://mws.amazonservices.com/Orders/2013-09-01\">\n  <ns0:ListOrderItemsResult>\n    <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>\n    <ns0:OrderItems>\n      <ns0:OrderItem>\n        <ns0:ASIN>B01M123ABC</ns0:ASIN>\n        <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>\n        <ns0:OrderItemId>12325810562154</ns0:OrderItemId>\n        <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>\n        <ns0:QuantityOrdered>1</ns0:QuantityOrdered>\n        <ns0:QuantityShipped>0</ns0:QuantityShipped>\n        <ns0:PromotionIds />\n      </ns0:OrderItem>\n    </ns0:OrderItems>\n  </ns0:ListOrderItemsResult>\n  <ns0:ResponseMetadata>\n    <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>\n  </ns0:ResponseMetadata>\n</ns0:ListOrderItemsResponse>"
}
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
> db.col.find().pretty()
{ "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
        "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0=\"https://mws.amazonservices.com/Orders/2013-09-01\">\n  <ns0:ListOrderItemsResult>\n    <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>\n    <ns0:OrderItems>\n      <ns0:OrderItem>\n        <ns0:ASIN>B01M123ABC</ns0:ASIN>\n        <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>\n        <ns0:OrderItemId>12325810562154</ns0:OrderItemId>\n        <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>\n        <ns0:QuantityOrdered>1</ns0:QuantityOrdered>\n        <ns0:QuantityShipped>0</ns0:QuantityShipped>\n        <ns0:PromotionIds />\n      </ns0:OrderItem>\n    </ns0:OrderItems>\n  </ns0:ListOrderItemsResult>\n  <ns0:ResponseMetadata>\n    <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>\n  </ns0:ResponseMetadata>\n</ns0:ListOrderItemsResponse>"
}
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
{
        "_id" : ObjectId("590b35cd1d41c832ec3d2c03"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
> db.col.find().pretty()
{ "_id" : ObjectId("590b35877511f2683d345653"), "w" : 123 }
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048b"),
        "exception" : "<ns0:ListOrderItemsResponse xmlns:ns0=\"https://mws.amazonservices.com/Orders/2013-09-01\">\n  <ns0:ListOrderItemsResult>\n    <ns0:AmazonOrderId>123-1239963-8862642</ns0:AmazonOrderId>\n    <ns0:OrderItems>\n      <ns0:OrderItem>\n        <ns0:ASIN>B01M123ABC</ns0:ASIN>\n        <ns0:SellerSKU>ABCEHM054AWUS-USAS2</ns0:SellerSKU>\n        <ns0:OrderItemId>12325810562154</ns0:OrderItemId>\n        <ns0:Title>wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags</ns0:Title>\n        <ns0:QuantityOrdered>1</ns0:QuantityOrdered>\n        <ns0:QuantityShipped>0</ns0:QuantityShipped>\n        <ns0:PromotionIds />\n      </ns0:OrderItem>\n    </ns0:OrderItems>\n  </ns0:ListOrderItemsResult>\n  <ns0:ResponseMetadata>\n    <ns0:RequestId>8cc6b5dc-f79e-4da4-b914-9f14388c0bbf</ns0:RequestId>\n  </ns0:ResponseMetadata>\n</ns0:ListOrderItemsResponse>"
}
{
        "_id" : ObjectId("590b35ab1d41c832e2b6048c"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
{
        "_id" : ObjectId("590b35cd1d41c832ec3d2c03"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
{
        "_id" : ObjectId("590b39841d41c833325a4dcd"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
{
        "_id" : ObjectId("590b3cc51d41c83347fbfcb1"),
        "asin" : "B01M123ABC",
        "sellersku" : "ABCEHM054AWUS-USAS2",
        "title" : "wwwTEST_DATA_Holife Vacuum Sealer, Silver Compact Food Saver Wet/Dry Vacuum Sealing System with Food Grade Starter Bags",
        "amazonorderid" : "123-1239963-8862642",
        "quantityshipped" : 0,
        "requestid" : "8cc6b5dc-f79e-4da4-b914-9f14388c0bbf",
        "quantityordered" : 1,
        "orderitemid" : NumberLong("12325810562154")
}
> db.col.find().count()
9
> db.col.find().count()
10
>

 

 

 问题:nodeName,nodeValue未知 xml动态数据入库,   解决方案:  
 
入库结果
 
test-data
 
 

---->更高效的

nodeName,nodeValue未知 xml 入库方案

 

 

 

xml--->?--->database
json只是过程,不是目的;


想到一种算法:先将 xml 转为 string,再借助正则表达式处理该 string。目测可行,但似乎并不高效。


也许xpath的原理就是上述算法??

 

 

 

 

SAX解析多层嵌套XML - donglindonglin的博客 - 博客频道 - CSDN.NET
http://blog.csdn.net/donglindonglin/article/details/51996926

 

 

 

 

 

wuser@ubuntu:~/apiamzpy$ python wl.py
<listiterator object at 0x7f6c99c20ed0>
[0, 23, 'w1']
Traceback (most recent call last):
  File "wl.py", line 5, in <module>
    if  t0.next():
StopIteration
wuser@ubuntu:~/apiamzpy$ vim wl.py

l0 = [0,23,'w1']  # sample list to iterate over
t0 = l0.__iter__()  # obtain a list iterator explicitly
print t0  # prints the iterator object itself, not its items
print list(t0)  # list() consumes every remaining item, exhausting t0
if  t0.next():  # t0 is already exhausted, so this raises StopIteration
        t0.next()  # never reached in this run
~                        

 

 

问题:

xml取出所有的nodeName、nodeValue对

 

 

0 - 不高效的方案:0-0 用 php / python 将 xml 处理为 string;0-1 利用正则表达式处理该字符串。

 

w

 

 

http://effbot.org/zone/element-iterparse.htm

posted @ 2017-05-03 13:49  papering  阅读(237)  评论(0编辑  收藏  举报