python提取xml属性导入Mysql
xml文档来自ganglia-gmond端telnet localhost 8649产生出来的文档,由于ganglia每隔一段时间就更新数据,为了永久保存数据到MySQL中,就用python写了最开始的代码,后续的数据库操作应该是update 某些属性的值。
1.使用MySQL建表
mysql>create database ganglia;
mysql>show databases;
mysql>use ganglia;
mysql> create table class (NAME char(20), VAL char(40), TYPE char(10), UNITS char(15), TN int, TMAX int, DMAX int, SLOPE char(10));
mysql> describe class2;
mysql>insert into class values('load_one','0.00','float',' ',7043,70,0,'both');
mysql> select * from class;
2.将数据保存至数据库
import xml.dom.minidom as minidom import MySQLdb dom = minidom.parse("181.xml") root = dom.getElementsByTagName("GANGLIA_XML") conn=MySQLdb.connect(host='localhost',user='root',passwd='123',port=3306) cur=conn.cursor() conn.select_db('ganglia') for node in root: gangliaxml = node.getElementsByTagName("CLUSTER") for cluster in gangliaxml: host = cluster.getElementsByTagName("HOST") for metric in host: #print(metric.getAttribute("NAME")) for metrics in metric.getElementsByTagName("METRIC"): value=[metrics.attributes["NAME"].value, metrics.attributes["VAL"].value, metrics.attributes["TYPE"].value, metrics.attributes["UNITS"].value, metrics.attributes["TN"].value, metrics.attributes["TMAX"].value, metrics.attributes["DMAX"].value, metrics.attributes["SLOPE"].value,] if metric.getAttribute("NAME") == '172.19.0.181': cur.execute('insert into class values(%s,%s,%s,%s,%s,%s,%s,%s)',value) conn.commit() elif metric.getAttribute("NAME") == '172.19.0.186': cur.execute('insert into class2 values(%s,%s,%s,%s,%s,%s,%s,%s)',value) conn.commit() else: cur.execute('insert into class3 values(%s,%s,%s,%s,%s,%s,%s,%s)',value) conn.commit() cur.close() conn.close() print 'Complete!'
xml文档是:
<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?> <!DOCTYPE GANGLIA_XML [ <!ELEMENT GANGLIA_XML (GRID|CLUSTER|HOST)*> <!ATTLIST GANGLIA_XML VERSION CDATA #REQUIRED> <!ATTLIST GANGLIA_XML SOURCE CDATA #REQUIRED> <!ELEMENT GRID (CLUSTER | GRID | HOSTS | METRICS)*> <!ATTLIST GRID NAME CDATA #REQUIRED> <!ATTLIST GRID AUTHORITY CDATA #REQUIRED> <!ATTLIST GRID LOCALTIME CDATA #IMPLIED> <!ELEMENT CLUSTER (HOST | HOSTS | METRICS)*> <!ATTLIST CLUSTER NAME CDATA #REQUIRED> <!ATTLIST CLUSTER OWNER CDATA #IMPLIED> <!ATTLIST CLUSTER LATLONG CDATA #IMPLIED> <!ATTLIST CLUSTER URL CDATA #IMPLIED> <!ATTLIST CLUSTER LOCALTIME CDATA #REQUIRED> <!ELEMENT HOST (METRIC)*> <!ATTLIST HOST NAME CDATA #REQUIRED> <!ATTLIST HOST IP CDATA #REQUIRED> <!ATTLIST HOST LOCATION CDATA #IMPLIED> <!ATTLIST HOST TAGS CDATA #IMPLIED> <!ATTLIST HOST REPORTED CDATA #REQUIRED> <!ATTLIST HOST TN CDATA #IMPLIED> <!ATTLIST HOST TMAX CDATA #IMPLIED> <!ATTLIST HOST DMAX CDATA #IMPLIED> <!ATTLIST HOST GMOND_STARTED CDATA #IMPLIED> <!ELEMENT METRIC (EXTRA_DATA*)> <!ATTLIST METRIC NAME CDATA #REQUIRED> <!ATTLIST METRIC VAL CDATA #REQUIRED> <!ATTLIST METRIC TYPE (string | int8 | uint8 | int16 | uint16 | int32 | uint32 | float | double | timestamp) #REQUIRED> <!ATTLIST METRIC UNITS CDATA #IMPLIED> <!ATTLIST METRIC TN CDATA #IMPLIED> <!ATTLIST METRIC TMAX CDATA #IMPLIED> <!ATTLIST METRIC DMAX CDATA #IMPLIED> <!ATTLIST METRIC SLOPE (zero | positive | negative | both | unspecified) #IMPLIED> <!ATTLIST METRIC SOURCE (gmond) 'gmond'> <!ELEMENT EXTRA_DATA (EXTRA_ELEMENT*)> <!ELEMENT EXTRA_ELEMENT EMPTY> <!ATTLIST EXTRA_ELEMENT NAME CDATA #REQUIRED> <!ATTLIST EXTRA_ELEMENT VAL CDATA #REQUIRED> <!ELEMENT HOSTS EMPTY> <!ATTLIST HOSTS UP CDATA #REQUIRED> <!ATTLIST HOSTS DOWN CDATA #REQUIRED> <!ATTLIST HOSTS SOURCE (gmond | gmetad) #REQUIRED> <!ELEMENT METRICS (EXTRA_DATA*)> <!ATTLIST METRICS NAME CDATA #REQUIRED> <!ATTLIST METRICS SUM CDATA #REQUIRED> <!ATTLIST METRICS NUM CDATA #REQUIRED> <!ATTLIST METRICS TYPE (string | int8 | uint8 | int16 | uint16 | int32 | uint32 | float | double | timestamp) #REQUIRED> <!ATTLIST METRICS UNITS CDATA #IMPLIED> <!ATTLIST METRICS SLOPE (zero | positive | negative | both | unspecified) #IMPLIED> <!ATTLIST METRICS SOURCE (gmond) 'gmond'> ]> <GANGLIA_XML VERSION="3.6.1" SOURCE="gmond"> <CLUSTER NAME="sg620g" LOCALTIME="1432625962" OWNER="unspecified" LATLONG="unspecified" URL="unspecified"> <HOST NAME="172.19.0.181" IP="172.19.0.181" TAGS="" REPORTED="1432625957" TN="4" TMAX="20" DMAX="0" LOCATION="unspecified" GMOND_STARTED="1432609944"> <METRIC NAME="load_one" VAL="0.01" TYPE="float" UNITS=" " TN="33" TMAX="70" DMAX="0" SLOPE="both"> <EXTRA_DATA> <EXTRA_ELEMENT NAME="GROUP" VAL="load"/> <EXTRA_ELEMENT NAME="DESC" VAL="One minute load average"/> <EXTRA_ELEMENT NAME="TITLE" VAL="One Minute Load Average"/> </EXTRA_DATA> </METRIC> ............... </HOST> <HOST NAME="172.19.0.186" IP="172.19.0.186" TAGS="" REPORTED="1432625960" TN="2" TMAX="20" DMAX="0" LOCATION="unspecified" GMOND_STARTED="1432366857"> <METRIC NAME="load_one" VAL="0.53" TYPE="float" UNITS=" " TN="33" TMAX="70" DMAX="0" SLOPE="both"> <EXTRA_DATA> <EXTRA_ELEMENT NAME="GROUP" VAL="load"/> <EXTRA_ELEMENT NAME="DESC" VAL="One minute load average"/> <EXTRA_ELEMENT NAME="TITLE" VAL="One Minute Load Average"/> </EXTRA_DATA> </METRIC> ................... </HOST> <HOST NAME="172.19.0.177" IP="172.19.0.177" TAGS="" REPORTED="1432625960" TN="2" TMAX="20" DMAX="0" LOCATION="unspecified" GMOND_STARTED="1432395875"> <METRIC NAME="load_one" VAL="0.39" TYPE="float" UNITS=" " TN="33" TMAX="70" DMAX="0" SLOPE="both"> <EXTRA_DATA> <EXTRA_ELEMENT NAME="GROUP" VAL="load"/> <EXTRA_ELEMENT NAME="DESC" VAL="One minute load average"/> <EXTRA_ELEMENT NAME="TITLE" VAL="One Minute Load Average"/> </EXTRA_DATA> </METRIC> ......................... </HOST> </CLUSTER> </GANGLIA_XML>