python解析xml模块封装代码_python教程-查字典教程网

摘要：有如下的xml文件：复制代码代码如下:12下面介绍python解析xml文件的几种方法，使用python模块实现。方式1，python模块实...

有如下的xml文件：

复制代码代码如下:

<?xml version="1.0" encoding="utf-8" ?>

<root>

<childs>

<child>1</child>

<child>2</child>

</childs>

</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

复制代码代码如下:

#!/usr/bin/env python

# -*- coding: utf-8 -*-

from xml.sax.handler import ContentHandler

from xml.sax import parse

class TestHandle(ContentHandler):
    """SAX handler that traces parse events and collects character data.

    Every start tag, end tag and text chunk is echoed to stdout, and each
    text chunk is additionally appended to the list supplied by the caller.
    """

    def __init__(self, inlist):
        """Remember *inlist*, the list that receives every text chunk."""
        # The original skipped the base initialiser; run it so the handler
        # is fully set up before the parser starts calling back.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # %-formatting keeps the output identical under Python 2 and 3.
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to the list."""
        print('chars %s' % chars)
        self.inlist.append(chars)

# Demo entry point: parse test.xml from the working directory, tracing each
# SAX event, then show every text chunk that was collected.
if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    # Single-argument print() behaves the same under Python 2 and 3.
    print(lt)

结果：

[html] view plaincopy

chars

chars

chars 1

endname child

chars

chars 2

endname child

chars

endname childs

chars

endname root

[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

复制代码代码如下:

#!/usr/bin/env python

# -*- coding: utf-8 -*-

from xml.dom import minidom

# Sample document for the minidom demo.  The literal must open with exactly
# three quotes: with five, the string starts with two stray quote characters
# and the parser rejects it because the XML declaration is no longer first.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
<request name='first'>/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''

def doxml(xmlstr):
    """Parse *xmlstr* with minidom and print a walk over the root's children.

    Prints the whole document, then the root element, then every direct
    child of the root.  For element children it also prints the ``name``
    attribute, the node name, the number of child nodes and, when the first
    child is text or CDATA, that text.

    Args:
        xmlstr: XML document text accepted by ``minidom.parseString``.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or processing
    # instruction precedes it; dom.firstChild would return that node instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements carry attributes; whitespace text, comments and
        # similar nodes are echoed above and otherwise skipped.
        if child.nodeType != child.ELEMENT_NODE:
            continue

        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))

        # An empty element has no first child, and a nested element has no
        # .data attribute, so only text-like first children are printed.
        first = child.firstChild
        if first is not None and first.nodeType in (
                first.TEXT_NODE, first.CDATA_SECTION_NODE):
            print('child data: %s' % first.data)

        print('=======================================')
        print('more help info to see:')
        # NOTE(review): help() receives the attribute *name* (a string), so
        # pydoc treats it as a topic lookup rather than documenting the node
        # member; help() returns None, which is printed too.  Kept as in the
        # original -- confirm whether help(getattr(child, med)) was intended.
        for med in dir(child):
            print(help(med))

# Demo entry point: walk the sample document defined in this script.
if __name__ == '__main__':
    doxml(xmlstr)

结果：

[html] view plaincopy

Dom:

<?xml version="1.0" ?><hash>

<request name="first">/2/photos/square/type.xml</request>

<error_code>21301</error_code>

<error>auth faild!</error>

</hash>

root:

<hash>

<request name="first">/2/photos/square/type.xml</request>

<error_code>21301</error_code>

<error>auth faild!</error>

</hash>

<request name="first">/2/photos/square/type.xml</request>

child node attribute name: first

child node name: request

child node len: 1

child data: /2/photos/square/type.xml

=======================================

more help info to see:

两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================

实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而其xml文件的头部声明也必须是utf-8，为其它编码会报错误。

网上的解决办法都是替换xml文件头部的编码声明，然后转换编码为utf-8再用minidom解码，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。

====写xml内容的分割线=========

复制代码代码如下:

#!/usr/bin/env python

#encoding: utf-8

from xml.dom import minidom

class xmlwrite:
    """Small minidom wrapper that builds a result document under an <api> root.

    Nodes are addressed with slash-separated paths relative to the root
    element.  A step may carry a zero-based index among same-named siblings,
    e.g. ``/Case[1]/`` is the second ``Case`` child of the root; a step
    without an index means the first match.  Attribute predicates are not
    supported.
    """

    def __init__(self, resultfile):
        """Create an empty document that save_xml() writes to *resultfile*."""
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and cache its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve *xpath* to a node, starting from the root element.

        Returns:
            The addressed node, the root for an empty path such as ``'/'``,
            or None when any step does not match.

        Raises:
            ValueError: if a bracketed index is not an integer.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                # Leading, trailing and doubled slashes yield empty steps.
                continue

            bracket = step.find('[')
            if bracket > -1:
                name = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # Slicing with the -1 "not found" result would chop the last
                # character off the name, so un-indexed steps never matched.
                name = step
                index = 0

            # The index counts only siblings that share the step's name.
            matches = [child for child in current.childNodes
                       if child.nodeName == name]
            if not 0 <= index < len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Create element *nodename* and append it under the *parent* path.

        Args:
            parent: path of the parent node (see the class docstring).
            nodename: tag name of the new element.
            value: text content; a falsy value produces an empty element.
            attribute: optional dict of attribute names to values.
            CDATA: when true, store *value* in a CDATA section.

        If the parent path cannot be resolved a notice is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)

        if attribute and isinstance(attribute, dict):
            # A distinct loop name avoids clobbering the *value* parameter.
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)

        try:
            parentnode = self.__get_spec_node(parent)
        except (ValueError, AttributeError, TypeError):
            # Malformed index or a non-string path: treat as "not found".
            parentnode = None

        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append <StartTime> to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append <EndTime> to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append <PassCount> to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append <FailCount> to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty <Case> to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append <No> to the Case at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append <URL> to the Case at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append <DBData> to the Case at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append <APIData> to the Case at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append <DBSQL>, stored as CDATA, to the Case at *index*."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append <APIXPath> to the Case at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialise the document as UTF-8 and write it to self.resultfile."""
        # toxml(encoding=...) returns encoded bytes, so the bytes on disk
        # always agree with the encoding named in the XML declaration.
        payload = self.dom.toxml(encoding='utf-8')
        # Binary mode plus a context manager: no platform-dependent
        # re-encoding, and the handle is closed even if the write fails.
        with open(self.resultfile, 'wb') as handle:
            handle.write(payload)

# Demo entry point: build a two-case result document and save it.
if __name__ == '__main__':
    # Raw string so the Windows path separator is kept literally.
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')

    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')

    # Case[1] must exist before it can be addressed; without this call the
    # path does not resolve and the nodes below do not land in a second Case.
    xr.write_case()
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')

    xr.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。

比如：/root/child[1], 表示root的第2个child节点。