xml解析
xml文档
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2023</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2026</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2026</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
code
from xml.etree.ElementTree import parse
# Demo: parse demo.xml and explore the tree with ElementTree's XPath subset.
#
# The file is opened in binary mode so that the XML parser decodes the bytes
# itself (honouring any encoding declared in the document) instead of relying
# on the platform's default text encoding.
with open('demo.xml', 'rb') as f:
    et = parse(f)
print(et)

root = et.getroot()
# root.text is None when the root's start tag is immediately followed by a
# child element, so fall back to '' before stripping.
print(root, root.tag, root.attrib, (root.text or '').strip())

# A bare tag name only matches direct children of root; grandchildren are
# not found.
print(root.findall('country'))  # list -> [<Element 'country' at 0x0071E120>, <Element 'country' at 0x0071E270>, <Element 'country' at 0x0071E360>]
print(root.iterfind('country'))  # <generator object prepare_child.<locals>.select at 0x00707D20>
print('*' * 80)

# Iterating an element yields its direct children.
for child in root:
    print(child, child.get('name'))
print('*' * 80)

# iter() walks the whole subtree.
for child in root.iter():
    print(child)  # prints every element node

# Supported path syntax:
#   *                  : all child elements
#   .                  : the current node
#   //                 : all sub-elements at any depth below the current node
#   ..                 : the parent element
#   [@attrib="value"]  : elements whose attribute attrib equals "value",
#                        e.g. <country name="Liechtenstein">
#   [tag]              : elements that have a child named tag
#   [tag='text']       : elements that have a child named tag whose text is text
#   [position]         : elements at the given 1-based position,
#                        e.g. root.findall('country[1]')
print(root.findall('country/*'))
# Starting at root, find <rank> at any depth and then step to its parent
# (<country>), so this prints a list of country elements.
print(root.findall('.//rank/..'))
print(root.findall('country[@name="Singapore"]'))
print(root.findall('country[rank]'))
print('*' * 80)

# Positional predicates.
# [<Element 'country' at 0x00A6E150>, <Element 'country' at 0x00A6E2A0>, <Element 'country' at 0x00A6E390>]
print(root.findall('country[1]'))  # the first country element
print(root.findall('country[2]'))  # the second country element
print(root.findall('country[last()]'))  # the last country element
print(root.findall('country[last()-1]'))  # the second-to-last country element
构建xml文档
简单使用
from xml.etree.ElementTree import Element, ElementTree
from xml.etree.ElementTree import tostring
# Prepare the nested pieces first: an <Open>8.80</Open> leaf and an empty <Row>.
e3 = Element('Open')
e3.text = '8.80'
e2 = Element('Row')

# Root element created with its name="abc" attribute, then given text content.
e = Element('Data', {'name': 'abc'})
e.text = '123'
# tostring() serialises to bytes without any '\n' or '\t':
# b'<Data name="abc">123</Data>', which makes large documents hard to read.
print(tostring(e))

# Assemble the hierarchy: e3 becomes a child of e2, e2 a child of e.
e2.append(e3)
e.append(e2)
print(tostring(e))

# An ElementTree only needs the root node; write() saves the tree to a file.
et = ElementTree(e)
et.write('demo02.xml')
code
import csv
from xml.etree.ElementTree import ElementTree, Element
def csvtoxml(fname):
    """Convert a CSV file into an XML tree of <Row> elements.

    The first CSV record is taken as the header. Every following record
    becomes a <Row> child of the <Data> root, holding one sub-element per
    column: the tag is the header value and the text is the cell value.
    Columns are paired with ``zip``, so surplus cells (or surplus header
    names) in a record are dropped.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        An ``ElementTree`` rooted at the <Data> element, after ``pretty``
        has been applied to it.
    """
    root = Element('Data')
    # newline='' lets the csv module do its own newline handling, which is
    # needed for newlines embedded in quoted fields to be read correctly.
    with open(fname, 'r', newline='') as f:
        reader = csv.reader(f)
        # An empty file has no header record; treat it as "no columns" and
        # return an empty <Data> tree instead of leaking StopIteration.
        header = next(reader, [])
        for row in reader:
            erow = Element('Row')
            root.append(erow)
            for tag, text in zip(header, row):
                e = Element(tag)
                e.text = text
                erow.append(e)
    pretty(root)
    return ElementTree(root)
# Pretty-print helper for XML trees.
def pretty(e, level=0):
    """Indent the subtree rooted at ``e`` in place using newlines and tabs.

    Whitespace is written into ``text`` and ``tail`` so that each element
    starts on its own line, indented one tab per nesting level, and each
    closing tag lines up with its opening tag. Existing ``text`` or ``tail``
    that contains non-whitespace characters is left untouched so that real
    content is never overwritten.

    Args:
        e: The element to indent.
        level: Nesting depth of ``e``; 0 for the root.
    """
    indent_self = '\n' + '\t' * level
    indent_child = '\n' + '\t' * (level + 1)

    if len(e) > 0:
        # Only replace missing or whitespace-only text before the first child.
        if not (e.text and e.text.strip()):
            e.text = indent_child
        for child in e:
            pretty(child, level + 1)
        # The last child is followed by e's closing tag, which sits one level
        # shallower than the children. Its tail is never None here because
        # the recursive call either set it or kept existing content.
        if not child.tail.strip():
            child.tail = indent_self
    if not (e.tail and e.tail.strip()):
        e.tail = indent_self
# Convert the CSV file into an indented XML document and save it.
csv_path, xml_path = 'pingan.csv', 'pingan.xml'
et = csvtoxml(csv_path)
et.write(xml_path)