2015年1月15日木曜日

[Python][XML] xml.dom.minidom

Python で XML を解析するには、標準ライブラリの xml.dom.minidom を使う。

test01.xml
  <?xml version="1.0" encoding="utf-8"?>
  <breakfast_menu>
    <food id="0">
      <name>Belgian Waffles</name>
      <price>$5.95</price>
      <description>two of our famous Belgian Waffles with plenty of real maple syrup</description>
      <calories>650</calories>
    </food>
    <food id="1">
      <name>Strawberry Belgian Waffles</name>
      <price>$7.95</price>
      <description>light Belgian waffles covered with strawberries and whipped cream</description>
      <calories>900</calories>
    </food>
    <food id="2">
      <name>Berry-Berry Belgian Waffles</name>
      <price>$8.95</price>
      <description>light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>
      <calories>900</calories>
    </food>
    <food id="3">
      <name>French Toast</name>
      <price>$4.50</price>
      <description>thick slices made from our homemade sourdough bread</description>
      <calories>600</calories>
    </food>
    <food id="4">
      <name>Homestyle Breakfast</name>
      <price>$6.95</price>
      <description>two eggs, bacon or sausage, toast, and our ever-popular hash browns</description>
      <calories>950</calories>
    </food>
  </breakfast_menu>

test01.py
# -*- coding:utf-8 -*-
"""Parse test01.xml with xml.dom.minidom and print basic DOM node properties.

The script reads test01.xml from the current working directory and walks
Document -> first <food> element -> its <name> element -> the text node,
printing the type, name and value of each node along the way.
"""
import xml.dom.minidom
import pprint


def _show(label, value):
    """Print label and value separated by a single space.

    A single pre-formatted string is handed to print(), so the output is the
    same under the Python 2 print statement and the Python 3 print function.
    """
    print("%s %s" % (label, value))


dom = xml.dom.minidom.parse('test01.xml')
pprint.pprint(dom)

# NodeList: every <food> element in the document
foods = dom.getElementsByTagName('food')
print("NodeList(food)")
pprint.pprint(foods)
print("length: %s" % (foods.length))

# Node: the first <food> element
food = foods.item(0)
pprint.pprint(food)
print("Node(food)")
_show("nodeType: ", food.nodeType)
_show("nodeName: ", food.nodeName)
_show("hasChildNodes(): ", food.hasChildNodes())
_show("hasAttributes(): ", food.hasAttributes())

# NodeList: the <name> elements below that <food>
names = food.getElementsByTagName('name')
print("NodeList(name)")
pprint.pprint(names)
_show("length: ", names.length)

# Node: the first <name> element
name = names.item(0)
print("Node(name)")
# Show the single node here; the NodeList was already printed above.
pprint.pprint(name)
_show("nodeType: ", name.nodeType)
_show("nodeName: ", name.nodeName)
_show("nodeValue: ", name.nodeValue)
_show("hasChildNodes(): ", name.hasChildNodes())
_show("hasAttributes(): ", name.hasAttributes())

# NodeList: children of <name>
nameDatas = name.childNodes
print("NodeList(data)")
pprint.pprint(nameDatas)
_show("length: ", nameDatas.length)

# Node: the first child of <name>, which holds the element's text
data = nameDatas.item(0)
print("Text Node(data)")
pprint.pprint(data)
_show("nodeType: ", data.nodeType)
_show("nodeName: ", data.nodeName)
_show("nodeValue: ", data.nodeValue)
_show("data: ", data.data)

# nodeType constants
print("nodeType constants")
node = xml.dom.Node
_show(" ELEMENT_NODE : ", node.ELEMENT_NODE)
_show(" ATTRIBUTE_NODE : ", node.ATTRIBUTE_NODE)
_show(" TEXT_NODE : ", node.TEXT_NODE)
_show(" CDATA_SECTION_NODE : ", node.CDATA_SECTION_NODE)
_show(" ENTITY_NODE : ", node.ENTITY_NODE)
_show(" PROCESSING_INSTRUCTION_NODE: ", node.PROCESSING_INSTRUCTION_NODE)
_show(" COMMENT_NODE : ", node.COMMENT_NODE)
_show(" DOCUMENT_NODE : ", node.DOCUMENT_NODE)
_show(" DOCUMENT_TYPE_NODE : ", node.DOCUMENT_TYPE_NODE)
_show(" NOTATION_NODE : ", node.NOTATION_NODE)

実行結果
> python test01.py
<xml.dom.minidom.Document instance at 0x00B402D8>
NodeList(food)
[<DOM Element: food at 0xb45418>,
<DOM Element: food at 0xb457d8>,
<DOM Element: food at 0xb45b98>,
<DOM Element: food at 0xb45f58>,
<DOM Element: food at 0xb4b350>]
length: 5
<DOM Element: food at 0xb45418>
Node(food)
nodeType: 1
nodeName: food
hasChildNodes(): True
hasAttributes(): True
NodeList(name)
[<DOM Element: name at 0xb45530>]
length: 1
Node(name)
[<DOM Element: name at 0xb45530>]
nodeType: 1
nodeName: name
nodeValue: None
hasChildNodes(): True
hasAttributes(): False
NodeList(data)
[<DOM Text node "u'Belgian Wa'...">]
length: 1
Text Node(data)
<DOM Text node "u'Belgian Wa'...">
nodeType: 3
nodeName: #text
nodeValue: Belgian Waffles
data: Belgian Waffles
nodeType constants
ELEMENT_NODE : 1
ATTRIBUTE_NODE : 2
TEXT_NODE : 3
CDATA_SECTION_NODE : 4
ENTITY_NODE : 6
PROCESSING_INSTRUCTION_NODE: 7
COMMENT_NODE : 8
DOCUMENT_NODE : 9
DOCUMENT_TYPE_NODE : 10
NOTATION_NODE : 12

0 件のコメント:

コメントを投稿