Python 解析XML

时间 2019-12-11

标签 python 解析 xml 栏目 Python 繁體版

原文链接

<configuration>


    <conf>
        <name>URL_to_start</name>
        <value>http://baike.baidu.com/item/python</value>
    </conf>

    <!-- 总爬取数量 默认:-1 无限制 -->
    <conf>
        <name>count</name>
        <value>10</value>
    </conf>

    <!-- 总爬取时间(分钟) 默认:-1 无限制-->
    <conf>
        <name>run_time</name>
        <value>-1</value>
    </conf>

    <!-- 线程数 默认:1 -->
    <conf>
        <name>Thread_count</name>
        <value>1</value>
    </conf>

    <!-- URL匹配规则 请使用正则表达式 -->
    <conf>
        <name>URL_re</name>
        <value>.*</value>
    </conf>
</configuration>

使用 xml.dom.minidom 解析上面 conf.xml 中的 conf 节点(node):

import xml.dom.minidom


class ReadConf:
    """Load ``<conf>`` name/value pairs from an XML configuration file."""

    @staticmethod
    def _child_text(parent, tag):
        """Return the text of the first ``tag`` element found under ``parent``.

        An empty element (``<value></value>`` or ``<value/>``) yields ``''``
        rather than failing on a missing child node.

        Raises:
            IndexError: if ``parent`` contains no ``tag`` element.
        """
        element = parent.getElementsByTagName(tag)[0]
        # Only text/CDATA children contribute: an empty element has no
        # children at all, and an XML comment inside the element must not
        # be mistaken for its value.
        text_types = (
            xml.dom.Node.TEXT_NODE,
            xml.dom.Node.CDATA_SECTION_NODE,
        )
        return ''.join(
            child.data
            for child in element.childNodes
            if child.nodeType in text_types
        )

    def read(self, path='conf.xml'):
        """Parse the XML document and return its settings as a dict.

        Every ``<conf>`` element below the document root contributes one
        entry: the text of its first ``<name>`` child is the key and the
        text of its first ``<value>`` child is the value. Values are
        returned as strings exactly as written in the file.

        Args:
            path: File name (or open file object) handed to
                ``xml.dom.minidom.parse``. Defaults to ``'conf.xml'``.

        Returns:
            A dict mapping each setting name to its string value. When a
            name occurs more than once, the last occurrence wins.

        Raises:
            IndexError: if a ``<conf>`` element has no ``<name>`` or no
                ``<value>`` child.
        """
        # Open the XML document and take its root element.
        dom = xml.dom.minidom.parse(path)
        root = dom.documentElement

        return {
            self._child_text(conf_node, 'name'):
                self._child_text(conf_node, 'value')
            for conf_node in root.getElementsByTagName('conf')
        }