Python HTMLParser:属性

# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys

reload(sys)
sys.setdefaultencoding('utf-8')


class MyHTMLParser(HTMLParser):
    """Collect terms and definitions from <div> start tags."""

    def __init__(self):
        # NOTE: the HTMLParser base initializer is not invoked here.
        self.terms = []
        self.definitions = []

    def handle_starttag(self, tag, attrs):
        """Store the second attribute's value of 'word' / 'desc' divs."""
        if tag != 'div':
            return
        for _name, val in attrs:
            # retrieve the terms
            if val == 'word':
                self.terms.append(attrs[1][1])
            # retrieve the definitions (a falsy value is stored as None)
            if val == 'desc':
                second = attrs[1][1]
                self.definitions.append(second if second else None)


parser = MyHTMLParser()

# open the page and retrieve its source
response = urllib2.urlopen('http://localhost/')
html = response.read().decode('utf-8')
response.close()

# extract the terms and definitions
parser.feed(html)

2条回答

网友

1楼 · 编辑于 2024-06-16 11:33:16

我认为你没有正确初始化HTMLParser。也许你根本不需要初始化它。这对我有用：

# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
# Python 2 only: make UTF-8 the interpreter-wide default string encoding.
# The reload(sys) beforehand is the usual idiom for regaining access to
# sys.setdefaultencoding, so the order of these two statements matters.
reload(sys)
sys.setdefaultencoding('utf-8')


class MyHTMLParser(HTMLParser):
    """Collect terms and definitions from ``<div>`` start tags.

    A div counts as a term when any of its attributes has the value
    ``'word'`` and as a definition when any has the value ``'desc'``.
    The text that is stored is the value of the div's *second* attribute.

    Attributes:
        terms: values taken from 'word' divs, in document order.
        definitions: values taken from 'desc' divs, in document order;
            ``None`` stands for an empty or missing value.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base parser and the two result lists.

        Any arguments are passed straight through to ``HTMLParser``.
        """
        # The base class is named explicitly rather than reached through
        # super(), and it must run so that feed() finds the parser state.
        HTMLParser.__init__(self, *args, **kwargs)
        self.terms = []
        self.definitions = []

    def handle_starttag(self, tag, attrs):
        """Record the payload of every 'word' / 'desc' div start tag.

        Args:
            tag: lower-cased tag name supplied by HTMLParser.
            attrs: list of ``(name, value)`` pairs for the tag.
        """
        # Single-argument form prints the same text as the former
        # two-item print statement.
        print("Encountered a start tag: %s" % tag)
        if tag != 'div':
            return

        # The payload lives in the second attribute; a div with fewer than
        # two attributes simply has no payload instead of raising IndexError.
        payload = attrs[1][1] if len(attrs) > 1 else None

        for _name, value in attrs:
            # retrieve the terms
            if value == 'word':
                self.terms.append(payload)
            # retrieve the definitions (empty values are stored as None)
            if value == 'desc':
                self.definitions.append(payload if payload else None)


parser = MyHTMLParser()

# Open the page and read its source. The finally clause closes the
# response even when reading or decoding the body fails.
response = urllib2.urlopen('http://localhost/')
try:
    html = response.read().decode('utf-8')
finally:
    response.close()

# Extract the terms and definitions.
parser.feed(html)

更新

^{pr2}$

输出：

['center', 'left']

网友

2楼 · 编辑于 2024-06-16 11:33:16

好的，我得到了解决方案，super().__init__不能工作，必须硬编码名称

def __init__(self):
        # Call the base initializer through the class name; the accompanying
        # answer reports that super().__init__ does not work for this parser.
        HTMLParser.__init__(self)

在主.py

输出

相关问题更多 >

编程相关推荐

热门问题

热门文章