Python Mechanize select_form() 报错 ParseError: OPTION outside of SELECT

  File "C:\Python27\lib\site-packages\mechanize\_mechanize.py", line 499, in select_form
    global_form = self._factory.global_form
  File "C:\Python27\lib\site-packages\mechanize\_html.py", line 544, in __getattr__
    self.forms()
  File "C:\Python27\lib\site-packages\mechanize\_html.py", line 557, in forms
    self._forms_factory.forms())
  File "C:\Python27\lib\site-packages\mechanize\_html.py", line 237, in forms
    _urlunparse=_rfc3986.urlunsplit,
  File "C:\Python27\lib\site-packages\mechanize\_form.py", line 845, in ParseResponseEx
    _urlunparse=_urlunparse,
  File "C:\Python27\lib\site-packages\mechanize\_form.py", line 982, in _ParseFileEx
    fp.feed(data)
  File "C:\Python27\lib\site-packages\mechanize\_form.py", line 759, in feed
    _sgmllib_copy.SGMLParser.feed(self, data)
  File "C:\Python27\lib\site-packages\mechanize\_sgmllib_copy.py", line 110, in feed
    self.goahead(0)
  File "C:\Python27\lib\site-packages\mechanize\_sgmllib_copy.py", line 144, in goahead
    k = self.parse_starttag(i)
  File "C:\Python27\lib\site-packages\mechanize\_sgmllib_copy.py", line 302, in parse_starttag
    self.finish_starttag(tag, attrs)
  File "C:\Python27\lib\site-packages\mechanize\_sgmllib_copy.py", line 347, in finish_starttag
    self.handle_starttag(tag, method, attrs)
  File "C:\Python27\lib\site-packages\mechanize\_sgmllib_copy.py", line 387, in handle_starttag
    method(attrs)
  File "C:\Python27\lib\site-packages\mechanize\_form.py", line 736, in do_option
    _AbstractFormParser._start_option(self, attrs)
  File "C:\Python27\lib\site-packages\mechanize\_form.py", line 481, in _start_option
    raise ParseError("OPTION outside of SELECT")
ParseError: OPTION outside of SELECT

<html lang="en-us" xml:lang="en-us" xmlns="http://www.w3.org/1999/xhtml"> <head> I omitted this section </head> <body class="login"> <div id="container"> <div id="header" style="background-color: #13397A;"> <div id="content" class="colM"> <div id="content-main"> <form id="login-form" method="post" action="/admin/"> <div style="display:none"> <input type="hidden" value="8a689f2e3d215a3465f1bb66e037d1a5" name="csrfmiddlewaretoken"> </div> <div class="form-row"> <label class="required" for="id_username">Username:</label> <input id="id_username" type="text" maxlength="30" name="username"> </div> <div class="form-row"> <label class="required" for="id_password">Password:</label> <input id="id_password" type="password" name="password"> <input type="hidden" value="1" name="this_is_the_login_form"> <input type="hidden" value="/admin/" name="next"> </div> <div class="submit-row"> <label> </label> <input type="submit" value="Log in"> </div> </form> <script type="text/javascript"> </div> <br class="clear"> </div> <div id="footer"></div> </div> <script type="text/javascript"> </body> </html>

1条回答

网友

1楼 · 发布于 2024-04-27 04:56:18

我也遇到了同样的问题（遗憾的是还没有解决），不过我发现了下面这段有趣的代码，或许会有所帮助：

来自http://comments.gmane.org/gmane.comp.python.wwwsearch.general/1991

import   mechanize
from   BeautifulSoup   import  BeautifulSoup  

class SanitizeHandler(mechanize.BaseHandler):
    """Response handler that re-serializes HTML bodies through BeautifulSoup.

    Every HTTP response whose Content-Type header mentions ``html`` has its
    body parsed by BeautifulSoup and replaced with the prettified output, so
    that whatever consumes the response afterwards sees cleaned-up markup.
    """

    def http_response(self, request, response):
        """Return ``response``, with its body sanitized when it is HTML.

        Args:
            request: The request that produced ``response`` (unused here).
            response: The response object handed over by mechanize.

        Returns:
            The response (wrapped in ``mechanize.response_seek_wrapper`` if
            it was not seekable). Non-HTML responses keep their body as is.
        """
        # get_data()/set_data() are used below; a response without ``seek``
        # is wrapped first, as in the original recipe.
        if not hasattr(response, "seek"):
            response = mechanize.response_seek_wrapper(response)

        # Look the header up through the message's own get() instead of
        # ``.dict.has_key(...)``: ``has_key`` no longer exists on Python 3
        # dicts, and a single lookup avoids depending on the ``.dict``
        # attribute. A missing header is treated as "not HTML".
        content_type = response.info().get("content-type") or ""
        if "html" in content_type:
            # Run HTML through a robust parser such as BeautifulSoup.
            soup = BeautifulSoup(response.get_data())
            response.set_data(soup.prettify())
        return response

# Create the browser and register the sanitizing handler on it.
br = mechanize.Browser()
sanitizer = SanitizeHandler()
br.add_handler(sanitizer)

# From here on, HTML fetched through ``br`` should arrive cleaned up.

这段代码会重写 http_response 方法，并“清理”返回的 HTML。

相关问题更多 >

编程相关推荐

热门问题

热门文章