编译时发生py2exe错误

# -*- coding: utf-8 -*-
"""Scrape Naver blog search results and repost them to a Naver mobile blog.

What the script does when it is run:

1. Reads the Naver credentials, the search keyword, the number of results
   to walk through and the promotional suffixes from ``config.ini``.
2. Pages through the Naver blog search for the keyword, ten results per
   request, and stores every hit as a ``subject|body`` line (cp949 encoded)
   in ``SAVEFILE``.
3. Logs in to Naver through mechanize and submits one blog post per stored
   line, appending the configured suffixes to the title and the body.

NOTE: this is Python 2 code (``cookielib``, ``urllib.urlencode``, byte-string
``decode``).
"""
import cookielib
import getopt
import os
import string
import sys
import time
import urllib

import configobj
import lxml
import lxml.html
import mechanize
from lxml import etree
from lxml.html import parse, fromstring

# ---------------------------------------------------------------------------
# Browser setup
# ---------------------------------------------------------------------------
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
# Follows refresh 0 but does not hang on refresh > 0.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# Flip these to True to get debugging output.
br.set_debug_http(False)
br.set_debug_redirects(False)
br.set_debug_responses(False)
# Pretend to be a desktop browser.
br.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322)')]

SAVEFILE = 'betextract.txt'
post_count = 0


def getMacAddress():
    """Return the first MAC address reported by the OS, or '' if none.

    On Windows the output of ``ipconfig /all`` is scanned for a line that
    starts with 'Physical Address'; on every other platform the output of
    ``/sbin/ifconfig`` is scanned for a line containing 'Ether'.

    The result is kept in a local that is initialised up front: previously
    the function raised ``UnboundLocalError`` whenever no line matched.
    """
    found = ''
    if sys.platform == 'win32':
        for line in os.popen("ipconfig /all"):
            if line.lstrip().startswith('Physical Address'):
                found = line.split(':')[1].strip().replace('-', ':')
                break
    else:
        for line in os.popen("/sbin/ifconfig"):
            if line.find('Ether') > -1:
                found = line.split()[4]
                break
    return found


# Store the result: the old code discarded the return value and therefore
# always printed the empty module-level default.
mac = getMacAddress()
print(mac)


def safeunicode(s):
    """Decode *s* as UTF-8 and round-trip it through euc-kr / cp949.

    Returns the round-tripped text, or the plain UTF-8 decoded text when
    the round trip is not possible.  ``UnicodeError`` is caught so that a
    failure in the *encode* step (``UnicodeEncodeError``) falls back as
    well, not only a failure in the decode step.
    """
    s = str(s).decode('utf-8')
    try:
        return s.encode('euc-kr').decode('cp949')
    except UnicodeError:
        return s


def _one_line(text):
    """Replace line breaks in *text* so it fits a one-record-per-line file."""
    return text.replace('\r', ' ').replace('\n', ' ')


# ---------------------------------------------------------------------------
# Configuration (loaded from the ini template file)
# ---------------------------------------------------------------------------
config = configobj.ConfigObj('config.ini')
section1 = config['NAVERPASS']
section2 = config['NAVERID']
section3 = config['Nblogkeyword']
section4 = config['end_line']
section5 = config['Content']
section6 = config['HongboSubject']
section7 = config['HongboBody']

NAVERPASS = section1['NAVERPASS']
NAVERID = section2['NAVERID']
Nblogkeyword = section3['Nblogkeyword']
Content = section5['Content']
HongboSubject = section6['HongboSubject']
HongboBody = section7['HongboBody']

enkw = str(Nblogkeyword).decode('cp949')
# Converted to int because it is decremented in the paging loop below.
end_line = int(section4['end_line'])

# ---------------------------------------------------------------------------
# Step 1: collect search results into SAVEFILE
# ---------------------------------------------------------------------------
start_line = 0

# The file is opened once, outside the paging loop, so the hits of every
# page are kept instead of each page truncating the previous one.
with open(SAVEFILE, 'w+') as save:
    # "> 0" rather than plain truthiness: the counter drops by 10 per page,
    # so a value that is not a multiple of 10 would skip past zero and the
    # loop would never terminate.
    while end_line > 0:
        form = {
            'where': 'post',
            'sm': 'ab_pge',
            'query': enkw,
            'st': 'sim',
            'date_option': '-1',
            'date_from': '',
            'date_to': '',
            'dup_remove': '1',
            'post_blogid': '',
            'post_blogurl': '',
            'post_blogurl_without': '',
            'detail_and_query': '',
            'detail_not_query': '',
            'detail_or_query': '',
            'detail_udp_query': '',
            'srchby': 'all',
            # NOTE(review): this value is already percent-encoded and
            # urlencode() will encode the '%' signs again -- confirm that
            # the server expects the double-encoded form.
            'nso': 'so%3Ar%2Ca%3Aall%2Cp%3A',
            'ie': 'utf8',
            'start': start_line,
        }
        qstring = urllib.urlencode(form)
        f = urllib.urlopen('http://cafeblog.search.naver.com/search.naver?%s' % qstring)
        try:
            html = f.read()
        finally:
            f.close()

        start_line += 10
        end_line = end_line - 10

        html = lxml.html.fromstring(html)
        for content in html.cssselect('li.sh_blog_top'):
            try:
                subject = content.cssselect('dl dt a.sh_blog_title b')[0].text_content()
                body = content.cssselect('dl dd.sh_blog_passage')[0].text_content()
                print(u'[+추출중+] %s | %s ' % (subject, body))
                # One record per line: strip embedded line breaks so the
                # posting step below can read the record back intact.
                chen = '%s|%s' % (_one_line(subject), _one_line(body))
                # Important: the record must be encoded (cp949) before it
                # is written.
                title2 = chen.encode('cp949')
                save.write(title2 + '\n')
            except Exception as err:
                # Best effort: report the problem and give up on the rest
                # of this page (same policy as before).
                sys.stderr.write(u'에러발생 => 에러 자동처리중... %s\n' % str(err))
                break

# ---------------------------------------------------------------------------
# Step 2: log in to Naver
# ---------------------------------------------------------------------------
br.open('http://nid.naver.com/nidlogin.login')
br.select_form(nr=0)
br.form['id'] = NAVERID
br.form['pw'] = NAVERPASS
# Original author's note: the submit target was found by analysing the
# page's JavaScript source, which is why the action is set by hand here.
br.form.action = 'https://nid.naver.com/nidlogin.login'
br.submit()
# Response body of the login request; not used further, kept for debugging.
html = br.response().read()

br.open('http://m.blog.naver.com/')

# ---------------------------------------------------------------------------
# Step 3: publish one blog post per stored record
# ---------------------------------------------------------------------------
with open(SAVEFILE) as save:
    for line in save:
        # Split on the FIRST '|' only, so a body that itself contains '|'
        # is no longer truncated; skip lines that hold no separator
        # instead of failing with IndexError.
        sub, sep, con = line.rstrip('\n').partition('|')
        if not sep:
            continue

        # NOTE(review): the blog id is hard-coded here although NAVERID is
        # read from the configuration -- confirm this is intended.
        br.open('http://m.blog.naver.com/PostWriteForm.nhn?blogId=ylgwn&categoryNo=')
        br.select_form(nr=0)
        br.form['post.title'] = sub.decode('cp949') + HongboSubject.decode('cp949')
        br.form['post.contents.contentsValue'] = con.decode('cp949') + HongboBody.decode('cp949')
        br.form.action = 'http://m.blog.naver.com/PostWrite.nhn'
        br.submit()

        post_count += 1
        print(str(post_count) + u'개 글올리기 성공!!')

print(u'블로그 글올리기 완료!')

1条回答

网友

1楼 · 发布于 2024-05-19 03:39:32

在我看来，您正在将gzip压缩的数据打印到命令行。可能是因为服务器返回的数据在编译为exe时没有被urllib解压。

尝试手动删除Accept-Encoding头，这将阻止服务器返回压缩数据并防止脚本失败。您还可以尝试使用urllib2或其他解决方案从web下载数据。

相关问题更多 >

编程相关推荐

热门问题

热门文章