#!/usr/bin/env python
import urllib
import mechanize
from bs4 import BeautifulSoup
from urlparse import urlparse
def getPic(search):
    """Return the URL of the first Google Images result for ``search``.

    The query text is percent-encoded, the Google Images result page is
    fetched through ``mechanize``, and the page's anchors are scanned for the
    first ``href`` containing ``imgres?imgurl``.

    Args:
        search: Raw (not yet URL-encoded) query text, e.g.
            ``"occupy wall street"``.

    Returns:
        The value of the leading ``imgurl`` query parameter of the first
        matching link, exactly as it appears in the link (it is not
        percent-decoded). An empty list when the page contains no matching
        link. ``None`` when fetching or parsing fails, in which case
        ``error`` is printed.

    Note:
        An empty list means the fetched HTML had no ``imgres?imgurl``
        anchors. Google appears to emit those links only for
        JavaScript-capable clients, which ``mechanize.Browser`` is not, so
        this result is expected unless the page markup changes.
    """
    try:
        if not isinstance(search, str):
            # Python 2 ``unicode`` input: urllib.quote needs a byte string.
            search = search.encode("utf-8")
        # Encode every reserved character, not just spaces, so queries
        # containing '&', '#', '+', ... cannot corrupt the request URL.
        query = urllib.quote(search)

        browser = mechanize.Browser()
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Mozilla')]
        response = browser.open(
            "https://www.google.com/search?site=&tbm=isch&source=hp"
            "&biw=1855&bih=990&q=" + query + "&oq=" + query
        )

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response, "html.parser")
        for anchor in soup.findAll("a"):
            href = anchor.get("href")  # None for anchors without an href
            if not href or "imgres?imgurl" not in href:
                continue
            first_param = urlparse(str(href)).query.split("&")[0]
            # Strip only the leading key; the value itself is left untouched.
            if first_param.startswith("imgurl="):
                first_param = first_param[len("imgurl="):]
            return first_param
        # No image link on the page: keep the historical empty-list result.
        return []
    except Exception:
        # Best-effort helper: report the failure and return None, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("error")
# Demo: run one sample query and print whatever getPic returns.
print(getPic("occupy wall street"))
我得到的输出不是图像的链接,而是“[]”。有人知道我的代码有什么问题吗?
Google只向启用了JavaScript的浏览器发送包含
"imgres?imgurl"
的链接,但是
mechanize.Browser()
的行为就像一个没有JavaScript的浏览器。请在浏览器中关闭JavaScript,然后查看Google实际返回的HTML。
相关问题 更多 >
编程相关推荐