下载Web目录中的所有图像

from init import *
from bs4 import BeautifulSoup
import urllib
import urllib.request
# Run this image scraper from the directory where
# you want the scraped images to be saved.

def make_soup(url):  # fetch `url` and parse the response with html.parser
    html = urllib.request.urlopen(url)
    return BeautifulSoup(html, features="html.parser")

def get_images(url):  # gather the src of every <img> on the page at `url`
    soup = make_soup(url)
    # list of the bs4 <img> element tags found on the page
    images = [img for img in soup.findAll('img')]
    print (str(len(images)) + "images found.")
    print ('Downloading images to current working directory.')
    # the 'src' attribute of each <img> tag
    image_links = [each.get('src') for each in images]
    for each in image_links:
        filename=each.split('/')[-1]
        urllib.request.Request(each, filename)  # NOTE(review): this builds a Request object from the raw src; a relative src such as '/icons/blank.gif' raises the ValueError in the traceback
    return image_links

# a standard call looks like this (it runs whenever this module is executed or imported)
get_images('https://omabilder.000webhostapp.com/img/')

7images found. Downloading images to current working directory. Traceback (most recent call last): File "C:\Users\MyPC\Desktop\oma projekt\getpics.py", line 1, in <module> from init import * File "C:\Users\MyPC\Desktop\oma projekt\init.py", line 9, in <module> from getpics import * File "C:\Users\MyPC\Desktop\oma projekt\getpics.py", line 26, in <module> get_images('https://omabilder.000webhostapp.com/img/') File "C:\Users\MyPC\Desktop\oma projekt\getpics.py", line 22, in get_images urllib.request.Request(each, filename) File "C:\Users\MyPC\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 328, in __init__ self.full_url = url File "C:\Users\MyPC\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 354, in full_url self._parse() File "C:\Users\MyPC\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 383, in _parse raise ValueError("unknown url type: %r" % self.full_url) ValueError: unknown url type: '/icons/blank.gif'

1条回答

网友

1楼 · 发布于 2024-04-20 07:02:42

这些gif是网站上链接旁边的图标（只有约20x20像素的小图像），它们确实显示在该页面上。如果我理解正确的话，你想下载的是这些链接所指向的png图片，而不是你提供的url页面上直接显示的图片。

如果您想下载这些链接所指向的png图像，可以使用以下代码：

from bs4 import BeautifulSoup
import urllib
import urllib.request
import os
# Run this image scraper from the directory where
# you want the scraped images to be saved.

def make_soup(url):
    """Fetch ``url`` and return its content parsed as HTML.

    Args:
        url: Address of the page to download; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.
    """
    # Use the response as a context manager so the HTTP connection is
    # closed as soon as the document has been parsed, instead of staying
    # open until the response object happens to be garbage-collected.
    with urllib.request.urlopen(url) as html:
        return BeautifulSoup(html, features="html.parser")

def get_images(url):
    """Download every PNG file linked from the page at ``url``.

    The page is parsed with ``make_soup``; every ``<a href=...>`` whose
    href ends with ``.png`` is downloaded into the current working
    directory.

    Args:
        url: Address of the page (typically a directory listing) to scan.
            It is treated as a directory: relative links are resolved
            against ``url`` with a trailing ``/`` appended if missing.

    Returns:
        The list of matching href values exactly as they appear in the page.
    """
    # Imported locally so the block does not depend on the file-level
    # import list being extended.
    from urllib.parse import urljoin, urlsplit

    soup = make_soup(url)
    # Collect the href of every anchor, then keep only the PNG links.
    hrefs = (link["href"] for link in soup.find_all('a', href=True))
    images = [href for href in hrefs if href.endswith(".png")]
    print(f"{len(images)} images found.")
    print('Downloading images to current working directory.')

    # Treat ``url`` as a directory so plain relative links keep resolving
    # below it, as the previous os.path.join-based code did on POSIX.
    base = url if url.endswith("/") else url + "/"
    for each in images:
        # urljoin follows URL resolution rules: it always uses "/" (unlike
        # os.path.join on Windows) and copes with root-relative
        # ("/img/a.png") and absolute ("https://...") hrefs.
        source = urljoin(base, each)
        # Save under the bare file name so an href containing directories
        # (or a leading "/") cannot write outside the working directory.
        target = os.path.basename(urlsplit(each).path) or os.path.basename(each)
        urllib.request.urlretrieve(source, target)
    return images

# A standard call looks like this. As a top-level statement it runs
# whenever this module is executed or imported.
get_images('https://omabilder.000webhostapp.com/img/')

相关问题更多 >

编程相关推荐

热门问题

热门文章