Python:[Errno ftp error][Errno 111]连接被拒绝

2024-05-16 04:08:42 发布

您现在位置:Python中文网/ 问答频道 /正文

import urllib

def get_page(url):
    try:
        import urllib
        return urllib.urlopen(url).read()
    except:
        return ""

def get_next_target(seed):
    start_link = seed.find('<a href=')
    if start_link == -1:
        return 0, 0
    start_quote = seed.find('"', start_link)
    end_quote = seed.find('"', start_quote + 1)
    url = seed[start_quote + 1 : end_quote ]
    return url, end_quote

def get_all_links(seed):
    links = []
    while True:
        url, remaining_page = get_next_target(seed)
        if url:
            links.append(url)
            seed = seed[remaining_page : ]
        else:
            break
    return links


def crawl_web(seed):
    tocrawl = [seed]                       
    crawled = []          
    while tocrawl:
        page = tocrawl.pop()  
        if page not in crawled:
            crawled.append(page) 
            f = urllib.urlopen(page)
            content = f.read() 
            tocrawl = tocrawl + get_all_links(content)
    print crawled

crawl_web('https://en.wikipedia.org/wiki/Software_engineering') 

我正在尝试构建web爬虫,但上面的代码显示以下错误:

click here for viewing the error

请帮忙!!你知道吗


Tags: urlgetreturnifdefpagelinklinks