Python pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')


I am scraping a website and storing the data in MySQL. The code runs fine, but after a while it throws the following error. I am using Python 3.5.1 and PyMySQL to connect to the database.

pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')

Here is my code:

from bs4 import BeautifulSoup
import urllib.request
import re
import json
import pymysql
import pymysql.cursors


connection = pymysql.connect(host='XXX.XXX.XXX.XX',
                             user='XXX',
                             password='XXX',
                             db='XXX',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

r = urllib.request.urlopen('http://i.cantonfair.org.cn/en/ExpExhibitorList.aspx?k=glassware')
soup = BeautifulSoup(r, "html.parser")

links = soup.find_all("a", href=re.compile(r"expexhibitorlist\.aspx\?categoryno=[0-9]+"))
linksfromcategories = ([link["href"] for link in links])

string = "http://i.cantonfair.org.cn/en/"
linksfromcategories = [string + x for x in linksfromcategories]


for link in linksfromcategories:

  response = urllib.request.urlopen(link)
  soup2 = BeautifulSoup(response, "html.parser")

  links2 = soup2.find_all("a", href=re.compile(r"ExpExhibitorList\.aspx\?categoryno=[0-9]+"))
  linksfromsubcategories = ([link["href"] for link in links2])

  linksfromsubcategories = [string + x for x in linksfromsubcategories]
  for link in linksfromsubcategories:

        response = urllib.request.urlopen(link)
        soup3 = BeautifulSoup(response, "html.parser")
        links3 = soup3.find_all("a", href=re.compile(r"ExpExhibitorList\.aspx\?categoryno=[0-9]+"))
        linksfromsubcategories2 = ([link["href"] for link in links3])

        linksfromsubcategories2 = [string + x for x in linksfromsubcategories2]
        for link in linksfromsubcategories2:

              response2 = urllib.request.urlopen(link)
              soup4 = BeautifulSoup(response2, "html.parser")
              companylink = soup4.find_all("a", href=re.compile(r"expCompany\.aspx\?corpid=[0-9]+"))
              companylink = ([link["href"] for link in companylink])
              companydetail = soup4.find_all("div", id="contact")
              companylink = [string + x for x in companylink]
              my_list = list(set(companylink))

              for link in my_list:
                  print (link)
                  response3 = urllib.request.urlopen(link)
                  soup5 = BeautifulSoup(response3, "html.parser")
                  companydetail = soup5.find_all("div", id="contact")                      
                  for d in companydetail:
                        lis = d.find_all('li')
                        companyname = lis[0].get_text().strip()
                        companyaddress = lis[1].get_text().strip()
                        companycity = lis[2].get_text().strip()
                        try:
                            companypostalcode = lis[3].get_text().strip()
                            companypostalcode = companypostalcode.replace(",", "")
                        except IndexError:
                            # no postal code on the page
                            companypostalcode = 'null'
                        try:
                            companywebsite = lis[4].get_text().strip()
                            companywebsite = companywebsite.replace("\xEF\xBC\x8Cifl...","")
                        except IndexError:
                            companywebsite = 'null'


                        try:
                            with connection.cursor() as cursor:


                                print ('saving company details to db')
                                cursor.execute("""INSERT INTO company(
                                                                       companyname,address,city,pincode,website) 
                                                                   VALUES (%s, %s, %s, %s, %s)""",
                                                                   (companyname, companyaddress, companycity, 
                                                                    companypostalcode, companywebsite))
                            connection.commit()

                        finally:
                            print ("Company Data saved")
                  productlink = soup5.find_all("a", href=re.compile(r"ExpProduct\.aspx\?corpid=[0-9]+.categoryno=[0-9]+"))
                  productlink = ([link["href"] for link in productlink])

                  productlink = [string + x for x in productlink]
                  productlinkun = list(set(productlink))
                  for link in productlinkun:

                      print (link)
                      responseproduct = urllib.request.urlopen(link)
                      soupproduct = BeautifulSoup(responseproduct, "html.parser")
                      productname = soupproduct.select('div[class="photolist"] li a')
                      for element in productname:
                          print ("====================Product Name=======================")
                          productnames = element.get_text().strip()
                          print (productnames)
                          try:
                              with connection.cursor() as cursor:

                                  # Create a new record
                                  print ('saving products to db')
                                  cursor.execute("""INSERT INTO products(
                                                                       companyname,products) 
                                                                   VALUES (%s, %s)""",
                                                                   (companyname, productnames))
                                  connection.commit()

                          finally:
                              print ("Products Data Saved")

Now I can't figure out where my code is going wrong.


1 Answer

Hope this helps:

while True:  # retry until the insert succeeds
    try:
        with connection.cursor() as cursor:


            print ('saving company details to db')
            cursor.execute("""INSERT INTO company(
                                                   companyname,address,city,pincode,website) 
                                               VALUES (%s, %s, %s, %s, %s)""",
                                               (companyname, companyaddress, companycity, 
                                                companypostalcode, companywebsite))
        connection.commit()
        break
    except pymysql.err.OperationalError:
        connection.ping(True)  # reconnect, then retry the insert
print ("Company Data saved")

You can also look at a similar question that uses a connection pool.
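For illustration, here is a minimal pooling sketch using the third-party DBUtils package (an assumption: it is not used in the question's code and must be installed separately; the import path shown is the DBUtils 2.x one). With ping=1 the pool validates each connection when it is checked out, so stale connections are replaced transparently:

import pymysql
from dbutils.pooled_db import PooledDB  # DBUtils 2.x; 1.x used DBUtils.PooledDB

# One pool for the whole scraper; extra keyword arguments are passed
# straight through to pymysql.connect().
pool = PooledDB(creator=pymysql,
                maxconnections=5,
                ping=1,  # check the connection whenever it leaves the pool
                host='XXX.XXX.XXX.XX',
                user='XXX',
                password='XXX',
                db='XXX',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor)

connection = pool.connection()  # borrow a connection from the pool
try:
    with connection.cursor() as cursor:
        # placeholder values; in the question these come from the scraper
        cursor.execute("INSERT INTO products(companyname, products) VALUES (%s, %s)",
                       ('Acme Glassware', 'tumbler'))
    connection.commit()
finally:
    connection.close()  # returns the connection to the pool, not to the server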

Or read the source.
