如何更改正在抓取的html的日期格式？

# Import libraries
import urllib2
from bs4 import BeautifulSoup
import datetime

# Create the output document (append mode keeps results from earlier runs)
f = open('CarbonPrice.txt','a')

# Create soup from the downloaded page and locate the price table
soup = BeautifulSoup(urllib2.urlopen('https://www.theice.com/marketdata/DelayedMarkets.shtml?productId=3418&hubId=4080').read())
table = soup.find('table', {"class":"data default borderless"})

# Find and record time: probe the 2nd..5th header cells for 'Time' and read
# the cell at the matching offset after the 'Dec13' cell.
# Any lookup that returns None makes the next attribute access raise
# AttributeError, which is reported as 'Error' in the output file.
try:
    first_th = table.find('th')
    second_th = first_th.findNext('th')
    if second_th.contents[0] == 'Time':
        td_tag = table.find('td', text = 'Dec13')
        next_td_tag = td_tag.findNext('td')
        timevar = next_td_tag.contents[0]
    else:
        third_th = second_th.findNext('th')
        if third_th.contents[0] == 'Time':
            td_tag = table.find('td', text = 'Dec13')
            next_td_tag = td_tag.findNext('td')
            third_td_tag = next_td_tag.findNext('td')
            timevar = third_td_tag.contents[0]
        else:
            fourth_th = third_th.findNext('th')
            if fourth_th.contents[0] == 'Time':
                td_tag = table.find('td', text = 'Dec13')
                next_td_tag = td_tag.findNext('td')
                third_td_tag = next_td_tag.findNext('td')
                fourth_td_tag = third_td_tag.findNext('td')
                timevar = fourth_td_tag.contents[0]
            else:
                fifth_th = fourth_th.findNext('th')
                if fifth_th.contents[0] == 'Time':
                    td_tag = table.find('td', text = 'Dec13')
                    next_td_tag = td_tag.findNext('td')
                    third_td_tag = next_td_tag.findNext('td')
                    fourth_td_tag = third_td_tag.findNext('td')
                    fifth_td_tag = fourth_td_tag.findNext('td')
                    timevar = fifth_td_tag.contents[0]
                else:
                    f.write ('Error')
    # NOTE(review): indentation reconstructed from a whitespace-collapsed paste.
    # If no 'Time' header was matched above, timevar is unbound here and this
    # line raises NameError, which the except clause below does not catch.
    f.write (timevar)
except AttributeError:
    f.write('Error')
f.write('\n')
f.close()

2条回答

网友

1楼 · 编辑于 2024-04-27 10:00:29

您的代码中存在多处问题。您应该尝试使用循环（loop），这样就不需要把相同的代码重复五次。

对于 BeautifulSoup，可以使用 find_all 函数（而不是 find）来查找某个标签的所有匹配项。

而且 BeautifulSoup 返回的时间显然具有固定的格式，所以完成任务的一种方法就是解析 BeautifulSoup 返回的字符串。

我把你的代码改了很多：

#Import libraries
import urllib2
from bs4 import BeautifulSoup
import datetime

# Create soup from the downloaded page and locate the price table
soup = BeautifulSoup(urllib2.urlopen('https://www.theice.com/marketdata/DelayedMarkets.shtml?productId=3418&hubId=4080').read())
table = soup.find('table', {"class":"data default borderless"})
if table is None:
    raise ValueError('price table not found in the downloaded page')

# Find the column index of the 'Time' header.
# None (rather than -1) marks "not found": -1 is a valid list index and
# would silently select the last column of each row.
time_idx = None
for idx, th in enumerate(table.find_all('th')):
    if th.get_text().strip() == 'Time':
        time_idx = idx
        break
if time_idx is None:
    raise ValueError("no 'Time' column header found in the price table")

# Collect the formatted date of every row that contains a 'Dec13' cell
timevar = []
for tr in table.find_all('tr'):
    # Extract the text of each cell, without surrounding whitespace
    td_contents = [td.get_text().strip() for td in tr.find_all('td')]
    # If this row matches our requirement, take the Time column
    if 'Dec13' in td_contents:
        time_str = td_contents[time_idx]
        # Parses e.g. 'Thu Dec 05 16:26:24 EST 2013 GMT'. 'EST' and 'GMT' are
        # literal text in the format, so any other zone abbreviation raises
        # ValueError.
        time_obj = datetime.datetime.strptime(time_str,'%a %b %d %H:%M:%S EST %Y GMT')
        # '%x' renders the date in the current locale's date representation
        timevar.append(time_obj.strftime('%x'))

if not timevar:
    raise ValueError("no 'Dec13' row found in the price table")

# Append the first match to the output document, one record per line so
# that successive runs do not run together
with open('CarbonPrice.txt','a') as f:
    f.write(timevar[0] + '\n')

网友

2楼 · 编辑于 2024-04-27 10:00:29

以下是一种方法：

>>> import time
>>> date_time = 'Thu Dec 05 16:26:24 EST 2013 GMT'
>>> year = time.strptime(date_time, "%a %b %d %H:%M:%S EST %Y GMT").tm_year
>>> month = time.strptime(date_time, "%a %b %d %H:%M:%S EST %Y GMT").tm_mon
>>> day = time.strptime(date_time, "%a %b %d %H:%M:%S EST %Y GMT").tm_mday
>>> print("%i/%i/%i"%(month, day, year))
12/5/2013

相关问题更多 >

编程相关推荐

热门问题

热门文章