关于从 Vivino.com 抓取数据,我有两个问题:1.)使用下面的代码,我可以从 Vivino 网站获取葡萄酒信息和评论,但我希望得到英文评论,或者只获取英文评论。有办法做到这一点吗?2.)目前我只获取了葡萄牙的葡萄酒数据,但我还想要其他国家的葡萄酒。如果我删除 "country_codes[]": "pt",代码就无法运行了。我该怎样解决这个问题?
有人能帮我吗?多谢各位
#!/usr/bin/env python
# coding: utf-8
# Import packages
import requests
import json
import pandas as pd
# Get request from the Vivino website
def get_wine_data(wine_id, year, page):
    """Fetch one page of reviews for a wine vintage from the Vivino API.

    Args:
        wine_id: Wine identifier, interpolated into the URL path.
        year: Vintage year, sent as the ``year`` query parameter.
        page: Page number, sent as the ``page`` query parameter; each page
            asks for up to 50 reviews (``per_page=50``).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    }
    api_url = f"https://www.vivino.com/api/wines/{wine_id}/reviews?per_page=50&year={year}&page={page}"
    print(api_url)
    # Without a timeout a stalled connection would block the scraper forever.
    response = requests.get(api_url, headers=headers, timeout=30)
    # Surface HTTP errors here rather than as a confusing KeyError in the
    # caller when the error body has no "reviews" key.
    response.raise_for_status()
    return response.json()
# Query the Vivino explore endpoint for one page of wines matching the
# filters below.

# Countries whose wines are requested. requests encodes a list value as a
# repeated query key, so more codes can simply be appended,
# e.g. ["pt", "es", "fr"].
WINE_COUNTRY_CODES = ["pt"]

r = requests.get(
    "https://www.vivino.com/api/explore/explore",
    params={
        # NOTE(review): "country_code" is kept as-is; presumably it is the
        # shopper's market rather than the wine-origin filter -- confirm.
        "country_code": "FR",
        "country_codes[]": WINE_COUNTRY_CODES,
        "currency_code": "EUR",
        "grape_filter": "varietal",
        "min_rating": "1",
        "order_by": "price",
        "order": "asc",
        "page": 1,
        "price_range_max": "500",
        "price_range_min": "0",
        "wine_type_ids[]": "1",
    },
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    },
    # Without a timeout a stalled connection would block the script forever.
    timeout=30,
)
# Fail here with a clear HTTP error instead of a KeyError further down when
# the error body lacks "explore_vintage".
r.raise_for_status()
# Variables to scrape from the Vivino website
def _dig(node, *path, default=None):
    """Follow *path* (dict keys and list indexes) into *node*.

    Returns *default* as soon as a step is missing, out of range, or lands on
    a non-container such as ``None``, instead of raising.
    """
    for step in path:
        try:
            node = node[step]
        except (KeyError, IndexError, TypeError):
            return default
    return node


def _wine_row(match):
    """Flatten one explore match into the 40-value tuple kept per wine.

    The wine id, name and vintage year are read strictly (a missing value
    raises) because the review download needs them. Every other field is
    optional and becomes ``None`` when the wine lacks it, e.g. a wine with
    no style, fewer than four food pairings or fewer than seven flavor
    groups.
    """
    vintage = match["vintage"]
    wine = vintage["wine"]
    year = vintage["year"]

    style = _dig(wine, "style")
    structure = _dig(wine, "taste", "structure")
    flavors = _dig(wine, "taste", "flavor")
    region_grapes = _dig(wine, "region", "country", "most_used_grapes")

    row = [
        _dig(wine, "winery", "name"),
        year,
        wine["id"],
        f'{wine["name"]} {year}',
        _dig(vintage, "statistics", "ratings_average"),
        _dig(vintage, "statistics", "ratings_count"),
        _dig(match, "prices", 0, "amount"),
        _dig(style, "acidity"),
        _dig(style, "blurb"),
        _dig(style, "body"),
        _dig(style, "body_description"),
        _dig(wine, "region", "country", "name"),
        _dig(style, "description"),
    ]
    # First four food pairings.
    row += [_dig(style, "food", i, "name") for i in range(4)]
    # NOTE(review): the first grape is read from style.country while the
    # second and third come from region.country, exactly as before; confirm
    # whether that mix is intended.
    row += [
        _dig(style, "country", "most_used_grapes", 0, "name"),
        _dig(region_grapes, 1, "name"),
        _dig(region_grapes, 2, "name"),
    ]
    row += [
        _dig(structure, key)
        for key in (
            "acidity",
            "calculated_structure_count",
            "intensity",
            "sweetness",
            "tannin",
            "user_structure_count",
        )
    ]
    # First seven flavor groups, each followed by its count.
    for i in range(7):
        row.append(_dig(flavors, i, "group"))
        row.append(_dig(flavors, i, "stats", "count"))
    return tuple(row)


results = [_wine_row(t) for t in r.json()["explore_vintage"]["matches"]]
###
# Write the raw JSON of the first two matches to one file each.
for t in r.json()["explore_vintage"]["matches"][0:2]:
    wine_id = t["vintage"]["wine"]["id"]
    # No trailing comma here: it would turn `year` into a 1-tuple and put
    # "(2019,)"-style text into the file name.
    year = t["vintage"]["year"]
    # Plain "w" is enough; the file is never read back through this handle.
    with open(f'output-wine{wine_id}-year{year}.json', 'w', encoding="utf-8") as f:
        json.dump(t, f, indent=4, sort_keys=True)
###
# Load the per-wine tuples into a DataFrame, one named column per tuple slot.
wine_columns = [
    "Winery",
    "Year",
    "Wine ID",
    "Wine",
    "Rating",
    "num_review",
    "price",
    "acidity",
    "Blurb",
    "Body",
    "Body_des",
    "country",
    "wine_des",
    "food_1",
    "food_2",
    "food_3",
    "food_4",
    "grape_1",
    "grape_2",
    "grape_3",
    "acidity_score",
    "calculated_structure_count",
    "intensity",
    "sweetness",
    "tannin",
    "user_structure_count",
    "flavor_1",
    "flavor_1_count",
    "flavor_2",
    "flavor_2_count",
    "flavor_3",
    "flavor_3_count",
    "flavor_4",
    "flavor_4_count",
    "flavor_5",
    "flavor_5_count",
    "flavor_6",
    "flavor_6_count",
    "flavor_7",
    "flavor_7_count",
]
dataframe = pd.DataFrame(data=results, columns=wine_columns)
# Scraping the reviews from the Vivino website

# Set to a language code such as "en" to keep only reviews in that language;
# None keeps every review, which is the behaviour this script always had.
# NOTE(review): assumes each review dict carries a "language" code -- confirm
# against a live response.
review_language = None

ratings = []
# To try the loop on just a couple of wines, iterate over dataframe.head(2).
for _, row in dataframe.iterrows():
    page = 1
    while True:
        print(
            f'Getting info about wine {row["Wine ID"]}-{row["Year"]} Page {page}'
        )
        d = get_wine_data(row["Wine ID"], row["Year"], page)
        # An empty or missing review list marks the end of the pages.
        reviews = d.get("reviews")
        if not reviews:
            break
        # Named `review`, not `r`, so the explore response held in `r` is not
        # overwritten.
        for review in reviews:
            if (
                review_language is not None
                and review.get("language") != review_language
            ):
                continue
            ratings.append(
                [
                    row["Year"],
                    row["Wine ID"],
                    review["rating"],
                    review["note"],
                    review["created_at"],
                ]
            )
        page += 1

ratings = pd.DataFrame(
    ratings, columns=["Year", "Wine ID", "User Rating", "Note", "CreatedAt"]
)
# Join every review with the details of its wine and write the result to CSV.
# With no `on=` given, pandas joins on the column names both frames share.
df_out = pd.merge(ratings, dataframe, how="inner")
df_out.to_csv("data.csv", index=False)
要仅获取英文评论,可以按每条评论的 "language" 键进行筛选:英文评论的该值为 "en"。
要获取更多国家的葡萄酒,可以把多个国家代码以列表形式传给 "country_codes[]" 键,例如 ["pt", "es", "fr"]。
相关问题 更多 >
编程相关推荐