如何绘制两个图表?
我想展示两个图表,一个是收入最高的10%所占的份额,另一个是收入最低的10%所占的份额,但我遇到了一个问题,只显示了一个图表。
这是关于最贫困人群的数据示例: "#""德国"",""DEU"",""最低10%收入所占份额"",""SI.DST.FRST.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""3.7"",""3.7"",""3.7"",""3.4"",""3.4"",""3.7"",""3.6"",""3.6"",""3.5"",""3.5"",""3.5"",""3.4"",""3.4"",""3.4"",""3.3"",""3.3"",""3.4"",""3.4"",""3.3"",""3.4"",""3.4"",""3.2"",""3.3"",""3.2"",""3.1"",""3.1"",""2.8"",""3.1"",""3.1"","""","""","""","
这是关于最富有的人群的数据示例: "德国,""DEU"",""最高10%收入所占份额"",""SI.DST.10TH.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""23.2"",""23.1"",""22.8"",""22.9"",""22.7"",""22.3"",""22.4"",""22.3"",""23.1"",""22.9"",""23.9"",""23.7"",""23.9"",""24"",""25.1"",""24.7"",""25.1"",""24.7"",""24"",""24"",""24.5"",""24.4"",""25"",""24.1"",""24.8"",""24.6"",""24.8"",""25.2"",""25.2"","""","""","""","
我更新后的代码是:
import csv
import matplotlib.pyplot as plt
# Number of metadata rows that precede the header row in each data file.
_PREAMBLE_ROWS = 4
# Index of the first per-year column (after name, code, indicator name, indicator code).
_FIRST_VALUE_COLUMN = 4


def _parse_share(raw_field):
    """Convert one raw CSV field into a float, using 0.0 for missing or malformed data.

    The files are read with ``csv.QUOTE_NONE``, so fields still carry their
    quote characters; an empty value shows up as four quote characters in the
    doubled-quote layout and is mapped to ``'0'`` before the quotes are removed.
    """
    text = raw_field.replace('""""', '0').replace('"', '').strip()
    # Permit at most one decimal point so input such as "1.2.3" counts as missing.
    if text and text.replace('.', '', 1).isdigit():
        try:
            return float(text)
        except ValueError:
            # str.isdigit() accepts a few characters float() rejects (e.g. superscripts).
            return 0.0
    return 0.0


def _load_share_file(path, key, income_shares, countries):
    """Read one indicator file and store each country's values under ``key``.

    ``income_shares`` and ``countries`` are updated in place. Returns the list
    of years found in the header row (empty if the file could not be read).
    Problems are reported with ``print`` and leave whatever was read so far.
    """
    years = []
    try:
        with open(path, 'r', encoding='utf-8') as file:
            # Quotes in the data files are not balanced, so the csv module is
            # told to ignore quoting and the quotes are stripped by hand.
            reader = csv.reader(file, quoting=csv.QUOTE_NONE)
            for _ in range(_PREAMBLE_ROWS):
                next(reader, None)
            header = next(reader, None)
            if header is None:
                print(f"Error: File '{path}' has no header row.")
                return years
            years = [
                int(cell.strip('"'))
                for cell in header[_FIRST_VALUE_COLUMN:]
                if cell.strip('"').isdecimal()
            ]
            for line in reader:
                if not line:
                    # Blank rows carry no country; skip them instead of failing.
                    continue
                country_name = line[0].strip('"')
                values = [_parse_share(field) for field in line[_FIRST_VALUE_COLUMN:]]
                # Only this file's own key is written, so data loaded from the
                # other file for the same country is left untouched.
                income_shares.setdefault(country_name, {})[key] = values
                if country_name not in countries:
                    countries.append(country_name)
    except FileNotFoundError:
        print(f"Error: File '{path}' not found.")
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"Error: An unexpected error occurred: {e}")
    return years


def read_income_shares(file_name, wealthiest_file_name):
    """Load the poorest-10% and wealthiest-10% income shares from two files.

    Args:
        file_name: Path of the file whose values are stored under
            ``'Values_Poorest'``.
        wealthiest_file_name: Path of the file whose values are stored under
            ``'Values_Wealthiest'``.

    Returns:
        A tuple ``(income_shares, countries, years)`` where ``income_shares``
        maps a country name to a dict with the keys ``'Values_Poorest'``
        and/or ``'Values_Wealthiest'`` (lists of floats, 0.0 meaning "no
        data"), ``countries`` lists the country names in first-seen order, and
        ``years`` holds the years from the header row of ``file_name``.

    A missing or unreadable file is reported on stdout and simply contributes
    no data; no exception is raised for it.
    """
    income_shares = {}
    countries = []
    years = _load_share_file(file_name, 'Values_Poorest', income_shares, countries)
    _load_share_file(wealthiest_file_name, 'Values_Wealthiest', income_shares, countries)
    return income_shares, countries, years
# Default input locations; kept identical to the paths the script always used.
_DEFAULT_POOREST_FILE = 'C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\one.txt'
_DEFAULT_WEALTHIEST_FILE = 'C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\two.txt'
# Year axis used when the header row of the data file yields no years.
_FALLBACK_YEARS = range(1960, 2023)


def _non_zero_points(years, values):
    """Pair years with values and keep only the points whose value is positive.

    Filtering the pairs (rather than the two lists separately) guarantees the
    returned x and y lists always have the same length.
    """
    kept = [(year, val) for year, val in zip(years, values) if val > 0]
    return [year for year, _ in kept], [val for _, val in kept]


def plot_income_distribution(countries, poorest_file=_DEFAULT_POOREST_FILE,
                             wealthiest_file=_DEFAULT_WEALTHIEST_FILE):
    """Plot the poorest-10% and wealthiest-10% income shares per country.

    Args:
        countries: Iterable of country names; surrounding quotes, spaces and a
            byte-order mark are stripped before the lookup.
        poorest_file: Path of the file holding the poorest-10% shares.
        wealthiest_file: Path of the file holding the wealthiest-10% shares.

    The figure is saved as ``income_distribution_plot.png`` and then shown.
    Values of 0 are treated as "no data" and are left out of the curves.
    """
    # Parse both files once; each series lives under its own key in the result.
    income_data, _, file_years = read_income_shares(poorest_file, wealthiest_file)
    # Use the years from the file header so values and years stay aligned.
    years_to_plot = list(file_years) if file_years else list(_FALLBACK_YEARS)

    plotted_any = False
    for country in countries:
        # Cleaning up the country name
        country_formatted = country.strip('" \ufeff')
        series = income_data.get(country_formatted)
        if series is None:
            continue

        if 'Values_Poorest' in series:
            poorest_years, poorest_values = _non_zero_points(years_to_plot, series['Values_Poorest'])
            # Print the data for debugging
            print("Years for {} in one.txt: {}".format(country_formatted, poorest_years))
            print("Poorest Percentages for {} in one.txt: {}".format(country_formatted, poorest_values))
            plt.plot(poorest_years, poorest_values,
                     label='{} - Poorest 10%'.format(country_formatted), linestyle='dashed')
            plotted_any = True

        if 'Values_Wealthiest' in series:
            wealthiest_years, wealthiest_values = _non_zero_points(years_to_plot, series['Values_Wealthiest'])
            # Print the data for debugging
            print("Years for {} in two.txt: {}".format(country_formatted, wealthiest_years))
            print("Wealthiest Percentages for {} in two.txt: {}".format(country_formatted, wealthiest_values))
            plt.plot(wealthiest_years, wealthiest_values,
                     label='{} - Wealthiest 10%'.format(country_formatted))
            plotted_any = True

    plt.title('Income Distribution Over Years')
    plt.xlabel('Year')
    plt.ylabel('Income Share (%)')
    # axis() sets both ranges at once: x spans the plotted years, y is 0 to 100 percent.
    plt.axis([years_to_plot[0], years_to_plot[-1], 0, 100])
    plt.grid(True)
    # Display legend only if at least one curve was drawn.
    if plotted_any:
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))  # Move legend outside the plot area
    plt.savefig('income_distribution_plot.png', bbox_inches='tight')  # Save the plot as a PNG file
    plt.show()
# Example invocation: the names may carry their surrounding double quotes,
# because plot_income_distribution strips them before looking up the data.
countries_to_plot = ['"Germany"']
plot_income_distribution(countries=countries_to_plot)
1 个回答
我部分复现了这个问题并找到了修复方法。
首先,你的数据文件有问题,当前的格式无法被csv模块正确处理。csv模块在处理复杂数据时非常好用,前提是这些数据遵循csv的规则。而在你的文件中,引号没有正确配对。因此,每一行都被当作一个单独的字段,这和你预期的不一样。
正确的做法是修复你的数据文件,但作为一种变通方法,你可以让csv模块忽略所有引号,再从数据字段中把引号去掉——去掉引号这一步你的代码已经在做了。只需在创建读取器时加上 quoting=csv.QUOTE_NONE
(对两个文件都这样做):
...
reader = csv.reader(file, quoting=csv.QUOTE_NONE)
...
这样就能正确获取每行的字段数量了。
但你还有第二个问题:文件中的数据范围是0到100,但你却把它们乘以100。结果是你的数据变成了0到10000的范围,这样就会在图表外面显示了……
作为一种变通方法,你可以直接使用:
# Convert values to percentage
income_data_country_percent = [val for val in income_data_country]
income_data_wealthiest_percent = [val for val in income_data_wealthiest]
或者直接处理原始值。
不过在解决了这两个问题后,我终于得到了一个图表。
你应该从中学到的是:你的代码已经有一些调试打印。如果你再多加一些,特别是打印一下表头,你就会立刻明白你只得到了一个字段——这就是我所做的……