在循环中将多个csv文件分别读取到各自独立的数据帧中

import pandas as pd

# Output file name for this station and the raw tab-separated export to read.
station_id = 'id.csv'
input_file = 'filename.txt'
# Source and destination directories. Both end with a slash, so plain
# concatenation with a file name yields a full path.
unformatted = 'C:/Users/....../Unformatted/'
formatted = 'C:/....../Formatted/'

print(f'\nReading data file: {input_file}.')

# Maps column names in the raw export to the names used in the output.
# Only the keys listed here are loaded; the commented-out entries are
# disabled mappings kept for reference.
fields = {
    'Timestamp': 'timestamp',
    # 'Sample Point Name': 'station_name',
    # 'Sample Point Name Description': 'station_description',
    # 'Start Date':'state_date',
    'PM10 (1h) Validated': 'PM_1h_10_ug_m3',
    'PM10 Validated': 'PM_10_ug_m3',
    # 'PM2.5 (1h) Final': 'pm_25',
    # 'PM2.5 Final': 'pm2.5_ug_m3'
}

df = pd.read_table(
    unformatted + input_file,
    usecols=list(fields),
    sep='\t',
    encoding='utf-16',
)
df = df.rename(columns=fields)

# Plain column assignment replaces the column together with its dtype.
# Writing through ``df.loc[:, 'timestamp']`` sets values in place on
# pandas >= 2.0, which can leave the column as ``object`` and make the
# ``.dt`` accessor below raise.
df['timestamp'] = pd.to_datetime(df['timestamp'], dayfirst=True)

# Build the combined "dd/mm/YYYY HHMM" label in one vectorised call. Unlike a
# per-row ``strftime`` via ``apply``, this yields NaN for missing timestamps
# instead of raising.
df['Date_Time'] = df['timestamp'].dt.strftime('%d/%m/%Y %H%M')

# Keep only the output columns, in output order (this drops 'timestamp').
df = df[['Date_Time', 'PM_1h_10_ug_m3', 'PM_10_ug_m3']]

# Percentage of labelled rows that carry a value in each PM10 column.
row_count = df['Date_Time'].count()
availability_PM_1h = df['PM_1h_10_ug_m3'].count() / row_count * 100
availability_PM_10_min = df['PM_10_ug_m3'].count() / row_count * 100

# Check for NaN values.
PM10_nan = df['PM_10_ug_m3'].isnull().sum()
PM10_1h_nan = df['PM_1h_10_ug_m3'].isnull().sum()
print(f'Count of PM10 NaN: {PM10_nan}')
print(f'Count of PM10_1h NaN: {PM10_1h_nan}')

df.to_csv(formatted + station_id, index=False)

1条回答

网友

1楼 · 发布于 2024-05-14 05:52:21

假设您把处理单个文件的全部代码封装到一个函数中（即下面示例中的 read_csv(filepath)）。那么，处理多个文件的代码如下所示：

# filepaths: a list holding the paths of all files to read; define it before running the loop below

import os
import pandas as pd
from typing import List

def read_csv(filepath: str, *args, **kwargs) -> pd.DataFrame:
    """Read a single CSV file and return it as a ``pandas.DataFrame``.

    This is the per-file hook: add any processing that should happen for
    every file between the read and the ``return``.

    Args:
        filepath: Path to the CSV file; it is resolved to an absolute path
            before reading.
        *args: Extra positional arguments forwarded to ``pandas.read_csv``.
        **kwargs: Extra keyword arguments forwarded to ``pandas.read_csv``
            (for example ``sep`` or ``encoding``).

    Returns:
        The data frame loaded from ``filepath``.
    """
    # Forward the extra arguments so callers can control parsing
    # (separator, encoding, ...) per file instead of having them ignored.
    df = pd.read_csv(os.path.abspath(filepath), *args, **kwargs)

    # further processing steps for a single file...
    # ...
    return df

# Build the list of data frames in one pass: one processed frame per entry
# of `filepaths`, kept in the same order as the paths.
dfs: List[pd.DataFrame] = [read_csv(path) for path in filepaths]

现在，您可以通过dfs[0]、dfs[1]等访问列表dfs中的每个数据帧，并在后续步骤中对它们做进一步处理。

对代码的一些改进建议：

只需下面这一行，就可以替代原来用于生成Date_Time列的那六行日期/时间处理代码：

# Format date and time together in a single vectorised call; the `.dt`
# accessor requires `df['timestamp']` to hold datetime values.
df['Date_Time'] = (
    df['timestamp']
    .dt.strftime('%d/%m/%Y %H%M')
)

对代码的一些改进建议：

相关问题更多 >

编程相关推荐

热门问题

热门文章