从数据集Python获取缺少的datetime数据系列

import pandas as pd import numpy as np # testing data data = pd.read_excel("testing.xlsx") # Set Attribute Date to Pandas Datetime data['Date'] = pd.to_datetime(data['Date']) # Round Time into minutes data['Date'] = pd.Series(data['Date']).dt.round("1min") # Remove duplicates keeping one data.drop_duplicates(subset ="Date", keep = "first", inplace = True) # Change date order, because date_range increment month and not day date['Date'] = data['Date'].dt.strftime('%d-%m-%Y %H:%M:%S') # Data Range between initial date and final date date_range = pd.date_range(date['Date'][date.index[0]], date['Date'][date.index[-1]], freq='1Min') # Transform the column (it's a string) to datetime type dt = pd.to_datetime(date['Date']) # create datetime index passing the datetime series dato = pd.DatetimeIndex(dt.values) df = pd.DataFrame(np.random.randint(1, 20, (dato.shape[0], 1))) df.index = dato # set index df_missing = df.drop(df.between_time('02:12', '02:14').index) #check for missing datetimeindex values based on reference index (with all values) missing_dates = df.index[~df.index.isin(date.index)] print(missing_dates)

DatetimeIndex(['2019-04-01 02:00:00', '2019-04-01 02:01:00', '2019-04-01 02:02:00', '2019-04-01 02:03:00', '2019-04-01 02:04:00', '2019-04-01 02:05:00', '2019-04-01 02:06:00', '2019-04-01 02:07:00', '2019-04-01 02:08:00', '2019-04-01 02:09:00', '2019-04-01 02:11:00', '2019-04-01 02:12:00', '2019-04-01 02:13:00', '2019-04-01 02:14:00', '2019-04-01 02:15:00', '2019-04-01 02:16:00'], dtype='datetime64[ns]', freq=None)

1条回答

网友

1楼 · 发布于 2024-05-14 13:13:13

不知道为什么你需要执行所有的重新索引等-也许你可以澄清一下。指定日期范围的比较工作非常好，如下所示：

import pandas as pd
import numpy as np

# create sample data
df = pd.DataFrame({'t': ['2019-04-01 02:00:00', '2019-04-01 02:01:00',
                         '2019-04-01 02:02:00', '2019-04-01 02:03:00',
                         '2019-04-01 02:04:00', '2019-04-01 02:05:00',
                         '2019-04-01 02:06:00', '2019-04-01 02:07:00',
                         '2019-04-01 02:08:00', '2019-04-01 02:09:00',
                         '2019-04-01 02:11:00', '2019-04-01 02:12:00',
                         '2019-04-01 02:13:00', '2019-04-01 02:14:00',
                         '2019-04-01 02:15:00', '2019-04-01 02:16:00'],
                   'y': np.arange(16)})
df['t'] = pd.to_datetime(df['t'])

# set the time column as index
df = df.set_index(['t'])

# target: date_range 1 min steps
tgt = pd.date_range(df.index[0], df.index[-1], freq='min')

# now use .isin() and negate to get the timestamps that are missing in df
print(tgt[~tgt.isin(df.index)])
# DatetimeIndex(['2019-04-01 02:10:00'], dtype='datetime64[ns]', freq='T')

相关问题更多 >

编程相关推荐

热门问题

热门文章