按周生成 Pandas 数据透视表

# Keep only the rows whose "fecha" falls inside the reporting period (both bounds inclusive).
# The column is converted to datetimes once and reused for both comparisons.
fecha = df.fecha.astype('datetime64[ns]')
fmindate = fecha >= pd.to_datetime("2017-03-01")
fmaxdate = fecha <= pd.to_datetime("2018-01-15")
dffiltered = df[fmindate & fmaxdate]

# One row per (org, tipo, estado), one column per "fecha" value, plus "All" margins.
# "destination" is counted with len; the two time columns are aggregated by
# total_secs_inTimeSerie / mean_secs_inTimeSerie, helpers defined outside this snippet
# (presumably they sum / average durations in seconds -- confirm against their definitions).
txt = pd.pivot_table(
    dffiltered,
    columns=["fecha"],
    index=["org", "tipo", "estado"],
    values=["destination", "time_total", "time_avg"],
    aggfunc={
        "destination": len,
        "time_total": total_secs_inTimeSerie,
        "time_avg": mean_secs_inTimeSerie,
    },
    fill_value="",
    margins=True,
)

# Save the pivot table as an HTML report.
with open(report_name, "w") as text_file:
    text_file.write(txt.to_html())

2条回答

网友

1楼 · 编辑于 2024-05-17 14:20:30

您可以使用 `dt.weekofyear`（在新版 pandas 中为 `dt.isocalendar().week`）：

# Six sample dates, each on a month end and six months apart, starting at the first
# month end on or after 2017-04-03. An explicit MonthEnd offset is used instead of the
# '6M' string alias, which newer pandas releases deprecate.
rng = pd.date_range('2017-04-03', periods=6, freq=pd.offsets.MonthEnd(6))

# Small demo frame with the same columns as the question's data.
df = pd.DataFrame({'org':list('aaabbb'),
                   'estado':list('cccbbb'),
                   'destination':[4,5,4,5,5,4],
                   'time_total':[7,8,9,4,2,3],
                   'time_avg':[1,3,5,7,1,0],
                   'fecha':rng,
                   'tipo':list('aaabbb')})


# Replace every date by its ISO week number so the pivot table groups by week.
# Series.dt.weekofyear was removed in pandas 2.0; dt.isocalendar().week is the
# supported equivalent. The cast keeps plain integers rather than the nullable
# UInt32 dtype that isocalendar() returns.
df["fecha"] = df["fecha"].dt.isocalendar().week.astype(int)
print (df)
   destination estado  fecha org  time_avg  time_total tipo
0            4      c     17   a         1           7    a
1            5      c     44   a         3           8    a
2            4      c     18   a         5           9    a
3            5      b     44   b         7           4    b
4            5      b     18   b         1           2    b
5            4      b     44   b         0           3    b

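txt = pd.pivot_table(
    df,
    columns=["fecha"],
    index=["org", "tipo", "estado"],
    values=["destination", "time_total", "time_avg"],
    aggfunc={"destination": len, "time_total": "sum", "time_avg": "mean"},
    fill_value="",
    margins=True,
)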

print (txt)
                destination               time_avg                           \
fecha                    17   18   44 All       17   18        44       All   
org tipo estado                                                               
a   a    c                1  1.0  1.0   3        1  5.0  3.000000  3.000000   
b   b    b                   1.0  2.0   3           1.0  3.500000  2.666667   
All                       1  2.0  3.0   6        1  3.0  3.333333  2.833333   

                time_total                  
fecha                   17    18    44 All  
org tipo estado                             
a   a    c               7   9.0   8.0  24  
b   b    b                   2.0   7.0   9  
All                      7  11.0  15.0  33

网友

2楼 · 编辑于 2024-05-17 14:20:30

也许我解释得不够清楚。我需要从源 csv 生成与我上面展示的完全相同的透视表，但要按源记录的日期范围每周生成一个。最终我找到了解决办法，欢迎任何改进意见。它可能对其他人也有用：

def week_range( date, week_start=7 ):
    """Return the first and last day of the week that contains ``date``.

    By default weeks run from Sunday to Saturday: a Sunday is returned as its
    own start date, any other day is moved back to the preceding Sunday. Pass
    ``week_start=1`` for ISO-style weeks running Monday to Sunday.

    Args:
        date: Any object providing ``isocalendar()`` and supporting
            ``timedelta`` arithmetic (e.g. ``datetime.date``,
            ``datetime.datetime``). Any time-of-day part is carried over
            unchanged into both returned values.
        week_start: ISO weekday on which a week begins (1 = Monday ...
            7 = Sunday). Defaults to 7.

    Returns:
        A ``(start_date, end_date)`` tuple; ``end_date`` is six days after
        ``start_date``.

    Raises:
        ValueError: If ``week_start`` is not in the range 1..7.
    """
    if not 1 <= week_start <= 7:
        raise ValueError("week_start must be an ISO weekday between 1 and 7")

    # isocalendar() numbers weekdays 1 (Monday) through 7 (Sunday).
    _, _, dow = date.isocalendar()

    # Days elapsed since the most recent ``week_start`` day; 0 when ``date``
    # already falls on that weekday.
    days_into_week = (dow - week_start) % 7

    start_date = date - timedelta( days_into_week )
    end_date = start_date + timedelta( 6 )
    return (start_date, end_date)

# Load the source records and parse the date column once; every weekly filter
# below reuses this Series instead of re-converting the column on each pass.
df = pd.read_csv("data.csv")
fechas = df.fecha.astype( 'datetime64[ns]' )
isSaliente = (df.tipo == "Saliente")

# Reports are written under "out"; create it up front so open() cannot fail
# because the directory is missing.
os.makedirs("out", exist_ok=True)

# min()/max() rather than head()/tail(), so the walk does not depend on the
# csv being sorted by date. Both are NaT when there are no usable dates, in
# which case no report is produced.
firstDate = fechas.min()
lastDate = fechas.max()

if pd.notna(firstDate):
    # Drop the time of day so each week starts at midnight; otherwise the
    # window boundaries would inherit the first record's time and records
    # falling between two consecutive windows would never be reported.
    nextDate = firstDate.normalize()

    while nextDate <= lastDate:
        # Week start and end dates for the current date
        datefrom, dateto = week_range( nextDate )
        # The following week begins the day after this one ends
        nextDate = dateto + timedelta(1)

        # Half-open window [datefrom, nextDate): the whole last day of the
        # week is included, and consecutive weeks neither overlap nor leave gaps.
        inWeek = (fechas >= datefrom) & (fechas < nextDate)
        dfFiltered = df[ isSaliente & inWeek ]

        # Weeks without matching records produce no report
        if dfFiltered.empty:
            continue

        # total_secs_inTimeSerie / mean_secs_inTimeSerie are aggregation
        # helpers defined outside this snippet.
        txt = pd.pivot_table(
                dfFiltered,
                columns=[ "fecha" ],
                index=[ "org", "tipo", "estado" ],
                values=[ "destino", "duracion_total", "duracion_media" ],
                aggfunc={ "destino": len, "duracion_total": total_secs_inTimeSerie, "duracion_media": mean_secs_inTimeSerie },
                fill_value="", margins=True
        )

        # Write it to disk, one HTML file per week, named after the week's bounds
        report_name = "report-{}-{}.html".format(datefrom.strftime('%Y-%m-%d'),dateto.strftime('%Y-%m-%d'))
        with open(os.path.join("out",report_name), "w") as text_file:
            text_file.write ( txt.to_html() )

相关问题更多 >

编程相关推荐

热门问题

热门文章