conditional sums for pandas aggregate
To complement unutbu's answer, here's an approach using apply
on the groupby object.
>>> df.groupby('A_id').apply(lambda x: pd.Series(dict(
...     sum_up=(x.B == 'up').sum(),
...     sum_down=(x.B == 'down').sum(),
...     over_200_up=((x.B == 'up') & (x.C > 200)).sum()
... )))
      over_200_up  sum_down  sum_up
A_id
a1              0         0       1
a2              0         1       0
a3              1         0       2
a4              0         0       0
a5              0         0       0
There might be a better way; I'm pretty new to pandas, but this works:
import pandas as pd
import numpy as np

# Sample data: several rows may share the same A_id (a3 appears twice).
df = pd.DataFrame({
    'A_id': 'a1 a2 a3 a3 a4 a5'.split(),
    'B': 'up down up up left right'.split(),
    'C': [100, 102, 100, 250, 100, 102],
})

# Precompute the two-column condition as a helper column, because each
# aggregation function below only receives a single column per group.
df['D'] = (df['B'] == 'up') & (df['C'] > 200)

grouped = df.groupby(['A_id'])


def sum_up(grp):
    """Return how many values in ``grp`` equal 'up'."""
    return np.sum(grp == 'up')


def sum_down(grp):
    """Return how many values in ``grp`` equal 'down'."""
    return np.sum(grp == 'down')


def over_200_up(grp):
    """Return the number of True entries in the boolean flags ``grp``."""
    return np.sum(grp)


result = grouped.agg({'B': [sum_up, sum_down], 'D': [over_200_up]})

# Each column label is indexed as a (source column, function name) pair;
# keep only the function name so the result has flat column names.
result.columns = [col[1] for col in result.columns]
print(result)
yields
      sum_up  sum_down  over_200_up
A_id
a1         1         0            0
a2         0         1            0
a3         2         0            1
a4         0         0            0
a5         0         0            0
This is an old question, but I think a better approach, one that avoids apply, is to build a new DataFrame first and then group and aggregate it:
# Move A_id into the index so the boolean columns built below carry it
# along and can be grouped by index level.
df = df.set_index('A_id')

# One boolean Series per condition; summing booleans counts the True rows.
outcome = {
    'sum_up': df.B.eq('up'),
    'sum_down': df.B.eq('down'),
    'over_200_up': df.B.eq('up') & df.C.gt(200),
}

# A single grouped sum over the index replaces the per-group apply call.
outcome = pd.DataFrame(outcome).groupby(level=0).sum()
outcome
# Output:
#       sum_up  sum_down  over_200_up
# A_id
# a1         1         0            0
# a2         0         1            0
# a3         2         0            1
# a4         0         0            0
# a5         0         0            0