用 pandas 实现 RFM 模型
# RFM (Recency / Frequency / Monetary) customer scoring with pandas.
#
# Reads purchase records, aggregates them per (user_email, product_name),
# scores each aggregate into quantile buckets, combines the three scores into
# a single RFM number and finally buckets that number into eight levels.
import numpy as np
import pandas as pd

# Columns that identify one customer/product pair.
GROUP_KEYS = ["user_email", "product_name"]

# Recency is "smaller is better", so its labels run from 5 down to 1.
RECENCY_LABELS = [5, 4, 3, 2, 1]
FREQUENCY_LABELS = [1, 2, 3, 4, 5]
MONETARY_LABELS = [1, 2, 3, 4, 5]
LEVEL_LABELS = [1, 2, 3, 4, 5, 6, 7, 8]


def quantile_score(values, labels):
    """Score *values* into ``len(labels)`` equal-probability quantile buckets.

    Bucket edges are the ``i / len(labels)`` quantiles of *values* (computed
    with ``interpolation='nearest'``); bucket ``i`` is right-closed and
    receives ``labels[i]``.

    Args:
        values: Numeric ``pandas.Series`` to score.
        labels: One label per bucket, ordered from the lowest bucket up.

    Returns:
        An ordered categorical ``pandas.Series`` on the same index as
        *values*. Missing inputs stay missing.
    """
    labels = list(labels)
    n_bins = len(labels)
    quantiles = [i / n_bins for i in range(n_bins + 1)]
    edges = values.quantile(q=quantiles, interpolation="nearest")
    upper_edges = edges.to_numpy(dtype=float)[1:]

    # ``pd.cut`` refuses duplicate edges when labels are supplied, and
    # duplicate quantiles are the norm for skewed data (e.g. most customers
    # bought exactly once). ``searchsorted(side="left")`` assigns every value
    # to the first right-closed bucket whose upper edge is >= the value, which
    # is the same result ``pd.cut`` gives when the edges are unique; empty
    # buckets created by repeated edges are simply never selected. Values at
    # or below the lowest edge (0, negatives) land in the first bucket instead
    # of becoming NaN.
    codes = np.searchsorted(upper_edges, values.to_numpy(dtype=float), side="left")
    codes = np.minimum(codes, n_bins - 1)
    codes = np.where(values.isna().to_numpy(), -1, codes)  # -1 == missing

    scores = pd.Categorical.from_codes(codes, categories=labels, ordered=True)
    return pd.Series(scores, index=values.index, name=values.name)


def build_rfm(df, today=None):
    """Aggregate purchase rows and attach R/F/M scores, RFM and level.

    Args:
        df: Purchase records with the columns ``user_email``,
            ``product_name``, ``ptdate`` (``YYYY-MM-DD``) and ``totalcost``.
            The frame is not modified.
        today: Reference date for recency; defaults to the current time.

    Returns:
        One row per (user_email, product_name) with the columns
        ``RecencyAgg``, ``FrequencyAgg``, ``MonetaryAgg``, ``R_S``, ``F_S``,
        ``M_S``, ``RFM`` and ``level``.

    Raises:
        ValueError: If a group has no usable score (e.g. every ``ptdate`` of
            the group is missing), because the scores are converted to int.
    """
    reference = pd.to_datetime("today") if today is None else pd.Timestamp(today)
    purchase_date = pd.to_datetime(df["ptdate"], format="%Y-%m-%d")
    prepared = df.assign(
        ptdate=purchase_date,
        dateDiff=(reference - purchase_date).dt.days,
    )

    # Named aggregation replaces the dict-renaming form of ``agg`` that was
    # removed in pandas 1.0.
    agg_data = prepared.groupby(by=GROUP_KEYS).agg(
        RecencyAgg=("dateDiff", "min"),      # days since the latest purchase
        FrequencyAgg=("ptdate", "size"),     # number of purchase rows
        MonetaryAgg=("totalcost", "sum"),    # total amount spent
    )

    recency_score = quantile_score(agg_data["RecencyAgg"], RECENCY_LABELS)
    frequency_score = quantile_score(agg_data["FrequencyAgg"], FREQUENCY_LABELS)
    monetary_score = quantile_score(agg_data["MonetaryAgg"], MONETARY_LABELS)

    agg_data["R_S"] = recency_score
    agg_data["F_S"] = frequency_score
    agg_data["M_S"] = monetary_score

    # Three-digit composite: hundreds = R, tens = F, units = M.
    agg_data["RFM"] = (
        100 * recency_score.astype(int)
        + 10 * frequency_score.astype(int)
        + 1 * monetary_score.astype(int)
    )
    agg_data["level"] = quantile_score(agg_data["RFM"], LEVEL_LABELS)

    return agg_data.reset_index()


def main():
    """Run the RFM pipeline on the CSV export and write/print the results."""
    df = pd.read_csv('./zue_164466.csv')
    agg_data = build_rfm(df)

    fe = agg_data.sort_values(['level', 'RFM'], ascending=[False, False])
    # observed=False keeps every level in the summary, including empty ones.
    dd = agg_data.groupby(by=['level'], observed=False).size().to_frame('size')

    print(fe.head())
    fe.to_csv('./rfm_data.csv', index=False)
    print("---------------")
    print(dd)


if __name__ == "__main__":
    main()

更多精彩