# Python 衍生特征
# 1. 衍生方式一
import pandas as pd

# Sample data: one row per (id, cur) observation.
df = pd.DataFrame({
    'id': [2, 2, 2, 3, 3, 5],
    'cur': ['cur1', 'cur2', 'cur3', 'cur1', 'cur1', 'cur2'],
})
df  # bare expression: displays the frame when run in a notebook cell

# Contingency table: one row per id, one column per distinct `cur` value,
# each cell holding how many times that (id, cur) pair occurs.
df_tmp = pd.crosstab(df['id'], df['cur'])
df_tmp

# Derive the per-id features directly from the crosstab with vectorised
# column operations instead of filling the frame row by row:
#   cur1_count    - number of 'cur1' rows for the id
#   cur_count_all - total number of rows for the id (all categories)
#   cur_cate_num  - number of distinct categories seen for the id
# `.to_numpy()` drops the crosstab's id index so the resulting frame gets a
# plain 0..n-1 index, with ids in the crosstab's sorted order.
df_new = pd.DataFrame({
    'id': df_tmp.index.to_numpy(),
    'cur1_count': df_tmp['cur1'].to_numpy(),
    'cur_count_all': df_tmp.sum(axis=1).to_numpy(),
    'cur_cate_num': (df_tmp > 0).sum(axis=1).to_numpy(),
})
df_new.head()