1 Star 0 Fork 2

李清风/某宝推荐系统设计

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
preprocess_2.py 3.26 KB
一键复制 编辑 原始数据 按行查看 历史
王柏 提交于 2022-01-10 03:19 . first commit
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split
def prediction(df, userdf, Nn=15):
    """Predict the missing ratings of the users in ``userdf``.

    For every user in ``userdf.index`` and every item that user has no value
    for in ``df``, the prediction is the user's own mean rating plus the
    correlation-weighted average of the neighbours' deviations from their
    own mean ratings.

    Args:
        df: user x item rating matrix (rows are users, columns are items,
            NaN marks "not rated"). Every label in ``userdf.index`` must be
            a row label of ``df``.
        userdf: the rows to produce predictions for; it is copied, never
            modified.
        Nn: maximum number of neighbours used per prediction.

    Returns:
        A copy of ``userdf`` in which each item that is NaN in the user's
        ``df`` row has been filled in:
        * mean + weighted deviation when neighbours with non-zero total
          weight exist,
        * the user's mean when all neighbour correlations are zero,
        * 0 when no user who rated the item has a defined correlation with
          the target user.
    """
    # Pairwise Pearson correlation between users (the rows of df). Entries
    # are NaN when two users do not share enough varying ratings.
    corr = df.T.corr()
    # Mean rating of every user, computed once instead of once per item.
    row_means = df.mean(axis=1)
    rats = userdf.copy()

    for usrid in userdf.index:
        user_row = df.loc[usrid]
        usrv = user_row.mean()
        user_corr = corr.loc[usrid]

        for item in user_row[user_row.isnull()].index:
            # Users who actually rated this item.
            raters = df[item].dropna()
            # Drop raters with an undefined correlation BEFORE limiting the
            # list to Nn; cutting first could throw away every usable
            # neighbour and wrongly fall through to the "no neighbour" case.
            sims = user_corr.loc[raters.index].dropna()
            # Keep the Nn most strongly correlated raters. A stable sort keeps
            # the original row order among ties so the result is deterministic.
            nearest = (
                sims.abs()
                .sort_values(ascending=False, kind="mergesort")
                .index[:Nn]
            )

            if len(nearest) == 0:
                rats.at[usrid, item] = 0
                continue

            weights = sims.loc[nearest]
            corsum = weights.abs().sum()
            if corsum != 0:
                deviations = raters.loc[nearest] - row_means.loc[nearest]
                rats.at[usrid, item] = usrv + (weights * deviations).sum() / corsum
            else:
                rats.at[usrid, item] = usrv
    return rats
#推荐
def recomm(df, userdf, Nn=15, TopN=1):
    """Recommend the best-scoring unrated items for each user in ``userdf``.

    Args:
        df: user x item rating matrix passed through to ``prediction``.
        userdf: rows of the users to recommend for; NaN marks items the
            user has not rated yet.
        Nn: neighbour count forwarded to ``prediction``.
        TopN: maximum number of items recommended per user.

    Returns:
        A tuple ``(ratings, recommendations)``: ``ratings`` is the frame
        returned by ``prediction``; ``recommendations`` is a list with one
        pandas Index per user (in ``userdf.index`` order) holding up to
        ``TopN`` item labels sorted by descending predicted rating.
    """
    ratings = prediction(df, userdf, Nn)
    recommendations = []
    for usrid in userdf.index:
        # Only items the user has not rated are candidates.
        unrated = userdf.loc[usrid].isnull()
        candidates = ratings.loc[usrid][unrated]
        ranked = candidates.sort_values(ascending=False)
        # Slicing clamps to the available length, so no size check is needed.
        recommendations.append(ranked.index[:TopN])
    return ratings, recommendations
# Load the raw behaviour log; it is read in 10,000-row chunks and concatenated.
df = pd.concat((chunk for chunk in pd.read_csv('UserBehavior.csv.zip',header=None, chunksize = 10000)))
# The file has no header row, so name the five columns here.
df.columns = ['userid', 'num', 'proid', 'D','E']
print(df.head(5))
# Only the first three columns are used from here on.
data1 = df.drop(['D','E'],axis=1)
# Discard every row whose userid is greater than 10000.
data = data1[~(data1['userid'] > 10000)]
print(data.shape)
print(data.head(5))
traindata, testdata = train_test_split(data,test_size=0.2, random_state=1)
# Count the rows of each (userid, proid) pair; the count ends up in 'num'.
traindata1 = traindata.groupby(['userid','proid'], as_index= False).count()
testdata1 = testdata.groupby(['userid','proid'], as_index= False).count()
print(traindata1.head(5))
print(testdata1.head(5))
# Reshape to a userid x proid matrix of counts; pairs that never occur are NaN.
traindf = traindata1.pivot(index = 'userid', columns= 'proid', values= 'num')
print(traindf.head(5))
testdf = testdata1.pivot(index= 'userid', columns= 'proid', values= 'num')
print(testdf.head(5))
# Turn the numeric ids into string labels ('usr<id>' / 'pro<id>').
traindf = traindf.rename(index=lambda i: 'usr%d'%(i), columns=lambda i: 'pro%d'%(i))
testdf = testdf.rename(index=lambda i: 'usr%d'%(i), columns=lambda i: 'pro%d'%(i))
# Predict only for test users that also occur in the training matrix; looking
# up a label that is missing from traindf with .loc would raise KeyError.
userdf = traindf.loc[testdf.index[testdf.index.isin(traindf.index)]]
# Compute the predicted scores and the recommendation lists.
trainnums,trainrecomm=recomm(traindf,userdf)
print(trainnums)
print(trainrecomm)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/li-qfeng/shopping-recommandation.git
git@gitee.com:li-qfeng/shopping-recommandation.git
li-qfeng
shopping-recommandation
某宝推荐系统设计
master

搜索帮助