3 Star 9 Fork 0

fengliang4616/Recommender

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
SlopeOne.py 6.70 KB
一键复制 编辑 原始数据 按行查看 历史
fengliang4616 提交于 2020-04-16 18:38 . 2020-04-16
# -*- coding: utf-8 -*-
"""
# @Author : FengLiang
# @Time : 2020/4/11 14:04
# @File : SlopeOne.py
"""
from operator import itemgetter
import numpy as np
import pandas as pd
# Path of the ratings CSV that SlopeOneCF reads by default.
# 'ratings.csv' is the alternative input file the author kept commented out.
data_file = "text3.csv"
class SlopeOneCF:
    """Weighted Slope One collaborative-filtering recommender.

    Ratings are read from a CSV file whose first three columns are taken as
    (user, item, rating).  After ``load_data`` the train/test splits are held
    as ``dict{user_id: {item_id: rating}}``; after ``compute_deviations`` the
    pairwise item statistics are held as ``dict{item: {other_item: value}}``.
    """

    def __init__(self, data_path=None):
        """
        Read the ratings CSV and initialise empty model state.

        :param data_path: CSV path to read; when None the module-level
            ``data_file`` is used (the original behaviour).
        """
        source = data_file if data_path is None else data_path
        self.data = pd.read_csv(source, usecols=range(3))
        self.data.columns = ['user', 'item', 'rating']
        self.train = {}
        self.test = {}
        self.frequencies = {}
        self.deviations = {}

    @staticmethod
    def _process_data(input_data):
        """
        Convert a ratings DataFrame into a nested dict.

        :param input_data: DataFrame with 'user', 'item' and 'rating' columns
        :return: dict{user_id: {item_id: rating}} with int ids and float
            ratings; a later row for the same (user, item) overwrites an
            earlier one
        """
        output_data = {}
        # Iterating the columns directly avoids building one Series per row
        # (as iterrows does) and keeps integer ids from being upcast to float.
        rows = zip(input_data['user'], input_data['item'], input_data['rating'])
        for user, item, rating in rows:
            output_data.setdefault(int(user), {})[int(item)] = float(rating)
        return output_data

    def load_data(self, train_size, normalize):
        """
        Split the ratings into train and test sets stored as
        dict{user_id: {item_id: rating}}.

        :param train_size: fraction of rows sampled into the training set
            (the sample uses a fixed random_state, so it is reproducible)
        :param normalize: when true, min-max scale the 'rating' column of
            ``self.data`` in place into [0, 1] before splitting
        :return: None; results are stored in ``self.train`` / ``self.test``
        """
        print('loading data')
        if normalize:
            rating = self.data['rating']
            lowest = rating.min()
            spread = rating.max() - lowest
            if spread == 0:
                # All ratings identical: 0/0 would turn every rating into NaN.
                self.data['rating'] = 0.0
            else:
                self.data['rating'] = (rating - lowest) / spread
        train_data = self.data.sample(frac=train_size, random_state=10, axis=0)
        # Every row that was not sampled for training goes to the test set.
        test_data = self.data[~self.data.index.isin(train_data.index)]
        self.train = self._process_data(train_data)
        self.test = self._process_data(test_data)
        print('loaded data finish')

    def compute_deviations(self):
        """
        Compute the average rating deviation between every pair of items
        co-rated by a training user.

        Fills ``self.frequencies[item][other]`` with the number of users who
        rated both items and ``self.deviations[item][other]`` with the mean of
        ``rating(item) - rating(other)`` over those users.

        :return: None
        """
        print('computing all deviations')
        # Start from empty tables: a previous call leaves averaged values
        # behind, and accumulating new sums on top of them corrupts the model
        # when the data is re-split and the deviations recomputed.
        frequencies = {}
        deviations = {}
        for ratings in self.train.values():
            for item, rating in ratings.items():
                item_freq = frequencies.setdefault(item, {})
                item_dev = deviations.setdefault(item, {})
                for item2, rating2 in ratings.items():
                    if item != item2:
                        # Number of users who rated both items.
                        item_freq[item2] = item_freq.get(item2, 0) + 1
                        # Running sum of rating differences.
                        item_dev[item2] = item_dev.get(item2, 0.0) + (rating - rating2)
        for item, item_dev in deviations.items():
            for item2 in item_dev:
                # Turn the summed differences into a mean difference.
                item_dev[item2] /= frequencies[item][item2]
        self.frequencies = frequencies
        self.deviations = deviations
        print('computed all deviations finish')

    def predict(self, userRatings):
        """
        Predict ratings for the items a user has not rated yet.

        Each prediction is the co-rating-count weighted average of
        ``deviation(candidate, rated_item) + rating(rated_item)``.

        :param userRatings: dict{item_id: rating} of the user's known ratings
        :return: dict{item_id: predicted_rating} covering only items that
            share at least one co-rating with an item in ``userRatings``
        """
        # The "not rated by this user" filter does not depend on the inner
        # loop, so it is evaluated once per candidate item.
        candidates = [
            (item, devs)
            for item, devs in self.deviations.items()
            if item not in userRatings
        ]
        weighted_sums = {}  # candidate item -> sum of weighted estimates
        weights = {}        # candidate item -> sum of co-rating counts
        for userItem, userRating in userRatings.items():
            for diffItem, diffRatings in candidates:
                if userItem in diffRatings:
                    freq = self.frequencies[diffItem][userItem]
                    weighted_sums[diffItem] = (
                        weighted_sums.get(diffItem, 0.0)
                        + (diffRatings[userItem] + userRating) * freq
                    )
                    weights[diffItem] = weights.get(diffItem, 0) + freq
        return {
            item: total / weights[item]
            for item, total in weighted_sums.items()
        }

    def validate(self):
        """
        Compute MAE and RMSE of the predictions against the test set.

        Only (user, item) pairs that are both predicted and present in the
        test set contribute.  Test users without training ratings are
        skipped instead of raising KeyError.

        :return: (mae, rmse); both are NaN when no predicted item overlaps
            the test ratings
        """
        print('calculating MAE and RMSE')
        error_sum = 0.0
        sqr_error_sum = 0.0
        matched = 0
        for user, actual in self.test.items():
            # A user may have landed entirely in the test split.
            predicted = self.predict(self.train.get(user, {}))
            for item, estimate in predicted.items():
                if item in actual:
                    diff = actual[item] - estimate
                    error_sum += abs(diff)
                    sqr_error_sum += diff ** 2
                    matched += 1
        if matched == 0:
            # Nothing to score; 0.0 would wrongly look like a perfect fit.
            return float('nan'), float('nan')
        mae = error_sum / matched
        rmse = np.sqrt(sqr_error_sum / matched)
        return mae, rmse

    def evaluate(self, top_n=None):
        """
        Recommend items to every test user and compute precision and recall.

        :param top_n: keep only the ``top_n`` highest predicted items per
            user; None (the default) keeps every predicted item
        :return: (precision, recall); each is 0.0 when its denominator is
            zero (no recommendations, or no test items)
        """
        print('calculating top N result')
        hit = 0
        recommended_total = 0
        relevant_total = 0
        for user, real_items in self.test.items():
            # Users without training ratings simply get no recommendations.
            recommendation = self.predict(self.train.get(user, {}))
            ranked = sorted(recommendation.items(), key=itemgetter(1), reverse=True)
            if top_n is not None:
                ranked = ranked[:top_n]
            # Recommended items that the user really rated in the test set.
            hit += sum(1 for item, _ in ranked if item in real_items)
            recommended_total += len(ranked)
            relevant_total += len(real_items)
        precision = hit / recommended_total if recommended_total else 0.0
        recall = hit / relevant_total if relevant_total else 0.0
        return precision, recall

    def get_top_n(self, user, top_n=10):
        """
        Return the ``top_n`` highest predicted items for one user.

        :param user: user id; must be a key of ``self.train``
        :param top_n: maximum number of items to return
        :return: list of (item_id, predicted_rating) sorted by predicted
            rating, highest first
        :raises KeyError: if ``user`` has no ratings in the training set
        """
        recommendation = self.predict(self.train[user])
        ranked = sorted(recommendation.items(), key=itemgetter(1), reverse=True)
        return ranked[:top_n]
if __name__ == '__main__':
    # Evaluate the recommender at several train/test split ratios,
    # reusing one instance so the CSV is read only once.
    recommender = SlopeOneCF()
    for train_fraction in (0.9, 0.8, 0.7, 0.6, 0.5):
        print('数据集:', train_fraction)
        recommender.load_data(train_size=train_fraction, normalize=True)
        recommender.compute_deviations()
        mae, rmse = recommender.validate()
        print('MAE:', mae, 'RMSE:', rmse)
        pre, rec = recommender.evaluate()
        print('precision:', pre, 'recall:', rec)
    # To inspect a single user's recommendations instead:
    # print(recommender.get_top_n(user=1, top_n=10))
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/fengliang4616/Recommender.git
[email protected]:fengliang4616/Recommender.git
fengliang4616
Recommender
Recommender
master

搜索帮助