利用 Python 实现简单的基于用户的商品推荐模型
程序员文章站
2022-07-13 13:24:35
...
利用 Python 实现简单的基于用户的商品推荐模型
设计思想:
找出与该用户具有相似购物习惯的其他用户,例如他们曾经购买过一些相同的商品,然后将其他用户购买过但是该用户未曾购买的商品推荐给他。
简单来说,当用户 A 需要个性化推荐时,可以先找到和他兴趣相似的用户群体 C,然后把 C 喜欢的、并且 A 没有听说过的物品推荐给 A,这就是基于用户的商品推荐算法。
实现原理:
① 找到与目标用户兴趣相似的用户集合;
② 找到这个集合中用户喜欢的、并且目标用户没有听说过的物品推荐给目标用户。
实验结果:
实验源码:
uid_score_itemid.txt
summer,4.0 ,1
lina,5.0 ,1
tom,5.0 ,1
xixi,5.0 ,1
aliyun,5.0 ,1
summer,5.0 ,2
xiaoming,4.0 ,2
aliyun,4.0 ,2
lina,5.0 ,2
odi,5.0 ,2
summer,5.0 ,3
violetblue,5.0 ,3
frogsun,4.0 ,3
lina,4.0 ,3
Alaleio,5.0 ,3
alod,1.0 ,4
tom,1.0 ,4
aliyun,1.0 ,4
lina,1.0 ,4
keol,1.0 ,4
shih3,4.0 ,5
tom,5.0 ,5
summer,4.0 ,5
meimei,4.0 ,5
xixi,4.0 ,5
baseUserRecomment.py
# -*-coding:utf-8-*-
from math import sqrt
# Build the rating table: users[user_name][item_id] = score.
# Each data line is split on "," into <user>, <score>, <item id>.
# The file is opened once and closed by the context manager; previously it was
# opened twice and the first handle was never used or closed.
users = {}
with open("uid_score_itemid.txt", "r") as fp:
    for line in fp:
        fields = line.strip().split(",")
        if len(fields) < 3:
            # Blank or truncated line (e.g. a trailing newline at end of
            # file): skip it instead of failing with IndexError.
            continue
        user, score, item_id = fields[0], fields[1], fields[2]
        # float() tolerates the trailing space present in the score column.
        users.setdefault(user, {})[item_id] = float(score)
class recommender:
    """User-based collaborative-filtering recommender.

    Scores the items a user has not rated yet by combining the ratings of
    that user's most similar neighbours, each weighted by its similarity.
    """

    def __init__(self, data, k=3, metric='pearson', n=12):
        """Set up the recommender.

        Args:
            data: rating table as a dict ``{user: {item: score}}``.
            k: number of nearest neighbours used when recommending.
            metric: name of the similarity measure; only ``'pearson'`` is
                implemented.
            n: maximum number of recommended items returned.

        Raises:
            ValueError: if ``metric`` is not a supported measure.
            TypeError: if ``data`` is not a dict.
        """
        # Fail early: without these checks ``self.fn`` / ``self.data`` would
        # stay unset and the first later call would die with AttributeError.
        if metric != 'pearson':
            raise ValueError("unsupported metric: %r" % (metric,))
        if not isinstance(data, dict):
            raise TypeError("data must be a dict of {user: {item: score}}")
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        self.fn = self.pearson
        self.data = data

    def convertProductID2name(self, id):
        """Return the name registered for product ``id``, or ``id`` itself
        when ``productid2name`` has no entry for it."""
        return self.productid2name.get(id, id)

    def pearson(self, rating1, rating2):
        """Pearson correlation of two rating dicts over their shared keys.

        Returns 0 when the dicts share no key or when either side has no
        variance over the shared keys (the coefficient is undefined there).
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key, x in rating1.items():
            if key not in rating2:
                continue
            y = rating2[key]
            n += 1
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += pow(x, 2)
            sum_y2 += pow(y, 2)
        if n == 0:
            return 0
        var_x = sum_x2 - pow(sum_x, 2) / n
        var_y = sum_y2 - pow(sum_y, 2) / n
        # Floating-point rounding can push a zero variance term slightly
        # below zero, which would make sqrt() raise ValueError.
        if var_x <= 0 or var_y <= 0:
            return 0
        denominator = sqrt(var_x) * sqrt(var_y)
        if denominator == 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def computeNearestNeighbor(self, username):
        """Return ``[(other_user, similarity), ...]`` for every other user,
        sorted by similarity in descending order.

        Raises:
            KeyError: if ``username`` is not in the data.
        """
        target = self.data[username]
        distances = [
            (other, self.fn(target, ratings))
            for other, ratings in self.data.items()
            if other != username
        ]
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Recommend items for ``user``.

        Returns:
            A tuple ``(recommendations, nearest)``: ``recommendations`` is a
            list of at most ``n`` ``(item, score)`` pairs sorted by score in
            descending order, ``nearest`` is the full list returned by
            ``computeNearestNeighbor``.

        Raises:
            KeyError: if ``user`` is not in the data.
        """
        recommendations = {}
        nearest = self.computeNearestNeighbor(user)
        userRatings = self.data[user]

        # Never look at more neighbours than exist; indexing ``nearest`` with
        # a fixed range(k) raised IndexError on small data sets.
        neighbor_count = max(0, min(self.k, len(nearest)))
        neighbors = nearest[:neighbor_count]

        # Total similarity of the chosen neighbours, used to normalise the
        # individual weights; fall back to 1.0 to avoid dividing by zero.
        totalDistance = 0.0
        for _, similarity in neighbors:
            totalDistance += similarity
        if totalDistance == 0.0:
            totalDistance = 1.0

        # Accumulate a weighted score for every item a neighbour rated that
        # ``user`` has not rated.
        for name, similarity in neighbors:
            weight = similarity / totalDistance
            for item, rating in self.data[name].items():
                if item in userRatings:
                    continue
                if item not in recommendations:
                    recommendations[item] = rating * weight
                else:
                    recommendations[item] = recommendations[item] + rating * weight

        ranked = [
            (self.convertProductID2name(item), score)
            for item, score in recommendations.items()
        ]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n], nearest
def adjustrecommend(id):
    """Run the recommender over the module-level ``users`` table for ``id``.

    Returns a tuple ``(itemid_list, neighbours)``: the ids of the recommended
    items in ranked order, and the first three entries of the neighbour list
    returned by ``recommender.recommend``.
    """
    engine = recommender(users)
    ranked, neighbours = engine.recommend("%s" % id)
    # Keep only the item id of each (item, score) pair.
    itemid_list = [entry[0] for entry in ranked]
    return itemid_list, neighbours[:3]
# Demo run: recommend items for user "lina" and print both results.
itemid_list, near_list = adjustrecommend("lina")
for label, value in (("itemid_list:", itemid_list), ("near_list:", near_list)):
    print(label, value)
上一篇: 取近似值