
User & Item-Based Recommender System with Python
Building a movie recommender system from scratch in Python, using the ratings and movies CSV files from the MovieLens small dataset.
The system is inspired by the excellent book Programming Collective Intelligence.
import pandas as pd
import math
# Load the movie catalogue and the user ratings from the working directory.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")
# Left join on movieId: every rating row is kept and gains the columns of
# its matching movie row.
data = pd.merge(ratings, movies, how="left", on="movieId")
data.head()
userId movieId rating timestamp title genres
0 1 1 4.0 964982703 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 1 3 4.0 964981247 Grumpier Old Men (1995) Comedy|Romance
2 1 6 4.0 964982224 Heat (1995) Action|Crime|Thriller
3 1 47 5.0 964983815 Seven (a.k.a. Se7en) (1995) Mystery|Thriller
4 1 50 5.0 964982931 Usual Suspects, The (1995) Crime|Mystery|Thriller
## Build the user-keyed ratings database: {userId: {title: rating}}.
## Here ``data`` is the merged ratings/movies DataFrame loaded above.
db = {}
for user_id, title, rating in data[["userId", "title", "rating"]].values:
    # If a user has several rows for the same title, the last row wins.
    db.setdefault(user_id, {})[title] = round(rating, 2)


## it is a simple euclidean dist function
## but can be changed to any other kind of similarity func
def similarity(data, user1, user2):
    """Return a distance-based similarity score between two users.

    Args:
        data: Nested mapping ``{user: {movie: rating}}``.
        user1: Key of the first user in ``data``.
        user2: Key of the second user in ``data``.

    Returns:
        ``0`` when the two users have no movie in common; otherwise
        ``1 / (1 + s)`` where ``s`` is the sum of squared rating differences
        over the shared movies, rounded to two decimals. Identical ratings
        on every shared movie therefore give ``1.0``.

    Raises:
        KeyError: If ``user1`` or ``user2`` is not a key of ``data``.
    """
    ratings1 = data[user1]
    ratings2 = data[user2]
    shared = [movie for movie in ratings1 if movie in ratings2]
    if not shared:
        return 0
    sum_of_squares = round(
        sum(math.pow(ratings1[movie] - ratings2[movie], 2) for movie in shared),
        2,
    )
    return 1 / (1 + sum_of_squares)


print(similarity(db, 13, 11))
# 0.5


# Defined after ``similarity`` on purpose: the default argument below is
# evaluated when this ``def`` statement runs, so the name must already exist.
def nearest_neighbors(data, user, n=5, similarity=similarity):
    """Return the ``n`` users most similar to ``user``.

    Args:
        data: Nested mapping ``{user: {movie: rating}}``.
        user: Key of the user whose neighbours are wanted.
        n: Maximum number of neighbours to return.
        similarity: Callable ``(data, user1, user2) -> score``.

    Returns:
        A list of ``(similarity, neighbor)`` tuples in descending order.
        Tuples are compared as a whole, so equal scores are ordered by the
        larger neighbour key first.
    """
    scores = [
        (similarity(data, user, other), other) for other in data if other != user
    ]
    scores.sort(reverse=True)
    return scores[:n]
## Nearest neighbors to user 13 and their similarities
## (similarity, neighbor)
nearest_neighbors(db, 13)
# [(1.0, 592), (1.0, 585), (1.0, 584), (1.0, 582), (1.0, 574)]
## The Recommender
def recommend(data, user, n=5, similarity=similarity):
    """Return the top ``n`` items predicted for ``user``.

    Each candidate item is scored with the similarity-weighted average of
    the ratings given to it by the other users.

    Args:
        data: Nested mapping ``{user: {movie: rating}}``.
        user: Key of the user to recommend for.
        n: Maximum number of recommendations to return.
        similarity: Callable ``(data, user1, user2) -> score``.

    Returns:
        A list of ``(predicted rating, movie)`` tuples in descending order,
        with the predicted rating rounded to two decimals. Tuples are
        compared as a whole, so equal ratings are ordered by the larger
        movie key first.
    """
    totals = {}    # movie -> sum of (rating * similarity)
    sim_sums = {}  # movie -> sum of similarities, the normalising weight
    for user2, ratings in data.items():
        if user2 == user:
            continue
        sim = similarity(data, user, user2)
        # Users with no positive similarity contribute nothing; skipping them
        # also keeps every sim_sums entry strictly positive for the division.
        if sim <= 0:
            continue
        for movie, rating in ratings.items():
            # Only score movies the target user has not rated (or rated 0).
            if movie not in data[user] or data[user][movie] == 0:
                totals[movie] = totals.get(movie, 0) + rating * sim
                sim_sums[movie] = sim_sums.get(movie, 0) + sim
    rankings = [
        (round(total / sim_sums[movie], 2), movie) for movie, total in totals.items()
    ]
    rankings.sort(reverse=True)
    return rankings[:n]
## Recommend 5 movies to user 91
## (probable rating, movie)
recommend(db, 91)
# [(5.0, 'Zeitgeist: Moving Forward (2011)'),
#  (5.0, 'Wow! A Talking Fish! (1983)'),
#  (5.0, 'World of Glory (1991)'),
#  (5.0, 'Wonder Woman (2009)'),
#  (5.0, "Won't You Be My Neighbor? (2018)")]
## Flip data
def rec_transform(data):
    """Swap the two key levels of a nested ratings mapping.

    Args:
        data: Nested mapping ``{outer: {inner: value}}``, e.g.
            ``{user: {movie: rating}}``.

    Returns:
        A new mapping ``{inner: {outer: value}}``, e.g.
        ``{movie: {user: rating}}``. The input is not modified.
    """
    result = {}
    for user, ratings in data.items():
        for movie, rating in ratings.items():
            result.setdefault(movie, {})[user] = rating
    return result
# Movie-keyed view of db: {title: {userId: rating}}.
movie_based_db = rec_transform(db)
## Users who would like a movie and their probable rating
## (probable rating, user to like)
recommend(movie_based_db, "Won't You Be My Neighbor? (2018)")
# [(5.0, 499), (5.0, 236), (5.0, 146), (5.0, 138), (5.0, 85)]
Leave a Reply