User & Item-Based Recommender System with Python
Building a movie recommender system from scratch in Python, using the ratings and movies CSV files from the MovieLens small dataset.
# The system is inspired by the great "Programming Collective Intelligence" book.
import pandas as pd
import math

# Load the two MovieLens CSV files from the current working directory.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

# Left join on movieId so every rating row is kept and gains the movie's
# title and genres columns.
data = ratings.merge(movies, on="movieId", how="left")

# Preview of the merged frame (displayed automatically in a notebook).
data.head()
# Output shown in the original post:
#    userId  movieId  rating  timestamp  title                        genres
# 0  1       1        4.0     964982703  Toy Story (1995)             Adventure|Animation|Children|Comedy|Fantasy
# 1  1       3        4.0     964981247  Grumpier Old Men (1995)      Comedy|Romance
# 2  1       6        4.0     964982224  Heat (1995)                  Action|Crime|Thriller
# 3  1       47       5.0     964983815  Seven (a.k.a. Se7en) (1995)  Mystery|Thriller
# 4  1       50       5.0     964982931  Usual Suspects, The (1995)   Crime|Mystery|Thriller
## Build the ratings lookup: {userId: {title: rating}}
db = {}
for (u, m, r) in data[["userId", "title", "rating"]].values:
    # Create the per-user dict the first time a user is seen, then store
    # the rating rounded to two decimals under the movie title.
    db.setdefault(u, {})[m] = round(r, 2)


## it is a simple euclidean dist function
## but can be changed to any other kind of similarity func
def similarity(data, user1, user2):
    """Return a similarity score for two keys of a nested ratings dict.

    The score is ``1 / (1 + s)`` where ``s`` is the sum of squared rating
    differences over the items both keys have rated, rounded to two
    decimals (no square root is taken).

    Args:
        data: Mapping ``{key: {item: rating}}``.
        user1: First key in ``data``.
        user2: Second key in ``data``.

    Returns:
        ``0`` when the two keys share no items; otherwise a float that is
        ``1.0`` when all shared ratings are identical and shrinks as they
        diverge.

    Raises:
        KeyError: If ``user1`` or ``user2`` is not a key of ``data``.
    """
    ratings1 = data[user1]
    ratings2 = data[user2]
    shared = [movie for movie in ratings1 if movie in ratings2]
    if not shared:
        return 0
    sum_of_squares = round(
        sum((ratings1[movie] - ratings2[movie]) ** 2 for movie in shared), 2
    )
    return 1 / (1 + sum_of_squares)


print(similarity(db, 13, 11))
# 0.5


# NOTE: defined after `similarity` on purpose -- the default argument below
# is evaluated when the `def` statement runs, so `similarity` must exist.
def nearest_neighbors(data, user, n=5, similarity=similarity):
    """Return the ``n`` keys of ``data`` most similar to ``user``.

    Args:
        data: Mapping ``{key: {item: rating}}``.
        user: The key whose neighbors are wanted.
        n: Maximum number of neighbors to return.
        similarity: Callable ``(data, a, b) -> number`` used for scoring.

    Returns:
        A list of ``(score, key)`` tuples sorted in descending order;
        ties on score are broken by the key, also descending, because
        whole tuples are compared.
    """
    scores = [
        (similarity(data, user, user2), user2)
        for user2 in data
        if user2 != user
    ]
    scores.sort(reverse=True)
    return scores[:n]
## nearest neighbors to user 13 and their similarities
## (similarity, neighbor)
nearest_neighbors(db, 13)
# [(1.0, 592), (1.0, 585), (1.0, 584), (1.0, 582), (1.0, 574)]
## The Recommender
def recommend(data, user, n=5, similarity=similarity):
    """Predict ratings for items ``user`` has not rated and return the top ``n``.

    Each candidate item's predicted rating is the similarity-weighted
    average of the ratings given by the other keys in ``data``; keys whose
    similarity to ``user`` is zero or negative are ignored.

    Args:
        data: Mapping ``{key: {item: rating}}``.
        user: The key to produce recommendations for.
        n: Maximum number of recommendations to return.
        similarity: Callable ``(data, a, b) -> number`` used for weighting.

    Returns:
        A list of ``(predicted_rating, item)`` tuples, with the rating
        rounded to two decimals, sorted in descending order (ties broken
        by item, descending, because whole tuples are compared).
    """
    totals = {}    # item -> sum of (rating * similarity)
    sim_sums = {}  # item -> sum of similarities that contributed
    for user2 in data:
        if user2 == user:
            continue
        sim = similarity(data, user, user2)
        if sim <= 0:
            continue
        for movie, rating in data[user2].items():
            # Only score items the target has not rated (or rated as 0).
            if movie not in data[user] or data[user][movie] == 0:
                totals[movie] = totals.get(movie, 0) + rating * sim
                sim_sums[movie] = sim_sums.get(movie, 0) + sim
    # sim_sums[movie] is always > 0 here because only sim > 0 is accumulated.
    rankings = [
        (round(total / sim_sums[movie], 2), movie)
        for movie, total in totals.items()
    ]
    rankings.sort(reverse=True)
    return rankings[:n]
## recommend 5 movies to user 91
# (probable rating, movie)
recommend(db, 91)
# [(5.0, 'Zeitgeist: Moving Forward (2011)'),
#  (5.0, 'Wow! A Talking Fish! (1983)'),
#  (5.0, 'World of Glory (1991)'),
#  (5.0, 'Wonder Woman (2009)'),
#  (5.0, "Won't You Be My Neighbor? (2018)")]
## Flip data
def rec_transform(data):
    """Invert a nested ratings dict.

    Args:
        data: Mapping ``{outer: {inner: rating}}``.

    Returns:
        A new mapping ``{inner: {outer: rating}}`` holding the same
        ratings; ``data`` itself is not modified.
    """
    result = {}
    for user, ratings in data.items():
        for movie, rating in ratings.items():
            result.setdefault(movie, {})[user] = rating
    return result


# Movie-keyed view of the ratings: {title: {userId: rating}}
movie_based_db = rec_transform(db)
## Users who would like a movie and their probable rating
## (probable rating, user to like)
recommend(movie_based_db, "Won't You Be My Neighbor? (2018)")
# [(5.0, 499), (5.0, 236), (5.0, 146), (5.0, 138), (5.0, 85)]
Leave a Reply