User and Item Based Recommender with Python

User & Item-Based Recommender System with Python

Building a movie recommender system from scratch in Python, using the ratings and movies CSV files from the MovieLens small dataset.

The system is inspired by the great book Programming Collective Intelligence.

import pandas as pd
import math

# Load the two MovieLens tables from the working directory.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")
# Left join on movieId: every rating row is kept and gains the movie's columns.
data = pd.merge(ratings, movies, how="left", on="movieId")
data.head()

  userId movieId rating timestamp title                       genres
0 1      1       4.0    964982703 Toy Story (1995)            Adventure|Animation|Children|Comedy|Fantasy
1 1      3       4.0    964981247 Grumpier Old Men (1995)     Comedy|Romance
2 1      6       4.0    964982224 Heat (1995)                 Action|Crime|Thriller
3 1      47      5.0    964983815 Seven (a.k.a. Se7en) (1995) Mystery|Thriller
4 1      50      5.0    964982931 Usual Suspects, The (1995)  Crime|Mystery|Thriller
# Build the nested ratings lookup: db[userId][title] = rating (rounded to 2 dp).
db = {}
for (u, m, r) in data[["userId", "title", "rating"]].values:
    db.setdefault(u, {})[m] = round(r, 2)


## A simple Euclidean-distance-based similarity function;
## it can be swapped for any other kind of similarity function.
def similarity(data, user1, user2):
    """Score how alike two entries of `data` are, based on their shared items.

    Args:
        data: Nested mapping of the form data[key][item] = rating.
        user1: First outer key to compare.
        user2: Second outer key to compare.

    Returns:
        0 when the two entries have no item in common; otherwise
        1 / (1 + s), where s is the sum of squared rating differences over
        the shared items, rounded to 2 decimals. Identical ratings on every
        shared item therefore give 1.0.

    Raises:
        KeyError: If `user1` or `user2` is not a key of `data`.
    """
    first, second = data[user1], data[user2]
    shared = [movie for movie in first if movie in second]
    if not shared:
        return 0
    sum_of_squares = round(
        sum(math.pow(first[movie] - second[movie], 2) for movie in shared), 2
    )
    return 1 / (1 + sum_of_squares)


print(similarity(db, 13, 11))
# 0.5


# Defined after `similarity` because the default argument below is evaluated
# at definition time and needs that name to exist already.
def nearest_neighbors(data, user, n = 5, similarity = similarity):
    """Return the `n` entries of `data` most similar to `user`.

    Args:
        data: Nested mapping of the form data[key][item] = rating.
        user: Outer key whose neighbors are wanted; it is excluded from the
            result.
        n: Maximum number of neighbors to return.
        similarity: Callable taking (data, key1, key2) and returning a score.

    Returns:
        A list of at most `n` (similarity, neighbor) tuples in descending
        order. Because whole tuples are compared, equal similarities are
        ordered by neighbor key, largest first.
    """
    scores = [(similarity(data, user, user2), user2) for user2 in data if user2 != user]
    scores = sorted(scores, reverse = True)
    return scores[:n]
## Nearest neighbors to user 13 and their similarities,
## returned as (similarity, neighbor) tuples.
## A score of 1.0 means the rounded sum of squared rating differences
## over the movies both users rated is 0.
nearest_neighbors(db, 13)
# [(1.0, 592), (1.0, 585), (1.0, 584), (1.0, 582), (1.0, 574)]
## The Recommender
def recommend(data, user, n = 5, similarity = similarity):
    """Predict ratings for items `user` has not rated and return the best `n`.

    Every other entry of `data` votes for its items with a weight equal to
    its similarity to `user`; the prediction for an item is the
    similarity-weighted mean of those ratings, rounded to 2 decimals.

    Args:
        data: Nested mapping of the form data[key][item] = rating.
        user: Outer key to produce recommendations for.
        n: Maximum number of recommendations to return.
        similarity: Callable taking (data, key1, key2) and returning a score.

    Returns:
        A list of at most `n` (predicted rating, item) tuples, sorted in
        descending order.
    """
    weighted_totals = {}
    weight_sums = {}
    for other in data:
        if other == user:
            continue
        weight = similarity(data, user, other)
        # Entries with a non-positive similarity do not get a vote.
        if weight <= 0:
            continue
        for item in data[other]:
            # Only score items the target has not rated (or rated as 0).
            if item in data[user] and data[user][item] != 0:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + data[other][item] * weight
            weight_sums[item] = weight_sums.get(item, 0) + weight

    # Normalise each weighted total by the weights that contributed to it.
    ranked = []
    for item in weighted_totals:
        ranked.append((round(weighted_totals[item] / weight_sums[item], 2), item))
    ranked.sort(reverse = True)

    return ranked[:n]
## Recommend 5 movies (the default n) to user 91.
## Each result is a (probable rating, movie) tuple.
recommend(db, 91)

# [(5.0, 'Zeitgeist: Moving Forward (2011)'),
#  (5.0, 'Wow! A Talking Fish! (1983)'),
#  (5.0, 'World of Glory (1991)'),
#  (5.0, 'Wonder Woman (2009)'),
#  (5.0, "Won't You Be My Neighbor? (2018)")]
## Flip data
def rec_transform(data):
    """Swap the two key levels of a nested ratings mapping.

    Args:
        data: Nested mapping of the form data[outer][inner] = value.

    Returns:
        A new dict where result[inner][outer] = value for every value in
        `data`. The input is not modified.
    """
    flipped = {}
    for outer_key, inner in data.items():
        for inner_key, value in inner.items():
            # Create the bucket on first sight of inner_key, then fill it.
            flipped.setdefault(inner_key, {})[outer_key] = value
    return flipped


# Flip db so that movie titles are the outer keys and user ids the inner keys.
movie_based_db = rec_transform(db)
## Running the same recommender on the flipped data gives the users who
## would probably like a movie, as (probable rating, user) tuples.
recommend(movie_based_db, "Won't You Be My Neighbor? (2018)")

# [(5.0, 499), (5.0, 236), (5.0, 146), (5.0, 138), (5.0, 85)]
Featured Image Source

Share

Leave a Reply

Your email address will not be published. Required fields are marked *