print('''
- You are given a recommendations.csv file with user ids and you are supposed to recommend at most 10 songs per user.
- The training data is provided in the train.csv file.
- Each row in recommendations.csv must start with the user_id, followed by the recommended song_ids, all separated by commas.
- Make sure you have at least one recommendation for each user in recommendations.csv, or else your score will be zero.
- The songs recommended to a user must be different from the songs that user already has in the training set.
- A user can have at most 10 recommendations.
''')

import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from surprise import Dataset, Reader, KNNBasic


def preprocess_train_data(train_data):
    """
    Preprocess the training data to create a set of songs already listened to by each user.

    Args:
    - train_data (DataFrame): DataFrame containing the training data with columns user_id and song_id.

    Returns:
    - user_songs (dict): Dictionary where keys are user_ids and values are sets of song_ids listened to by each user.
    """
    user_songs = {}
    for index, row in train_data.iterrows():
        user_id = row['user_id']
        song_id = row['song_id']
        if user_id not in user_songs:
            user_songs[user_id] = set()
        user_songs[user_id].add(song_id)
    return user_songs


def build_recommendation_model(train_data):
    """
    Build a recommendation model using collaborative filtering.

    Args:
    - train_data (DataFrame): DataFrame containing the training data with columns user_id, song_id, and rating.

    Returns:
    - model (surprise.prediction_algorithms.algo_base.AlgoBase): Trained recommendation model.
    """
    # Create a Surprise Dataset object (binary rating: listened or not)
    reader = Reader(rating_scale=(0, 1))
    data = Dataset.load_from_df(train_data[['user_id', 'song_id', 'rating']], reader)

    # Train on the full data (train_test_split with test_size=0.0 is not valid,
    # so build the full trainset instead)
    trainset = data.build_full_trainset()

    # Item-based collaborative filtering with KNNBasic (or any other algorithm of your choice)
    model = KNNBasic(sim_options={'user_based': False})
    model.fit(trainset)
    return model


# Load the data
train_data = pd.read_csv("train.csv")
recommendations_data = pd.read_csv("recommendations.csv")

# Preprocess the training data
user_songs = preprocess_train_data(train_data)

# Build a recommendation model (e.g., collaborative filtering)
model = build_recommendation_model(train_data)
print("Recommendation model built successfully!")

# Generate recommendations for each user in recommendations.csv.
# generate_recommendations, limit_recommendations and format_recommendations are
# helpers for candidate scoring, top-10 truncation and CSV-row formatting
# (a sketch of them follows below).
with open("recommendations.csv", "w") as recommendations_file:
    for user_id in recommendations_data['user_id']:
        recommendations = generate_recommendations(user_id, model, user_songs)
        recommendations = limit_recommendations(recommendations)
        formatted_recommendations = format_recommendations(user_id, recommendations)
        recommendations_file.write(formatted_recommendations + "\n")

# Alternative model: scikit-learn NearestNeighbors on a sparse user-item interaction matrix
user_item_matrix = train_data.pivot(index='user_id', columns='song_id', values='rating').fillna(0)
user_item_sparse = csr_matrix(user_item_matrix.values)

model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(user_item_sparse)
print("Recommendation model built successfully!")
Write, Run & Share Python code online using OneCompiler's Python online compiler for free. It's one of the robust, feature-rich online compilers for the Python language, supporting both Python 3 and Python 2.7. Getting started with OneCompiler's Python editor is easy and fast. The editor shows sample boilerplate code when you choose Python or Python2 as the language and start coding.
OneCompiler's Python online editor supports stdin, and users can give inputs to programs using the STDIN textbox under the I/O tab. Following is a sample Python program which takes a name as input and prints hello along with your name.
import sys
name = sys.stdin.readline()
print("Hello "+ name)
Python is a very popular general-purpose programming language which was created by Guido van Rossum and released in 1991. It is very popular for web development, and you can build almost anything with it: mobile apps, web apps, tools, data analytics, machine learning and more. It is designed to be simple and easy to read, much like the English language, and it is highly productive and efficient, which makes it a very popular language.
Whenever you want to perform a set of operations based on a condition, IF-ELSE is used.
if conditional-expression:
    #code
elif conditional-expression:
    #code
else:
    #code
Indentation is very important in Python; make sure the indentation is followed correctly.
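For example (the variable name and values here are just illustrative):
x = 10
if x > 0:
    print("positive")
elif x == 0:
    print("zero")
else:
    print("negative")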
A for loop is used to iterate over collections (list, tuple, set, dictionary) or strings.
mylist = ["iPhone","Pixel","Samsung"]
for i in mylist:
    print(i)
A while loop is also used to repeat a set of statements based on a condition. Usually, while is preferred when the number of iterations is not known in advance.
while condition:
    #code
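For example, the following prints 3, 2, 1 and then stops (the variable name is illustrative):
count = 3
while count > 0:
    print(count)
    count = count - 1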
There are four types of collections in Python.
List is a collection which is ordered and can be changed. Lists are specified in square brackets.
mylist=["iPhone","Pixel","Samsung"]
print(mylist)
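Since lists can be changed, you can replace or add items after creating them, for example:
mylist = ["iPhone","Pixel","Samsung"]
mylist[1] = "OnePlus"     # replace the second item
mylist.append("Nokia")    # add an item at the end
print(mylist)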
Tuple is a collection which is ordered and can not be changed. Tuples are specified in round brackets.
myTuple=("iPhone","Pixel","Samsung")
print(myTuple)
The code below throws an error if you try to assign a new value to an item of the tuple.
myTuple=("iPhone","Pixel","Samsung")
print(myTuple)
myTuple[1]="onePlus"
print(myTuple)
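Running the above raises a TypeError ('tuple' object does not support item assignment). If you do need to modify the values, a common workaround is to convert the tuple to a list and back:
myTuple = ("iPhone","Pixel","Samsung")
myList = list(myTuple)     # lists can be changed
myList[1] = "OnePlus"
myTuple = tuple(myList)    # convert back to a tuple
print(myTuple)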
Set is a collection which is unordered and unindexed. Sets are specified in curly brackets.
myset = {"iPhone","Pixel","Samsung"}
print(myset)
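Because a set keeps only unique values, duplicates are dropped automatically, and new items can be added with add(), for example:
myset = {"iPhone","Pixel","Samsung","iPhone"}
print(myset)          # the duplicate "iPhone" appears only once
myset.add("OnePlus")  # add a new item
print(myset)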
Dictionary is a collection of key-value pairs which can be changed and is indexed by keys (from Python 3.7 onwards, dictionaries also preserve insertion order). They are written in curly brackets with key-value pairs.
mydict = {
    "brand": "iPhone",
    "model": "iPhone 11"
}
print(mydict)
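Values are read and changed through their keys, for example:
mydict = {
    "brand": "iPhone",
    "model": "iPhone 11"
}
print(mydict["model"])         # access a value by its key
mydict["model"] = "iPhone 12"  # change a value
mydict["year"] = 2019          # add a new key-value pair
print(mydict)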
Following are the libraries supported by OneCompiler's Python compiler
Name | Description |
---|---|
NumPy | The NumPy Python library helps users to work on arrays with ease |
SciPy | SciPy is a scientific computation library which depends on NumPy for convenient and fast N-dimensional array manipulation |
SKLearn/Scikit-learn | Scikit-learn is the most useful library for machine learning in Python |
Pandas | Pandas is the most efficient Python library for data manipulation and analysis |
DOcplex | DOcplex is IBM Decision Optimization CPLEX Modeling for Python, a library composed of Mathematical Programming Modeling and Constraint Programming Modeling |
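As a quick illustration of using two of these libraries together, NumPy and Pandas (the data below is made up):
import numpy as np
import pandas as pd

values = np.array([3, 1, 2])
print(values.mean())           # 2.0

df = pd.DataFrame({"user_id": [1, 1, 2], "song_id": [10, 11, 10]})
print(df.groupby("user_id")["song_id"].count())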