Source code for testfm.evaluation.evaluator

# -*- coding: utf-8 -*-
"""
Created on 23 January 2014

Evaluator for the test.fm framework

.. moduleauthor:: Linas
"""
__author__ = 'linas'

from random import sample
from math import sqrt
from testfm.evaluation.cutil.measures import MAPMeasure
from testfm.models.cutil.interface import IFactorModel
from concurrent.futures import ThreadPoolExecutor
from collections import Counter
from multiprocessing import cpu_count
from testfm.models.cutil.interface import NOGILModel
from testfm.evaluation.cutil.evaluator import evaluate_model


def partial_measure(user, entries, factor_model, all_items, non_relevant_count, measure, k=None):
    #if isinstance(factor_model, IFactorModel):
    #    return factor_model.partial_measure(user, entries, all_items, non_relevant_count, measure)
    # Use a set for membership tests: ``in`` on a pandas Series checks the index, not the values
    relevant_items = set(entries['item'])
    if non_relevant_count is None:
        #2. add all items except the relevant ones
        ranked_list = [(False, factor_model.get_score(user, nr)) for nr in all_items if nr not in relevant_items]
    else:
        #2. inject non_relevant_count random non-relevant items
        nr_items = [i for i in all_items if i not in relevant_items]
        ranked_list = [(False, factor_model.get_score(user, nr))
                       for nr in sample(nr_items, min(non_relevant_count, len(nr_items)))]
    #3. add all relevant items from the testing_data
    ranked_list += [(True, factor_model.get_score(user, i)) for i in entries['item']]

    #4. shuffle(ranked_list)  # just to make sure we don't introduce any bias (AK: do we need this?)

    #number of relevant items
    n = entries['item'].size
    #5. sort according to the score
    ranked_list.sort(key=lambda x: x[1], reverse=True)

    #6. evaluate according to each measure
    if k is None:
        return {measure.name: measure.measure(ranked_list, n=n)}
    else:
        return {measure.name: measure.measure(ranked_list[:k], n=n)}

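# A minimal usage sketch for partial_measure (illustrative only, not part of the original API): the toy model and the
# literal values below are assumptions; the 'user'/'item' column names follow the rest of this module.
def _partial_measure_example():
    import pandas as pd

    class ToyModel(object):
        """Stand-in for an IModel implementation that scores every (user, item) pair with a constant."""
        def get_score(self, user, item):
            return 0.5

    testing = pd.DataFrame({"user": [1, 1], "item": [10, 20]})
    user, entries = next(iter(testing.groupby("user")))
    # Rank the two relevant items against one sampled non-relevant item and score the list with MAP
    return partial_measure(user, entries, ToyModel(), all_items=[10, 20, 30], non_relevant_count=1,
                           measure=MAPMeasure())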

class Evaluator(object):
    """
    Takes the model, testing data and evaluation measure and spits out the score.
    """

    def __init__(self, use_multi_threading=True):
        self.use_multi_threading = use_multi_threading
    def evaluate_model(self, factor_model, testing_data, measures=None, all_items=None, non_relevant_count=100,
                       k=None):
        """
        Evaluate the model using some testing data in a pandas.DataFrame.

        The Evaluator checks whether the model under evaluation can be executed with multi-threading. If so, it runs
        a low-level routine using C threads; otherwise it executes a single-threaded routine.

        :param factor_model: An instance that should implement IModel
        :param testing_data: pandas.DataFrame with 'user' and 'item' columns
        :param measures: List of measures to compute. They should implement IMeasure. Default: MAPMeasure
        :param all_items: List of items available in the data set (used for negative sampling). If set to None, only
            testing items will be used.
        :param non_relevant_count: Number of non-relevant items to add to the ranked list for performance evaluation
        :param k: If not None, only the top-k items of each ranked list are considered when computing a measure
        :return: List of scores, one per measure
        """
        measures = measures or [MAPMeasure()]
        #all_items = all_items or testing_dataframe.item.unique()
        if all_items is None:
            all_items = testing_data.item.unique()

        #1. for each user:
        grouped = testing_data.groupby('user')
        if self.use_multi_threading and isinstance(factor_model, NOGILModel):
            return [e/len(grouped) for e in evaluate_model(factor_model, testing_data, measures, all_items,
                                                           non_relevant_count, k)]
            #return self.evaluate_model_multiprocessing(factor_model, testing_data, measures=measures,
            #                                           all_items=all_items, non_relevant_count=non_relevant_count,
            #                                           k=k)

        # compute the partial measures for every (user, measure) pair
        results = [partial_measure(user, entries, factor_model, all_items, non_relevant_count, m, k)
                   for user, entries in grouped for m in measures]
        #print [v["MAPMeasure"] for v in results]
        partial_measures = sum((Counter(r) for r in results), Counter())

        #7. average the scores over users
        return [partial_measures[measure.name]/len(grouped) for measure in measures]
    def evaluate_model_rmse(self, model, testing_data):
        """
        This is just a hack to evaluate RMSE. Nobody should bother with RMSE anymore, so there is no good support
        for it.
        """
        squared_error = 0.0
        for idx, row in testing_data.iterrows():
            p = model.get_score(row['user'], row['item'])
            squared_error += (p - float(row['rating'])) ** 2
        return sqrt(squared_error/len(testing_data))
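
# A minimal usage sketch for the Evaluator (illustrative only): 'ToyModel' and the literal data are assumptions; the
# 'user'/'item'/'rating' column names follow the methods above.
def _evaluator_usage_example():
    import pandas as pd

    class ToyModel(object):
        """Stand-in for an IModel implementation with a constant score."""
        def get_score(self, user, item):
            return 1.0

    data = pd.DataFrame({"user": [1, 1, 2], "item": [10, 20, 10], "rating": [5.0, 3.0, 4.0]})
    evaluator = Evaluator(use_multi_threading=False)
    map_scores = evaluator.evaluate_model(ToyModel(), data, non_relevant_count=1)  # one score per measure (MAP only)
    rmse = evaluator.evaluate_model_rmse(ToyModel(), data)
    return map_scores, rmse
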
if __name__ == "__main__":
    import doctest
    doctest.testmod()