Source code for responsibly.we.benchmark

"""
Evaluate word embedding by standard benchmarks.

Reference:
    - https://github.com/kudkudak/word-embeddings-benchmarks


Word Pairs Tasks
~~~~~~~~~~~~~~~~

1. The WordSimilarity-353 Test Collection
   http://www.cs.technion.ac.il/~gabr/resources/data/wordsim353/

2. Rubenstein, H., and Goodenough, J. 1965. Contextual correlates of synonymy
   https://www.seas.upenn.edu/~hansens/conceptSim/

3. Stanford Rare Word (RW) Similarity Dataset
   https://nlp.stanford.edu/~lmthang/morphoNLM/

4. The Word Relatedness Mturk-771 Test Collection
   http://www2.mta.ac.il/~gideon/datasets/mturk_771.html

5. The MEN Test Collection
   http://clic.cimec.unitn.it/~elia.bruni/MEN.html

6. SimLex-999
   https://fh295.github.io/simlex.html

7. TR9856
   https://www.research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_TR9856.v2.zip


Analogies Tasks
~~~~~~~~~~~~~~~

1. Google Analogies (subset of WordRep)
   https://code.google.com/archive/p/word2vec/source

2. MSR - Syntactic Analogies
   http://research.microsoft.com/en-us/projects/rnn/

"""

import os

import pandas as pd
from pkg_resources import resource_filename


WORD_PAIRS_TASKS = {'WS353': 'wordsim353.tsv',
                    'RG65': 'RG_word.tsv',
                    'RW': 'rw.tsv',
                    'Mturk': 'MTURK-771.tsv',
                    'MEN': 'MEN_dataset_natural_form_full.tsv',
                    'SimLex999': 'SimLex-999.tsv',
                    'TR9856': 'TermRelatednessResults.tsv'}

ANALOGIES_TASKS = {'MSR-syntax': 'MSR-syntax.txt',
                   'Google': 'questions-words.txt'}

PAIR_WORDS_EVALUATION_FIELDS = ['pearson_r', 'pearson_pvalue',
                                'spearman_r', 'spearman_pvalue',
                                'ratio_unkonwn_words']


def _get_data_resource_path(filename):
    return resource_filename(__name__, os.path.join('data',
                                                    'benchmark',
                                                    filename))


def _prepare_word_pairs_file(src, dst, delimiter='\t'):
    """Transform formats of word pairs files to tsv."""
    df = pd.read_csv(src, header=None, delimiter=delimiter)
    df.loc[:, :2].to_csv(dst, sep=delimiter, index=False, header=False)


[docs]def evaluate_word_pairs(model, kwargs_word_pairs=None): """ Evaluate word pairs tasks. :param model: Word embedding. :param kwargs_word_pairs: Kwargs for evaluate_word_pairs method. :type kwargs_word_pairs: dict or None :return: :class:`pandas.DataFrame` of evaluation results. """ if kwargs_word_pairs is None: kwargs_word_pairs = {} results = {} for name, filename in WORD_PAIRS_TASKS.items(): path = _get_data_resource_path(filename) (pearson, spearman, ratio_unknown_words) = model.evaluate_word_pairs(path, **kwargs_word_pairs) # pylint: disable=C0301 results[name] = {'pearson_r': pearson[0], 'pearson_pvalue': pearson[1], 'spearman_r': spearman.correlation, 'spearman_pvalue': spearman.pvalue, 'ratio_unkonwn_words': ratio_unknown_words} df = (pd.DataFrame(results) .reindex(PAIR_WORDS_EVALUATION_FIELDS) .transpose() .round(3)) return df
[docs]def evaluate_word_analogies(model, kwargs_word_analogies=None): """ Evaluate word analogies tasks. :param model: Word embedding. :param kwargs_word_analogies: Kwargs for evaluate_word_analogies method. :type evaluate_word_analogies: dict or None :return: :class:`pandas.DataFrame` of evaluation results. """ if kwargs_word_analogies is None: kwargs_word_analogies = {} results = {} for name, filename in ANALOGIES_TASKS.items(): path = _get_data_resource_path(filename) overall_score, _ = model.evaluate_word_analogies(path, **kwargs_word_analogies) # pylint: disable=C0301 results[name] = {'score': overall_score} df = (pd.DataFrame(results) .transpose() .round(3)) return df
[docs]def evaluate_word_embedding(model, kwargs_word_pairs=None, kwargs_word_analogies=None): """ Evaluate word pairs tasks and word analogies tasks. :param model: Word embedding. :param kwargs_word_pairs: Kwargs fo evaluate_word_pairs method. :type kwargs_word_pairs: dict or None :param kwargs_word_analogies: Kwargs for evaluate_word_analogies method. :type evaluate_word_analogies: dict or None :return: Tuple of DataFrame for the evaluation results. """ return (evaluate_word_pairs(model, kwargs_word_pairs), evaluate_word_analogies(model, kwargs_word_analogies))