Source code for responsibly.we.bias

# pylint: disable=too-many-lines
"""
Measuring and adjusting bias in word embedding by Bolukbasi (2016).

References:
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
      & Kalai, A. T. (2016).
      `Man is to computer programmer as woman is to homemaker?
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
      In Advances in neural information processing systems
      (pp. 4349-4357).

    - The code and data is based on the GitHub repository:
      https://github.com/tolga-b/debiaswe (MIT License).

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove Them
      <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - Nissim, M., van Noord, R., van der Goot, R. (2019).
      `Fair is Better than Sensational: Man is to Doctor
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

Usage
~~~~~

.. code:: python

   >>> from responsibly.we import GenderBiasWE
   >>> from gensim import downloader
   >>> w2v_model = downloader.load('word2vec-google-news-300')
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
   >>> w2v_gender_bias_we.calc_direct_bias()
   0.07307904249481942
   >>> w2v_gender_bias_we.debias()
   >>> w2v_gender_bias_we.calc_direct_bias()
   1.7964246601064155e-09

Types of Bias
~~~~~~~~~~~~~

Direct Bias
^^^^^^^^^^^

1. Associations
    Words that are closer to one end (e.g., *he*) than to
    the other end (*she*).
    For example, occupational stereotypes (page 7).
    Calculated by
    :meth:`~responsibly.we.bias.BiasWordEmbedding.calc_direct_bias`.

2. Analogies
    Analogies of *he:x::she:y*.
    For example analogies exhibiting stereotypes (page 7).
    Generated by
    :meth:`~responsibly.we.bias.BiasWordEmbedding.generate_analogies`.


Indirect Bias
^^^^^^^^^^^^^

The projection of a neutral word onto the direction defined by two other
neutral words is explained, to a large extent, by their shared projection
on the bias direction.

Calculated by
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_indirect_bias`
and
:meth:`~responsibly.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.

"""

import copy
import warnings

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.svm import LinearSVC
from tabulate import tabulate
from tqdm import tqdm

from responsibly.consts import RANDOM_STATE
from responsibly.utils import _warning_setup
from responsibly.we.benchmark import evaluate_word_embedding
from responsibly.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
from responsibly.we.utils import (
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
    generate_words_forms, get_seed_vector, most_similar, normalize,
    plot_clustering_as_classification, project_params, project_reject_vector,
    project_vector, reject_vector, round_to_extreme,
    take_two_sides_extreme_sorted, update_word_vector,
)


# Accepted values for the ``method`` argument of
# ``BiasWordEmbedding._identify_direction``.
DIRECTION_METHODS = ['single', 'sum', 'pca']
# Accepted values for the ``method`` argument of ``BiasWordEmbedding.debias``.
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
# Minimal explained variance ratio required from the first principal
# component when the direction is identified with the 'pca' method.
FIRST_PC_THRESHOLD = 0.5
# NOTE(review): not referenced in the visible part of this module;
# presumably a cap on the number of non-specific examples - confirm
# at its use site.
MAX_NON_SPECIFIC_EXAMPLES = 1000

# Public names exported by ``from responsibly.we.bias import *``.
__all__ = ['GenderBiasWE', 'BiasWordEmbedding']

# Called once at import time (helper imported from ``responsibly.utils``).
_warning_setup()


[docs]class BiasWordEmbedding:
    """Measure and adjust a bias in English word embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        """Validate the model and create an instance without a direction.

        :param model: Word embedding model, checked with
                      ``assert_gensim_keyed_vectors``
        :param bool only_lower: Stored as ``self.only_lower``
        :param bool verbose: Stored as ``self._verbose``
        :param identify_direction: Must be ``False`` when instantiating
                                   this class directly; it exists only so
                                   that sub-classes share the signature
        :param bool to_normalize: Whether to call
                                  ``model.init_sims(replace=True)``
        :raises ValueError: If ``identify_direction`` is not ``False``
                            for a direct instance of this class
        """
        # pylint: disable=undefined-variable

        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, probably should be a better design.
        # identify_direction has no meaning for BiasWordEmbedding itself;
        # the parameter is here to force this interface on sub-classes.
        is_base_class = self.__class__ == __class__
        if is_base_class and identify_direction is not False:
            message = ('identify_direction must be False'
                       ' for an instance of {}').format(__class__)
            raise ValueError(message)

        self.model = model

        # TODO: write a unit test for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        # No direction is known until `_identify_direction` is called.
        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if not to_normalize:
            return

        self.model.init_sims(replace=True)

    def __copy__(self):
        bias_word_embedding = self.__class__(self.model,
                                             self.only_lower,
                                             self._verbose,
                                             identify_direction=False)
        bias_word_embedding.direction = copy.deepcopy(self.direction)
        bias_word_embedding.positive_end = copy.deepcopy(self.positive_end)
        bias_word_embedding.negative_end = copy.deepcopy(self.negative_end)
        return bias_word_embedding

    def __deepcopy__(self, memo):
        bias_word_embedding = copy.copy(self)
        bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
        return bias_word_embedding

    def __getitem__(self, key):
        return self.model[key]

    def __contains__(self, item):
        return item in self.model

    def _filter_words_by_model(self, words):
        return [word for word in words if word in self]

    def _is_direction_identified(self):
        if self.direction is None:
            raise RuntimeError('The direction was not identified'
                               ' for this {} instance'
                               .format(self.__class__.__name__))

    # The article and its reference implementation disagree.
    # The article states (section 5.1):
    # "To identify the gender subspace, we took the ten gender pair difference
    # vectors and computed its principal components (PCs)"
    # However, the reference source code centers each pair around its mean
    # and runs PCA on both centered vectors (as the method below does):
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        """Fit a PCA on the centered vectors of the definitional pairs.

        For every pair, both word vectors are normalized and the mean of
        the two is subtracted from each; all the resulting vectors are the
        samples of the PCA.

        :param definitional_pairs: Iterable of word pairs
        :param int n_components: Number of components of the PCA
        :return: The fitted ``PCA`` object
        """
        centered_vectors = []

        for first_word, second_word in definitional_pairs:
            first_vector = normalize(self[first_word])
            second_vector = normalize(self[second_word])

            pair_center = (first_vector + second_vector) / 2

            centered_vectors.extend([first_vector - pair_center,
                                     second_vector - pair_center])

        pca = PCA(n_components=n_components)
        pca.fit(centered_vectors)

        if self._verbose:
            rows = enumerate(pca.explained_variance_ratio_, start=1)
            print(tabulate(rows, headers=['Principal Component',
                                          'Explained Variance Ratio']))

        return pca

    # TODO: add the SVD method from section 6 step 1
    # It seems there is a mistake there, I think it is the same as PCA
    # just with replacing it with SVD
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        """Identify the bias direction and store it on the instance.

        Sets ``self.direction``, ``self.positive_end``
        and ``self.negative_end``.

        :param str positive_end: Word naming the positive end
        :param str negative_end: Word naming the negative end
        :param definitional: Depends on ``method``:
                             for ``'single'``, a pair of words;
                             for ``'sum'``, a pair of word groups;
                             for ``'pca'``, an iterable of word pairs
        :param str method: One of ``DIRECTION_METHODS``
        :raises ValueError: If ``method`` is unknown or both ends are equal
        :raises RuntimeError: If, with the ``'pca'`` method, the explained
                              variance ratio of the first principal
                              component is below ``FIRST_PC_THRESHOLD``
        """
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            if self._verbose:
                print('Positive definitional end:', definitional[0])
                print('Negative definitional end:', definitional[1])
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            first_pc_ratio = pca.explained_variance_ratio_[0]
            if first_pc_ratio < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance'
                                   ' of the first principal component'
                                   ' should be at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           first_pc_ratio))
            direction = pca.components_[0]

            # The sign of a principal component is arbitrary, so flip it
            # if it points from the positive end towards the negative end.
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end

[docs]    def project_on_direction(self, word):
        """Project the normalized vector of the word on the direction.

        :param str word: The word tor project
        :return float: The projection scalar
        """

        self._is_direction_identified()

        vector = self[word]
        projection_score = self.model.cosine_similarities(self.direction,
                                                          [vector])[0]
        return projection_score

    def _calc_projection_scores(self, words):
        self._is_direction_identified()

        df = pd.DataFrame({'word': words})

        # TODO: maybe using cosine_similarities on all the vectors?
        # it might be faster
        df['projection'] = df['word'].apply(self.project_on_direction)
        df = df.sort_values('projection', ascending=False)

        return df

[docs]    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.

        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
        """
        projection_data = []
        for word in words:
            vector = self[word]
            projection = self.project_on_direction(word)
            normalized_vector = normalize(vector)

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalized_vector,
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)

[docs]    def plot_projection_scores(self, words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.

        :param list words: The words tor project
        :param int or None n_extreme: The number of extreme words to show
        :return: The ax object of the plot
        """

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extream_projection = np.round(
            projections_df['projection']
            .abs()
            .max(),
            decimals=1)

        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'])

        plt.xticks(np.arange(-most_extream_projection,
                             most_extream_projection + axis_projection_step,
                             axis_projection_step))
        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))

        plt.xlabel('Direction Projection')
        plt.ylabel('Words')

        return ax

[docs]    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.

        :param dict word_groups word: The groups to projects
        :return float: The ax object of the plot
        """

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        plt.axvline(0, color='k', linestyle='--')

        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))
        plt.xlabel('Direction Projection')
        plt.ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax

    @classmethod
    def _calc_bias_across_word_embeddings(cls,
                                          word_embedding_bias_dict,
                                          words):
        """
        Calculate to projections and rho of words for two word embeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :return tuple: Projections and spearman rho.
        """
        # pylint: disable=W0212
        assert len(word_embedding_bias_dict) == 2, 'Support only in two'\
                                                    'word embeddings'

        intersection_words = [word for word in words
                              if all(word in web
                                     for web in (word_embedding_bias_dict
                                                 .values()))]

        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
                       for name, web in word_embedding_bias_dict.items()}

        df = pd.DataFrame(projections)
        df.index = intersection_words

        rho, _ = spearmanr(*df.transpose().values)
        return df, rho

[docs]    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of same words of two word mbeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = word_embedding_bias_dict.keys()

        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        plt.title('Bias Across Word Embeddings'
                  '(Spearman Rho = {:0.2f})'.format(rho))

        negative_end = word_embedding_bias_dict[name1].negative_end
        positive_end = word_embedding_bias_dict[name1].positive_end
        plt.xlabel('← {}     {}     {} →'.format(negative_end,
                                                 name1,
                                                 positive_end))
        plt.ylabel('← {}     {}     {} →'.format(negative_end,
                                                 name2,
                                                 positive_end))

        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        plt.xlim(ax_min, ax_max)
        plt.ylim(ax_min, ax_max)

        return ax

    # TODO: refactor for speed and clarity
[docs]    def generate_analogies(self, n_analogies=100, seed='ends',
                           multiple=False,
                           delta=1., restrict_vocab=30000,
                           unrestricted=False):
        """
        Generate analogies based on a seed vector.

        x - y ~ seed vector.
        or a:x::b:y when a-b ~ seed vector.

        The seed vector can be defined by two word ends,
        or by the bias direction.

        ``delta`` is used for semantically coherent. Default vale of 1
        corresponds to an angle <= pi/3.


        There is criticism regarding generating analogies
        when used with `unstricted=False` and not ignoring analogies
        with `match` column equal to `False`.
        Tolga's technique of generating analogies, as implemented in this
        method, is limited inherently to analogies with x != y, which may
        be force "fake" bias analogies.

        See:

        - Nissim, M., van Noord, R., van der Goot, R. (2019).
          `Fair is Better than Sensational: Man is to Doctor
          as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

        :param seed: The definition of the seed vector.
                     Either by a tuple of two word ends,
                     or by `'ends` for the pre-defined ends
                     or by `'direction'` for the pre-defined direction vector.
        :param int n_analogies: Number of analogies to generate.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :param bool unrestricted: Whether to validate the generated analogies
                                  with unrestricted `most_similar`.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
        """
        # pylint: disable=C0301,R0914,E1136

        if not unrestricted:
            warnings.warn('Not Using unrestricted most_similar '
                          'may introduce fake biased analogies.')

        (seed_vector,
         positive_end,
         negative_end) = get_seed_vector(seed, self)

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

        # `pairs_distances` must be not-equal to zero
        # otherwise, x-y will be the zero vector, and every cosine similarity
        # will be equal to zero.
        # This cause to the **limitation** of this method which enforce a not-same
        # words for x and y.
        pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

        pairs_indices = np.array(np.nonzero(pairs_mask)).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ seed_vector

        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        sorted_cos_distances_indices_iter = iter(sorted_cos_distances_indices)

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        while len(analogies) < n_analogies:
            cos_distance_index = next(sorted_cos_distances_indices_iter)
            paris_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in paris_index]

            if multiple or (not multiple
                            and (word_x not in generated_words_x
                                 and word_y not in generated_words_y)):

                analogy = ({positive_end: word_x,
                            negative_end: word_y,
                            'score': cos_distances[cos_distance_index],
                            'distance': pairs_distances[tuple(paris_index)]})

                generated_words_x.add(word_x)
                generated_words_y.add(word_y)

                if unrestricted:
                    most_x = next(word
                                  for word, _ in most_similar(self.model,
                                                              [word_y, positive_end],
                                                              [negative_end]))
                    most_y = next(word
                                  for word, _ in most_similar(self.model,
                                                              [word_x, negative_end],
                                                              [positive_end]))

                    analogy['most_x'] = most_x
                    analogy['most_y'] = most_y
                    analogy['match'] = ((word_x == most_x)
                                        and (word_y == most_y))

                analogies.append(analogy)

        df = pd.DataFrame(analogies)

        columns = [positive_end, negative_end, 'distance', 'score']

        if unrestricted:
            columns.extend(['most_x', 'most_y', 'match'])

        df = df[columns]

        return df

[docs]    def calc_direct_bias(self, neutral_words, c=None):
        """Calculate the direct bias.

        Based on the projection of neutral words on the direction.

        :param list neutral_words: List of neutral words
        :param c: Strictness of bias measuring
        :type c: float or None
        :return: The direct bias
        """

        if c is None:
            c = 1

        projections = self._calc_projection_scores(neutral_words)['projection']
        direct_bias_terms = np.abs(projections) ** c
        direct_bias = direct_bias_terms.sum() / len(neutral_words)

        return direct_bias

[docs]    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words on the direction.

        Also called PairBias.
        :param str word1: First word
        :param str word2: Second word
        :type c: float or None
        :return The indirect bias between the two words
        """

        self._is_direction_identified()

        vector1 = normalize(self[word1])
        vector2 = normalize(self[word2])

        perpendicular_vector1 = reject_vector(vector1, self.direction)
        perpendicular_vector2 = reject_vector(vector2, self.direction)

        inner_product = vector1 @ vector2
        perpendicular_similarity = cosine_similarity(perpendicular_vector1,
                                                     perpendicular_vector2)

        indirect_bias = ((inner_product - perpendicular_similarity)
                         / inner_product)
        return indirect_bias

[docs]    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words=None, n_extreme=5):
        """
        Generate closest words to a neutral direction and their indirect bias.

        The direction of the neutral words is used to find
        the most extreme words.
        The indirect bias is calculated between the most extreme words
        and the closest end.

        :param str neutral_positive_end: A word that define the positive side
                                         of the neutral direction.
        :param str neutral_negative_end: A word that define the negative side
                                         of the neutral direction.
        :param list words: List of words to project on the neutral direction.
        :param int n_extreme: The number for the most extreme words
                              (positive and negative) to show.
        :return: Data Frame of the most extreme words
                 with their projection scores and indirect biases.
        """

        neutral_direction = normalize(self[neutral_positive_end]
                                      - self[neutral_negative_end])

        vectors = [normalize(self[word]) for word in words]
        df = (pd.DataFrame([{'word': word,
                             'projection': vector @ neutral_direction}
                            for word, vector in zip(words, vectors)])
              .sort_values('projection', ascending=False))

        df = take_two_sides_extreme_sorted(df, n_extreme,
                                           'end',
                                           neutral_positive_end,
                                           neutral_negative_end)

        df['indirect_bias'] = df.apply(lambda r:
                                       self.calc_indirect_bias(r['word'],
                                                               r['end']),
                                       axis=1)

        df = df.set_index(['end', 'word'])
        df = df[['projection', 'indirect_bias']]

        return df

    def _extract_neutral_words(self, specific_words):
        extended_specific_words = set()

        # because or specific_full data was trained on partial word embedding
        for word in specific_words:
            extended_specific_words.add(word)
            extended_specific_words.add(word.lower())
            extended_specific_words.add(word.upper())
            extended_specific_words.add(word.title())

        neutral_words = [word for word in self.model.vocab
                         if word not in extended_specific_words]

        return neutral_words

    def _neutralize(self, neutral_words):
        """Remove the direction component from the neutral words vectors.

        Every vector is replaced in the model by its rejection from
        the direction, and then ``init_sims(replace=True)`` is called
        on the model.
        """
        self._is_direction_identified()

        # Show a progress bar only in verbose mode.
        words_iter = (tqdm(neutral_words) if self._verbose
                      else iter(neutral_words))

        for word in words_iter:
            update_word_vector(self.model, word,
                               reject_vector(self[word], self.direction))

        self.model.init_sims(replace=True)

    def _equalize(self, equality_sets):
        """Equalize the words of every equality set around the direction.

        Every word vector of a set is replaced in the model by the part of
        the set center that is rejected from the direction, plus a scaled
        unit vector of the word's own offset along the direction.
        Afterwards ``init_sims(replace=True)`` is called on the model.
        """
        # pylint: disable=R0914

        self._is_direction_identified()

        # Collected only for the verbose report.
        words_data = [] if self._verbose else None

        for set_index, set_words in enumerate(equality_sets):
            set_vectors = [normalize(self[word]) for word in set_words]
            center = np.mean(set_vectors, axis=0)
            projected_center, rejected_center = project_reject_vector(
                center, self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(set_words, set_vectors):
                direction_part = normalize(
                    project_vector(vector, self.direction)
                    - projected_center)

                # This follows the article and differs from Bolukbasi's
                # code, which uses
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # The two behave the same only for equality sets of
                # size 2 (pairs) - not sure! - because for pairs
                # the direction parts of the two words are opposite.
                equalized_vector = rejected_center + scaling * direction_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction):')
            report = pd.DataFrame(words_data)
            report = report.set_index(['equality_set_index', 'word'])
            print(tabulate(report, headers='keys'))

        self.model.init_sims(replace=True)

    def _generate_pair_candidates(self, pairs):
        """Return the set of word-form pairs that exist in the model.

        The forms of both words of every pair are matched in order,
        and a pair of forms is kept only if both are in the model.
        """
        candidates = set()

        for word1, word2 in pairs:
            forms = zip(generate_one_word_forms(word1),
                        generate_one_word_forms(word2))
            for candidate1, candidate2 in forms:
                if candidate1 in self.model and candidate2 in self.model:
                    candidates.add((candidate1, candidate2))

        return candidates

[docs]    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding.

        :param str method: The method of debiasing.
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one

        .. warning::

          After calling `debias`,
          all the vectors of the word embedding
          will be normalized to unit length.

        """

        # pylint: disable=W0212
        if inplace:
            bias_word_embedding = self
        else:
            bias_word_embedding = copy.deepcopy(self)

        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        if method in ['hard', 'neutralize']:
            if self._verbose:
                print('Neutralize...')
            bias_word_embedding._neutralize(neutral_words)

        if method == 'hard':
            if self._verbose:
                print('Equalize...')

            assert all(len(equality_set) == 2
                       for equality_set in equality_sets), \
                   'Currently supporting only equality pairs.'

            equality_sets = self._generate_pair_candidates(equality_sets)

            bias_word_embedding._equalize(equality_sets)

        if inplace:
            return None
        else:
            return bias_word_embedding

[docs]    def evaluate_word_embedding(self,
                                kwargs_word_pairs=None,
                                kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        :param model: Word embedding.
        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type evaluate_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        return evaluate_word_embedding(self.model,
                                       kwargs_word_pairs,
                                       kwargs_word_analogies)

[docs]    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific wordsself.

        Using Linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
        :return: List of learned specific words and the classifier object
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_specific_words

            if not is_specific:
                non_specific_example_count += 1
                if non_specific_example_count <= max_non_specific_examples:
                    data.append((self[word], is_specific))
            else:
                data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y

    def _plot_most_biased_one_cluster(self,
                                      most_biased_neutral_words, y_bias,
                                      random_state=1, ax=None):
        """Plot clustering as classification for one word embedding.

        Looks up the vector of every given word in ``self.model``
        and passes the vectors, together with the labels,
        to ``plot_clustering_as_classification``.

        :param most_biased_neutral_words: Words to take the vectors of.
        :param y_bias: Label of each word, in the same order.
        :param random_state: Forwarded as ``random_state``.
        :param ax: Forwarded as ``ax``.
        :return: The result of ``plot_clustering_as_classification``.
        """
        embedding = self.model

        vectors = []
        for word in most_biased_neutral_words:
            vectors.append(embedding[word])

        return plot_clustering_as_classification(vectors, y_bias,
                                                 random_state=random_state,
                                                 ax=ax)

[docs]    def compute_factual_association(self, factual_properity):
        """Compute association of a factual property to the projection.

        Inspired by WEFAT (Word-Embedding Factual Association Test),
        but it is not the same:
        - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
        `Semantics derived automatically
        from language corpora contain human-like biases
        <http://opus.bath.ac.uk/55288/>`_.
        Science, 356(6334), 183-186.

        In a future version, the WEFAT will also be implemented.

        If a word doesn't exist in the word embedding,
        then it will be filtered out.

        For example, in :class:`responsibly.we.bias.GenderBiasWE`,
        the defuat factual property is the percentage of female
        in various occupations
        from the Labor Force Statistics of 2017 Population Survey,
        Taken from: https://arxiv.org/abs/1804.06876

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :return: Pearson r, pvalue and the words with their
                 associated factual values
                 and their projection on the bias direction.
        """

        points = {word: (value, self.project_on_direction(word))
                  for word, value in factual_properity.items()
                  if word in self.model}

        x, y = zip(*points.values())

        return pearsonr(x, y), points

[docs]    def plot_factual_association(self, factual_properity, ax=None):
        """Plot association of a factual property to the projection.

        See: :meth:`BiasWordEmbedding.compute_factual_association`

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        """

        result = self.compute_factual_association(factual_properity)

        (r, pvalue), points = result
        x, y = zip(*points.values())

        if ax is None:
            _, ax = plt.subplots(1)

        ax.scatter(x, y)

        plt.title('Assocsion between Factual Property'
                  'and Projection on Direction '
                  '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                  .format(r, pvalue))

        plt.xlabel('Factual Property')
        plt.ylabel('Projection on Direction')

        return ax

[docs]    @staticmethod
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        :param biased: Biased word embedding of
                       :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                    Either by a tuple of two word ends,
                    or by `'ends` for the pre-defined ends
                    or by `'direction'` for
                    the pre-defined direction vector.
        :param n_extrem: The number of extreme biased
                         neutral words to use.
        :return: Tuple of list of ax objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Postive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        neutral_words = biased._data['neutral_words']
        neutral_word_vectors = (biased[word] for word in neutral_words)
        neutral_word_projections = [(normalize(vector) @ seed_vector, word)
                                    for word, vector
                                    in zip(neutral_words,
                                           neutral_word_vectors)]

        neutral_word_projections.sort()

        _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
        _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        y_bias = [False] * n_extreme + [True] * n_extreme

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[0])
        axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

        acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                              y_bias,
                                                              random_state=random_state,
                                                              ax=axes[1])
        axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

        return axes, {biased.positive_end: most_positive_words,
                      biased.negative_end: most_negative_words}


class GenderBiasWE(BiasWordEmbedding):
    """Measure and adjust the Gender Bias in English Word Embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param str identify_direction: Set the method of identifying
                                   the gender direction:
                                   `'single'`, `'sum'` or `'pca'`.
                                   A false value skips
                                   the identification.
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction='pca', to_normalize=True):
        # `to_normalize` is forwarded as given, so that passing
        # `False` is actually honored by the base class.
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)
        self._initialize_data()

        if identify_direction:
            definitional = None

            if identify_direction == 'single':
                definitional = ('she', 'he')
            elif identify_direction == 'sum':
                # Transpose the pairs into one group of words per end.
                definitional = list(zip(*self._data['definitional_pairs']))
            elif identify_direction == 'pca':
                definitional = self._data['definitional_pairs']

            self._identify_direction('she', 'he',
                                     definitional,
                                     identify_direction)

    def _initialize_data(self):
        """Prepare the gender word lists for the current model.

        Works on a deep copy of ``BOLUKBASI_DATA['gender']``,
        so the module-level data is never modified.
        """
        specific_key = 'specific_full_with_definitional_equalize'

        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])

        if not self.only_lower:
            self._data[specific_key] = generate_words_forms(
                self._data[specific_key])

        for key in self._data['word_group_keys']:
            self._data[key] = self._filter_words_by_model(self._data[key])

        self._data['neutral_words'] = self._extract_neutral_words(
            self._data[specific_key])
        self._data['neutral_words'].sort()
        self._data['word_group_keys'].append('neutral_words')

    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scores, by default of the profession names.

        :param words: Words to plot, or ``'professions'``
                      for the pre-defined profession names.

        The other arguments and the result are those of the base class
        method.
        """
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)

    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        """Plot the projections distribution of groups of words.

        :param word_groups: Groups of words, or ``'bolukbasi'``
                            for the pre-defined groups listed in
                            the ``'word_group_keys'`` of the data.
        :param ax: Forwarded to the base class method.
        :return: The result of the base class method.
        """
        if isinstance(word_groups, str) and word_groups == 'bolukbasi':
            word_groups = {key: self._data[key]
                           for key in self._data['word_group_keys']}

        return super().plot_dist_projections_on_direction(word_groups, ax)

    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        """Plot the bias across word embeddings for the neutral professions.

        The words are the pre-defined ``'neutral_profession_names'``;
        all the arguments are forwarded to the base class method.
        """
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']
        # Zero-argument `super()` is bound to this class, so the call
        # reaches the base implementation even when `cls` is a subclass
        # (`super(cls, cls)` would then call this method again).
        super().plot_bias_across_word_embeddings(word_embedding_bias_dict,
                                                 words,
                                                 ax,
                                                 scatter_kwargs)

    def calc_direct_bias(self, neutral_words='professions', c=None):
        """Calculate the direct bias, by default of the neutral professions.

        :param neutral_words: Neutral words, or ``'professions'``
                              for the pre-defined neutral profession names.
        :param c: Forwarded to the base class method
                  for both kinds of ``neutral_words``.
        :return: The result of the base class method.
        """
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            neutral_words = self._data['neutral_profession_names']

        return super().calc_direct_bias(neutral_words, c)

    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        """Generate the closest words to the ends with their indirect bias.

        :param words: Words to consider, or ``'professions'``
                      for the pre-defined profession names.

        The other arguments and the result are those of the base class
        method.
        """
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(
            neutral_positive_end,
            neutral_negative_end,
            words,
            n_extreme=n_extreme)

    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding, with pre-defined gender defaults.

        :param str method: The method of debiasing.
        :param list neutral_words: Neutral words for the neutralize step.
                                   Defaults to the pre-defined
                                   neutral words.
        :param list equality_sets: Equality sets for the equalize step.
                                   Defaults to the union of
                                   the pre-defined equalize pairs
                                   and definitional pairs.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one
        :return: The result of the base class method.
        """
        if method in ('hard', 'neutralize') and neutral_words is None:
            neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            equality_sets = {tuple(pair)
                             for pair in self._data['equalize_pairs']}
            equality_sets |= {tuple(pair)
                              for pair in self._data['definitional_pairs']}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)

    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        """Learn specific words, by default from the pre-defined seed.

        :param seed_specific_words: Seed specific words, or ``'bolukbasi'``
                                    for the pre-defined ``'specific_seed'``.

        The other arguments and the result are those of the base class
        method.
        """
        if (isinstance(seed_specific_words, str)
                and seed_specific_words == 'bolukbasi'):
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)

    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        """Compute association of a factual property to the projection.

        :param dict factual_properity: Dictionary of words and their
                                       factual values. Defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :return: The result of the base class method.
        """
        return super().compute_factual_association(factual_properity)

    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        """Plot association of a factual property to the projection.

        :param dict factual_properity: Dictionary of words and their
                                       factual values. Defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :param ax: Forwarded to the base class method.
        :return: The result of the base class method.
        """
        return super().plot_factual_association(factual_properity, ax)
Source code for responsibly.we.bias

Responsibly

Navigation

Related Topics