Source code for responsibly.we.bias

# pylint: disable=too-many-lines
"""
Measuring and adjusting bias in word embedding by Bolukbasi (2016).

References:
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
      & Kalai, A. T. (2016).
      `Man is to computer programmer as woman is to homemaker?
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
      In Advances in neural information processing systems
      (pp. 4349-4357).

    - The code and data are based on the GitHub repository:
      https://github.com/tolga-b/debiaswe (MIT License).

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove Them
      <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - Nissim, M., van Noord, R., van der Goot, R. (2019).
      `Fair is Better than Sensational: Man is to Doctor
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

Usage
~~~~~

.. code:: python

   >>> from responsibly.we import GenderBiasWE
   >>> from gensim import downloader
   >>> w2v_model = downloader.load('word2vec-google-news-300')
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
   >>> w2v_gender_bias_we.calc_direct_bias()
   0.07307904249481942
   >>> w2v_gender_bias_we.debias()
   >>> w2v_gender_bias_we.calc_direct_bias()
   1.7964246601064155e-09

Types of Bias
~~~~~~~~~~~~~

Direct Bias
^^^^^^^^^^^

1. Associations
    Words that are closer to one end (e.g., *he*) than to
    the other end (*she*).
    For example, occupational stereotypes (page 7).
    Calculated by
    :meth:`~responsibly.we.bias.BiasWordEmbedding.calc_direct_bias`.

2. Analogies
    Analogies of *he:x::she:y*.
    For example analogies exhibiting stereotypes (page 7).
    Generated by
    :meth:`~responsibly.we.bias.BiasWordEmbedding.generate_analogies`.


Indirect Bias
^^^^^^^^^^^^^

The projection of a neutral word onto the direction defined by two other
neutral words is explained, to a large extent, by their shared projection
onto the bias direction.

Calculated by
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_indirect_bias`
and
:meth:`~responsibly.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.

"""

import copy
import warnings

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.svm import LinearSVC
from tqdm import tqdm

from responsibly.consts import RANDOM_STATE
from responsibly.utils import _warning_setup
from responsibly.we.benchmark import evaluate_word_embedding
from responsibly.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
from responsibly.we.utils import (
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
    generate_words_forms, get_seed_vector, most_similar, normalize,
    plot_clustering_as_classification, project_params, project_reject_vector,
    project_vector, reject_vector, round_to_extreme,
    take_two_sides_extreme_sorted, update_word_vector,
)
from tabulate import tabulate


# Values accepted by the ``method`` argument of
# ``BiasWordEmbedding._identify_direction``.
DIRECTION_METHODS = ['single', 'sum', 'pca']
# Values accepted by the ``method`` argument of ``BiasWordEmbedding.debias``.
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
# Minimal explained variance ratio required from the first principal
# component when the direction is identified with the 'pca' method.
FIRST_PC_THRESHOLD = 0.5
# Default cap on the number of non-specific words used as training examples
# in ``BiasWordEmbedding.learn_full_specific_words``.
MAX_NON_SPECIFIC_EXAMPLES = 1000

# Names exported by ``from responsibly.we.bias import *``.
__all__ = ['GenderBiasWE', 'BiasWordEmbedding']

# NOTE(review): helper imported from ``responsibly.utils``; presumably it
# configures how warnings are displayed - confirm against its definition.
_warning_setup()


class BiasWordEmbedding:
    """Measure and adjust a bias in English word embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param identify_direction: Must be ``False`` for this class; the
                               parameter exists only so that sub-classes
                               share the same constructor interface
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    :raises ValueError: If ``identify_direction`` is not ``False`` when
                        this class (and not a sub-class) is instantiated
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordEmbedding
        # The goal is to force this interface of sub-classes.
        if self.__class__ == __class__ and identify_direction is not False:
            raise ValueError('identify_direction must be False'
                             ' for an instance of {}'
                             .format(__class__))

        self.model = model

        # TODO: write unitest for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        # Set by ``_identify_direction``; ``None`` until then.
        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)

    def __copy__(self):
        """Return a shallow copy that shares the same model object."""
        # ``to_normalize=False``: the copy shares ``self.model``, so letting
        # ``__init__`` call ``init_sims(replace=True)`` again would touch the
        # vectors of the instance being copied as a side effect.
        bias_word_embedding = self.__class__(self.model,
                                             self.only_lower,
                                             self._verbose,
                                             identify_direction=False,
                                             to_normalize=False)
        bias_word_embedding.direction = copy.deepcopy(self.direction)
        bias_word_embedding.positive_end = copy.deepcopy(self.positive_end)
        bias_word_embedding.negative_end = copy.deepcopy(self.negative_end)
        return bias_word_embedding

    def __deepcopy__(self, memo):
        """Return a copy whose model is deep-copied as well."""
        bias_word_embedding = copy.copy(self)
        bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
        return bias_word_embedding

    def __getitem__(self, key):
        """Return ``self.model[key]``."""
        return self.model[key]

    def __contains__(self, item):
        """Return whether ``item`` is in the model."""
        return item in self.model

    def _filter_words_by_model(self, words):
        """Keep only the words that exist in the model, preserving order."""
        return [word for word in words if word in self]

    def _is_direction_identified(self):
        """Raise ``RuntimeError`` if no direction was identified yet."""
        if self.direction is None:
            raise RuntimeError('The direction was not identified'
                               ' for this {} instance'
                               .format(self.__class__.__name__))

    # There is a mistake in the article
    # it is written (section 5.1):
    # "To identify the gender subspace, we took the ten gender pair difference
    # vectors and computed its principal components (PCs)"
    # however in the source code:
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        """Fit a PCA on the pair members centered around their pair mean.

        :param definitional_pairs: Iterable of word pairs
        :param int n_components: Number of principal components to fit
        :return: The fitted ``sklearn.decomposition.PCA`` object
        """
        matrix = []

        for word1, word2 in definitional_pairs:
            vector1 = normalize(self[word1])
            vector2 = normalize(self[word2])

            # Each pair contributes its two offsets from the pair center.
            center = (vector1 + vector2) / 2

            matrix.append(vector1 - center)
            matrix.append(vector2 - center)

        pca = PCA(n_components=n_components)
        pca.fit(matrix)

        if self._verbose:
            table = enumerate(pca.explained_variance_ratio_, start=1)
            headers = ['Principal Component',
                       'Explained Variance Ratio']
            print(tabulate(table, headers=headers))

        return pca

    # TODO: add the SVD method from section 6 step 1
    # It seems there is a mistake there, I think it is the same as PCA
    # just with replacing it with SVD
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        """Identify the bias direction and store it on the instance.

        :param str positive_end: Word naming the positive side
        :param str negative_end: Word naming the negative side
        :param definitional: For ``'single'``, a pair of words;
                             for ``'sum'``, a pair of word groups;
                             for ``'pca'``, an iterable of word pairs
        :param str method: One of ``DIRECTION_METHODS``
        :raises ValueError: If ``method`` is unknown or both ends are equal
        :raises RuntimeError: If, for ``'pca'``, the first principal
                              component explains less than
                              ``FIRST_PC_THRESHOLD`` of the variance
        """
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end '
                             'should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            if self._verbose:
                print('Positive definitional end:', definitional[0])
                print('Negative definitional end:', definitional[1])
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance '
                                   'of the first principal component should be '
                                   'at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # if direction is opposite (e.g. we cannot control
            # what the PCA will return)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end
def project_on_direction(self, word):
    """Project the vector of the word on the direction.

    The score is the cosine similarity between the direction
    and the vector of the word.

    :param str word: The word to project
    :return float: The projection scalar
    :raises RuntimeError: If the direction was not identified yet
    """
    self._is_direction_identified()

    similarities = self.model.cosine_similarities(self.direction,
                                                  [self[word]])
    return similarities[0]
def _calc_projection_scores(self, words): self._is_direction_identified() df = pd.DataFrame({'word': words}) # TODO: maybe using cosine_similarities on all the vectors? # it might be faster df['projection'] = df['word'].apply(self.project_on_direction) df = df.sort_values('projection', ascending=False) return df
def calc_projection_data(self, words):
    """
    Calculate projection, projected and rejected vectors of a words list.

    :param list words: List of words
    :return: :class:`pandas.DataFrame` of the projection,
             projected and rejected vectors of the words list
    :raises RuntimeError: If the direction was not identified yet
    """
    # Check once up front. The previous implementation relied on a per-word
    # call to ``project_on_direction`` for this check and then discarded
    # its result in favor of the value returned by ``project_params``.
    self._is_direction_identified()

    projection_data = []
    for word in words:
        vector = self[word]

        (projection,
         projected_vector,
         rejected_vector) = project_params(normalize(vector),
                                           self.direction)

        projection_data.append({'word': word,
                                'vector': vector,
                                'projection': projection,
                                'projected_vector': projected_vector,
                                'rejected_vector': rejected_vector})

    return pd.DataFrame(projection_data)
def plot_projection_scores(self, words, n_extreme=10,
                           ax=None, axis_projection_step=None):
    """Plot the projection scalar of words on the direction.

    :param list words: The words to project
    :param int or None n_extreme: The number of extreme words to show;
                                  ``None`` shows all the words
    :param ax: The axes to draw on; a new figure is created if ``None``
    :param axis_projection_step: The distance between the x-axis ticks,
                                 0.1 if ``None``
    :type axis_projection_step: float or None
    :return: The ax object of the plot
    :raises RuntimeError: If the direction was not identified yet
    """
    self._is_direction_identified()

    projections_df = self._calc_projection_scores(words)
    projections_df['projection'] = projections_df['projection'].round(2)

    if n_extreme is not None:
        projections_df = take_two_sides_extreme_sorted(projections_df,
                                                       n_extreme=n_extreme)

    if ax is None:
        _, ax = plt.subplots(1)

    if axis_projection_step is None:
        axis_projection_step = 0.1

    # Shift by 0.5 before the colormap lookup, so that a zero projection
    # is mapped to the middle of the diverging 'RdBu' colormap.
    cmap = plt.get_cmap('RdBu')
    projections_df['color'] = ((projections_df['projection'] + 0.5)
                               .apply(cmap))

    most_extream_projection = (projections_df['projection']
                               .abs()
                               .max()
                               .round(1))

    # Draw everything on ``ax`` explicitly: going through ``plt`` would use
    # the current axes, which is not necessarily the one that was given.
    sns.barplot(x='projection', y='word', data=projections_df,
                palette=projections_df['color'], ax=ax)

    ax.set_xticks(np.arange(-most_extream_projection,
                            most_extream_projection + axis_projection_step,
                            axis_projection_step))
    ax.set_title('← {} {} {} →'.format(self.negative_end,
                                       ' ' * 20,
                                       self.positive_end))

    ax.set_xlabel('Direction Projection')
    ax.set_ylabel('Words')

    return ax
def plot_dist_projections_on_direction(self, word_groups, ax=None):
    """Plot the projection scalars distribution on the direction.

    :param dict word_groups: The word groups to project,
                             group names as keys and word lists as values
    :param ax: The axes to draw on; a new figure is created if ``None``
    :return: The ax object of the plot
    :raises RuntimeError: If the direction was not identified yet
    """
    self._is_direction_identified()

    if ax is None:
        _, ax = plt.subplots(1)

    for name in sorted(word_groups.keys()):
        words = word_groups[name]
        label = '{} (#{})'.format(name, len(words))
        vectors = [self[word] for word in words]
        projections = self.model.cosine_similarities(self.direction,
                                                     vectors)
        sns.distplot(projections, hist=False, label=label, ax=ax)

    # Draw on ``ax`` explicitly: going through ``plt`` would use
    # the current axes, which is not necessarily the one that was given.
    ax.axvline(0, color='k', linestyle='--')

    ax.set_title('← {} {} {} →'.format(self.negative_end,
                                       ' ' * 20,
                                       self.positive_end))
    ax.set_xlabel('Direction Projection')
    ax.set_ylabel('Density')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    return ax
@classmethod def _calc_bias_across_word_embeddings(cls, word_embedding_bias_dict, words): """ Calculate to projections and rho of words for two word embeddings. :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects as values, and their names as keys. :param list words: Words to be projected. :return tuple: Projections and spearman rho. """ # pylint: disable=W0212 assert len(word_embedding_bias_dict) == 2, 'Support only in two'\ 'word embeddings' intersection_words = [word for word in words if all(word in web for web in (word_embedding_bias_dict .values()))] projections = {name: web._calc_projection_scores(intersection_words)['projection'] # pylint: disable=C0301 for name, web in word_embedding_bias_dict.items()} df = pd.DataFrame(projections) df.index = intersection_words rho, _ = spearmanr(*df.transpose().values) return df, rho
@classmethod
def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                     words, ax=None, scatter_kwargs=None):
    """
    Plot the projections of the same words in two word embeddings.

    :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                           as values,
                                           and their names as keys.
    :param list words: Words to be projected.
    :param ax: The axes to draw on; a new figure is created if ``None``
    :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
    :type scatter_kwargs: dict or None
    :return: The ax object of the plot
    """
    # pylint: disable=W0212

    df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                    words)

    if ax is None:
        _, ax = plt.subplots(1)

    if scatter_kwargs is None:
        scatter_kwargs = {}

    name1, name2 = word_embedding_bias_dict.keys()

    ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

    # Draw on ``ax`` explicitly: going through ``plt`` would use
    # the current axes, which is not necessarily the one that was given.
    ax.set_title('Bias Across Word Embeddings '
                 '(Spearman Rho = {:0.2f})'.format(rho))

    # The ends of the first word embedding are used for both axis labels.
    negative_end = word_embedding_bias_dict[name1].negative_end
    positive_end = word_embedding_bias_dict[name1].positive_end
    ax.set_xlabel('← {} {} {} →'.format(negative_end,
                                        name1,
                                        positive_end))
    ax.set_ylabel('← {} {} {} →'.format(negative_end,
                                        name2,
                                        positive_end))

    # Same limits on both axes, so the two embeddings are comparable.
    ax_min = round_to_extreme(df.values.min())
    ax_max = round_to_extreme(df.values.max())
    ax.set_xlim(ax_min, ax_max)
    ax.set_ylim(ax_min, ax_max)

    return ax
# TODO: refactor for speed and clarity
def generate_analogies(self, n_analogies=100, seed='ends',
                       multiple=False,
                       delta=1., restrict_vocab=30000,
                       unrestricted=False):
    """
    Generate analogies based on a seed vector.

    x - y ~ seed vector.
    or a:x::b:y when a-b ~ seed vector.

    The seed vector can be defined by two word ends,
    or by the bias direction.

    ``delta`` is used for semantic coherence.
    The default value of 1 corresponds to an angle <= pi/3.

    There is criticism regarding generating analogies
    when used with `unrestricted=False` and not ignoring analogies
    with `match` column equal to `False`.
    Tolga's technique of generating analogies,
    as implemented in this method,
    is limited inherently to analogies with x != y,
    which may force "fake" bias analogies.

    See:

    - Nissim, M., van Noord, R., van der Goot, R. (2019).
      `Fair is Better than Sensational: Man is to Doctor
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

    :param seed: The definition of the seed vector.
                 Either by a tuple of two word ends,
                 or by `'ends'` for the pre-defined ends
                 or by `'direction'` for the pre-defined direction vector.
    :param int n_analogies: Maximal number of analogies to generate.
                            Fewer are returned if there are not enough
                            candidate pairs.
    :param bool multiple: Whether to allow multiple appearances of a word
                          in the analogies.
    :param float delta: Threshold for semantic similarity.
                        The maximal distance between x and y.
    :param int restrict_vocab: The vocabulary size to use.
    :param bool unrestricted: Whether to validate the generated analogies
                              with unrestricted `most_similar`.
    :return: Data Frame of analogies (x, y), their distances,
             and their cosine similarity scores
    """
    # pylint: disable=C0301,R0914

    if not unrestricted:
        warnings.warn('Not Using unrestricted most_similar '
                      'may introduce fake biased analogies.')

    (seed_vector,
     positive_end,
     negative_end) = get_seed_vector(seed, self)

    restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

    normalized_vectors = (restrict_vocab_vectors
                          / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

    pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

    # `pairs_distances` must be not-equal to zero
    # otherwise, x-y will be the zero vector, and every cosine similarity
    # will be equal to zero.
    # This cause to the **limitation** of this method which enforce a not-same
    # words for x and y.
    pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

    pairs_indices = np.array(np.nonzero(pairs_mask)).T
    x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
    y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

    x_minus_y_vectors = x_vectors - y_vectors
    normalized_x_minus_y_vectors = (x_minus_y_vectors
                                    / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

    cos_distances = normalized_x_minus_y_vectors @ seed_vector

    # Candidate pairs from the best score to the worst.
    sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

    analogies = []
    generated_words_x = set()
    generated_words_y = set()

    # A ``for`` loop (rather than ``next`` on an iterator) stops cleanly
    # when the candidates run out before ``n_analogies`` were collected.
    for cos_distance_index in sorted_cos_distances_indices:
        if len(analogies) >= n_analogies:
            break

        pair_index = pairs_indices[cos_distance_index]
        word_x, word_y = [self.model.index2word[index]
                          for index in pair_index]

        if not multiple and (word_x in generated_words_x
                             or word_y in generated_words_y):
            continue

        analogy = {positive_end: word_x,
                   negative_end: word_y,
                   'score': cos_distances[cos_distance_index],
                   'distance': pairs_distances[tuple(pair_index)]}

        generated_words_x.add(word_x)
        generated_words_y.add(word_y)

        if unrestricted:
            most_x = next(word
                          for word, _ in most_similar(self.model,
                                                      [word_y, positive_end],
                                                      [negative_end]))
            most_y = next(word
                          for word, _ in most_similar(self.model,
                                                      [word_x, negative_end],
                                                      [positive_end]))

            analogy['most_x'] = most_x
            analogy['most_y'] = most_y
            analogy['match'] = ((word_x == most_x)
                                and (word_y == most_y))

        analogies.append(analogy)

    columns = [positive_end, negative_end, 'distance', 'score']

    if unrestricted:
        columns.extend(['most_x', 'most_y', 'match'])

    # Passing ``columns`` also yields a well-formed empty frame
    # when no analogy was found.
    return pd.DataFrame(analogies, columns=columns)
def calc_direct_bias(self, neutral_words, c=None):
    """Calculate the direct bias.

    The direct bias is the mean, over the neutral words,
    of the absolute projection on the direction raised to the power of c.

    :param list neutral_words: List of neutral words
    :param c: Strictness of bias measuring, 1 if ``None``
    :type c: float or None
    :return: The direct bias
    """
    strictness = 1 if c is None else c

    scores_df = self._calc_projection_scores(neutral_words)
    bias_terms = scores_df['projection'].abs() ** strictness

    return bias_terms.sum() / len(neutral_words)
def calc_indirect_bias(self, word1, word2):
    """Calculate the indirect bias between two words.

    Based on the amount of shared projection of the words on the direction.

    Also called PairBias.

    :param str word1: First word
    :param str word2: Second word
    :return: The indirect bias between the two words
    :raises RuntimeError: If the direction was not identified yet
    """
    self._is_direction_identified()

    unit1 = normalize(self[word1])
    unit2 = normalize(self[word2])

    # The parts of the words that remain after removing the direction.
    rejection1 = reject_vector(unit1, self.direction)
    rejection2 = reject_vector(unit2, self.direction)

    similarity = unit1 @ unit2
    rejected_similarity = cosine_similarity(rejection1, rejection2)

    # Relative change of the similarity when the direction is removed.
    return (similarity - rejected_similarity) / similarity
def generate_closest_words_indirect_bias(self,
                                         neutral_positive_end,
                                         neutral_negative_end,
                                         words=None, n_extreme=5):
    """
    Generate closest words to a neutral direction and their indirect bias.

    The direction of the neutral words is used to find
    the most extreme words.
    The indirect bias is calculated between the most extreme words
    and the closest end.

    :param str neutral_positive_end: A word that defines the positive side
                                     of the neutral direction.
    :param str neutral_negative_end: A word that defines the negative side
                                     of the neutral direction.
    :param list words: List of words to project on the neutral direction.
    :param int n_extreme: The number of the most extreme words
                          (positive and negative) to show.
    :return: Data Frame of the most extreme words
             with their projection scores and indirect biases.
    """
    neutral_direction = normalize(self[neutral_positive_end]
                                  - self[neutral_negative_end])

    rows = []
    for word in words:
        rows.append({'word': word,
                     'projection': normalize(self[word]) @ neutral_direction})

    extremes_df = pd.DataFrame(rows).sort_values('projection',
                                                 ascending=False)

    extremes_df = take_two_sides_extreme_sorted(extremes_df, n_extreme,
                                                'end',
                                                neutral_positive_end,
                                                neutral_negative_end)

    def _row_indirect_bias(row):
        # Indirect bias between the word and the end it is closest to.
        return self.calc_indirect_bias(row['word'], row['end'])

    extremes_df['indirect_bias'] = extremes_df.apply(_row_indirect_bias,
                                                     axis=1)

    extremes_df = extremes_df.set_index(['end', 'word'])
    return extremes_df[['projection', 'indirect_bias']]
def _extract_neutral_words(self, specific_words): extended_specific_words = set() # because or specific_full data was trained on partial word embedding for word in specific_words: extended_specific_words.add(word) extended_specific_words.add(word.lower()) extended_specific_words.add(word.upper()) extended_specific_words.add(word.title()) neutral_words = [word for word in self.model.vocab if word not in extended_specific_words] return neutral_words def _neutralize(self, neutral_words): self._is_direction_identified() if self._verbose: neutral_words_iter = tqdm(neutral_words) else: neutral_words_iter = iter(neutral_words) for word in neutral_words_iter: neutralized_vector = reject_vector(self[word], self.direction) update_word_vector(self.model, word, neutralized_vector) self.model.init_sims(replace=True) def _equalize(self, equality_sets): # pylint: disable=R0914 self._is_direction_identified() if self._verbose: words_data = [] for equality_set_index, equality_set_words in enumerate(equality_sets): equality_set_vectors = [normalize(self[word]) for word in equality_set_words] center = np.mean(equality_set_vectors, axis=0) (projected_center, rejected_center) = project_reject_vector(center, self.direction) scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2) for word, vector in zip(equality_set_words, equality_set_vectors): projected_vector = project_vector(vector, self.direction) projected_part = normalize(projected_vector - projected_center) # In the code it is different of Bolukbasi # It behaves the same only for equality_sets # with size of 2 (pairs) - not sure! 
# However, my code is the same as the article # equalized_vector = rejected_center + scaling * self.direction # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37 # For pairs, projected_part_vector1 == -projected_part_vector2, # and this is the same as # projected_part_vector1 == self.direction equalized_vector = rejected_center + scaling * projected_part update_word_vector(self.model, word, equalized_vector) if self._verbose: words_data.append({ 'equality_set_index': equality_set_index, 'word': word, 'scaling': scaling, 'projected_scalar': vector @ self.direction, 'equalized_projected_scalar': (equalized_vector @ self.direction), }) if self._verbose: print('Equalize Words Data ' '(all equal for 1-dim bias space (direction):') words_data_df = (pd.DataFrame(words_data) .set_index(['equality_set_index', 'word'])) print(tabulate(words_data_df, headers='keys')) self.model.init_sims(replace=True) def _generate_pair_candidates(self, pairs): # pylint: disable=line-too-long return {(candidate1, candidate2) for word1, word2 in pairs for candidate1, candidate2 in zip(generate_one_word_forms(word1), generate_one_word_forms(word2)) if candidate1 in self.model and candidate2 in self.model}
def debias(self, method='hard', neutral_words=None, equality_sets=None,
           inplace=True):
    """Debias the word embedding.

    :param str method: The method of debiasing, one of ``DEBIAS_METHODS``.
                       ``'neutralize'`` runs only the neutralize step,
                       ``'hard'`` runs neutralize and then equalize.
                       ``'soft'`` is accepted but not implemented:
                       a warning is issued and nothing is changed.
    :param list neutral_words: List of neutral words
                               for the neutralize step
    :param list equality_sets: List of equality sets,
                               for the equalize step.
                               The sets represent the direction.
    :param bool inplace: Whether to debias the object inplace
                         or return a new one
    :return: ``None`` if ``inplace``, otherwise the debiased copy
    :raises ValueError: If ``method`` is not one of ``DEBIAS_METHODS``

    .. warning::

      After calling `debias`,
      all the vectors of the word embedding
      will be normalized to unit length.

    """

    # pylint: disable=W0212

    # Validate everything before the (potentially expensive) deep copy
    # and before any vector is modified.
    if method not in DEBIAS_METHODS:
        raise ValueError('method should be one of {}, {} was given'.format(
            DEBIAS_METHODS, method))

    if method == 'hard':
        assert all(len(equality_set) == 2
                   for equality_set in equality_sets), \
            'Currently supporting only equality pairs.'

    if method == 'soft':
        warnings.warn("The 'soft' debias method is not implemented, "
                      'the word embedding is left unchanged.')

    bias_word_embedding = self if inplace else copy.deepcopy(self)

    if method in ['hard', 'neutralize']:
        if self._verbose:
            print('Neutralize...')
        bias_word_embedding._neutralize(neutral_words)

    if method == 'hard':
        if self._verbose:
            print('Equalize...')

        equality_sets = (bias_word_embedding
                         ._generate_pair_candidates(equality_sets))

        bias_word_embedding._equalize(equality_sets)

    if inplace:
        return None
    return bias_word_embedding
def evaluate_word_embedding(self,
                            kwargs_word_pairs=None,
                            kwargs_word_analogies=None):
    """
    Evaluate word pairs tasks and word analogies tasks.

    :param kwargs_word_pairs: Kwargs for
                              evaluate_word_pairs
                              method.
    :type kwargs_word_pairs: dict or None
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: Tuple of :class:`pandas.DataFrame`
             for the evaluation results.
    """
    # As a method of the class, this bare name refers to the module-level
    # function imported from ``responsibly.we.benchmark``,
    # not to this method.
    results = evaluate_word_embedding(self.model,
                                      kwargs_word_pairs,
                                      kwargs_word_analogies)
    return results
def learn_full_specific_words(self, seed_specific_words,
                              max_non_specific_examples=None, debug=None):
    """Learn specific words given a list of seed specific words.

    Using Linear SVM.

    :param list seed_specific_words: List of seed specific words
    :param int max_non_specific_examples: The number of non-specific words
                                          to sample for training,
                                          ``MAX_NON_SPECIFIC_EXAMPLES``
                                          if ``None``
    :param debug: Whether to return also the training data
    :type debug: bool or None
    :return: List of learned specific words and the classifier object;
             if ``debug``, the training matrix and labels are appended
    """

    if debug is None:
        debug = False

    if max_non_specific_examples is None:
        max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

    # A set makes the membership test below constant-time per word.
    seed_words = set(seed_specific_words)

    # All the seed words of the vocabulary are used as positive examples,
    # but only the first ``max_non_specific_examples`` other words
    # (in vocabulary order) are used as negative examples.
    data = []
    non_specific_example_count = 0
    for word in self.model.vocab:
        is_specific = word in seed_words

        if not is_specific:
            non_specific_example_count += 1
            if non_specific_example_count > max_non_specific_examples:
                continue

        data.append((self[word], is_specific))

    # A local generator gives the same shuffle as seeding the global one,
    # without resetting NumPy's global random state for the caller.
    np.random.RandomState(RANDOM_STATE).shuffle(data)

    X, y = zip(*data)

    X = np.array(X)
    X /= np.linalg.norm(X, axis=1)[:, None]

    y = np.array(y).astype('int')

    clf = LinearSVC(C=1, class_weight='balanced',
                    random_state=RANDOM_STATE)

    clf.fit(X, y)

    full_specific_words = [word for word in self.model.vocab
                           if clf.predict([normalize(self[word])])]

    if not debug:
        return full_specific_words, clf

    return full_specific_words, clf, X, y
def _plot_most_biased_one_cluster(self,
                                  most_biased_neutral_words, y_bias,
                                  random_state=1,
                                  ax=None):
    """Plot the clustering of the given words as a classification of y_bias.

    :return: The value returned by ``plot_clustering_as_classification``
    """
    vectors = []
    for word in most_biased_neutral_words:
        vectors.append(self.model[word])

    return plot_clustering_as_classification(vectors,
                                             y_bias,
                                             random_state=random_state,
                                             ax=ax)
def compute_factual_association(self, factual_properity):
    """Compute association of a factual property to the projection.

    Inspired by WEFAT (Word-Embedding Factual Association Test),
    but it is not the same:

    - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
      `Semantics derived automatically
      from language corpora contain human-like biases
      <http://opus.bath.ac.uk/55288/>`_.
      Science, 356(6334), 183-186.

    In a future version, the WEFAT will also be implemented.

    If a word doesn't exist in the word embedding,
    then it will be filtered out.

    For example, in :class:`responsibly.we.bias.GenderBiasWE`,
    the default factual property is the percentage of female
    in various occupations
    from the Labor Force Statistics of 2017 Population Survey,
    Taken from: https://arxiv.org/abs/1804.06876

    :param dict factual_properity: Dictionary of words
                                   and their factual values.
    :return: Pearson r, pvalue and the words with their
             associated factual values
             and their projection on the bias direction.
    """
    points = {}
    for word, value in factual_properity.items():
        if word not in self.model:
            continue
        points[word] = (value, self.project_on_direction(word))

    factual_values, projections = zip(*points.values())

    return pearsonr(factual_values, projections), points
def plot_factual_association(self, factual_properity, ax=None):
    """Plot association of a factual property to the projection.

    See: :meth:`BiasWordEmbedding.compute_factual_association`

    :param dict factual_properity: Dictionary of words
                                   and their factual values.
    :param ax: The axes to draw on; a new figure is created if ``None``
    :return: The ax object of the plot
    """

    result = self.compute_factual_association(factual_properity)

    (r, pvalue), points = result
    x, y = zip(*points.values())

    if ax is None:
        _, ax = plt.subplots(1)

    ax.scatter(x, y)

    # Draw on ``ax`` explicitly: going through ``plt`` would use
    # the current axes, which is not necessarily the one that was given.
    ax.set_title('Association between Factual Property '
                 'and Projection on Direction '
                 '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                 .format(r, pvalue))

    ax.set_xlabel('Factual Property')
    ax.set_ylabel('Projection on Direction')

    return ax
@staticmethod
def plot_most_biased_clustering(biased, debiased,
                                seed='ends', n_extreme=500,
                                random_state=1):
    """Plot clustering as classification of biased neutral words.

    :param biased: Biased word embedding of
                   :class:`~responsibly.we.bias.BiasWordEmbedding`.
    :param debiased: Debiased word embedding of
                     :class:`~responsibly.we.bias.BiasWordEmbedding`.
    :param seed: The definition of the seed vector.
                 Either by a tuple of two word ends,
                 or by `'ends'` for the pre-defined ends
                 or by `'direction'` for
                 the pre-defined direction vector.
    :param n_extreme: The number of extreme biased
                      neutral words to use, on each side.
    :param random_state: Passed to the clustering plot
                         of both word embeddings.
    :return: Tuple of list of ax objects of the plot,
             and a dictionary with the most positive
             and negative words.

    Based on:

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove
      Them <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - https://github.com/gonenhila/gender_bias_lipstick
    """
    # pylint: disable=protected-access,too-many-locals,line-too-long

    assert biased.positive_end == debiased.positive_end, \
        'Postive ends should be the same.'
    assert biased.negative_end == debiased.negative_end, \
        'Negative ends should be the same.'

    seed_vector, _, _ = get_seed_vector(seed, biased)

    # The words are ranked by their projection in the *biased* embedding.
    neutral_words = biased._data['neutral_words']
    scored_words = sorted((normalize(biased[word]) @ seed_vector, word)
                          for word in neutral_words)

    _, most_negative_words = zip(*scored_words[:n_extreme])
    _, most_positive_words = zip(*scored_words[-n_extreme:])

    most_biased_neutral_words = most_negative_words + most_positive_words

    # Negative side first, matching the order of the words above.
    y_bias = [False] * n_extreme + [True] * n_extreme

    _, axes = plt.subplots(1, 2, figsize=(20, 5))

    panels = ((biased, 'Biased - Accuracy={}', axes[0]),
              (debiased, 'Debiased - Accuracy={}', axes[1]))

    for embedding, title_template, panel_ax in panels:
        accuracy = embedding._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                           y_bias,
                                                           random_state=random_state,
                                                           ax=panel_ax)
        panel_ax.set_title(title_template.format(accuracy))

    return axes, {biased.positive_end: most_positive_words,
                  biased.negative_end: most_negative_words}
class GenderBiasWE(BiasWordEmbedding):
    """Measure and adjust the Gender Bias in English Word Embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param str identify_direction: Set the method of identifying
                                   the gender direction:
                                   `'single'`, `'sum'` or `'pca'`.
                                   A falsy value skips the identification.
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction='pca', to_normalize=True):
        # Forward ``to_normalize`` as given; it used to be hard-coded to
        # ``True``, which silently ignored the caller's choice.
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)

        self._initialize_data()

        if identify_direction:
            definitional = None

            if identify_direction == 'single':
                definitional = ('she', 'he')
            elif identify_direction == 'sum':
                definitional = list(zip(*self._data['definitional_pairs']))
            elif identify_direction == 'pca':
                definitional = self._data['definitional_pairs']

            # An unknown method is rejected by ``_identify_direction``.
            self._identify_direction('she', 'he',
                                     definitional,
                                     identify_direction)

    def _initialize_data(self):
        """Load the Bolukbasi gender data and adapt it to the model."""
        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])

        if not self.only_lower:
            self._data['specific_full_with_definitional_equalize'] = \
                generate_words_forms(self
                                     ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301

        # Keep, in each word group, only the words that are in the model.
        for key in self._data['word_group_keys']:
            self._data[key] = (self._filter_words_by_model(self
                                                           ._data[key]))

        self._data['neutral_words'] = self._extract_neutral_words(self
                                                                  ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301
        self._data['neutral_words'].sort()
        self._data['word_group_keys'].append('neutral_words')

    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot projection scores; ``'professions'`` uses the built-in list.

        See :meth:`BiasWordEmbedding.plot_projection_scores`.
        """
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)

    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        """Plot projection distributions; ``'bolukbasi'`` uses all groups.

        See :meth:`BiasWordEmbedding.plot_dist_projections_on_direction`.
        """
        if isinstance(word_groups, str) and word_groups == 'bolukbasi':
            word_groups = {key: self._data[key]
                           for key in self._data['word_group_keys']}

        return super().plot_dist_projections_on_direction(word_groups, ax)

    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        """Plot the projections of the neutral professions in two embeddings.

        See :meth:`BiasWordEmbedding.plot_bias_across_word_embeddings`.

        :return: The ax object of the plot
        """
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']

        # Zero-argument ``super()`` also works in class methods, and unlike
        # ``super(cls, cls)`` it does not recurse forever when this method
        # is called on a sub-class of ``GenderBiasWE``.
        return super().plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words, ax,
                                                        scatter_kwargs)

    def calc_direct_bias(self, neutral_words='professions', c=None):
        """Calculate the direct bias; ``'professions'`` uses the built-in list.

        See :meth:`BiasWordEmbedding.calc_direct_bias`.
        """
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            neutral_words = self._data['neutral_profession_names']

        # ``c`` is forwarded for custom word lists as well
        # (it used to be dropped in that case).
        return super().calc_direct_bias(neutral_words, c)

    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        """Generate closest words; ``'professions'`` uses the built-in list.

        See :meth:`BiasWordEmbedding.generate_closest_words_indirect_bias`.
        """
        # pylint: disable=C0301

        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)

    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias, filling missing arguments from the built-in gender data.

        See :meth:`BiasWordEmbedding.debias`.
        """
        # pylint: disable=line-too-long
        if method in ['hard', 'neutralize']:
            if neutral_words is None:
                neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            equality_sets = {tuple(w) for w in self._data['equalize_pairs']}
            equality_sets |= {tuple(w) for w in self._data['definitional_pairs']}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)

    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        """Learn specific words; ``'bolukbasi'`` uses the built-in seed list.

        See :meth:`BiasWordEmbedding.learn_full_specific_words`.
        """
        if (isinstance(seed_specific_words, str)
                and seed_specific_words == 'bolukbasi'):
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)

    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        """Compute the association, by default with the female percentage.

        See :meth:`BiasWordEmbedding.compute_factual_association`.
        """
        return super().compute_factual_association(factual_properity)

    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        """Plot the association, by default with the female percentage.

        See :meth:`BiasWordEmbedding.plot_factual_association`.
        """
        return super().plot_factual_association(factual_properity, ax)