# pylint: disable=too-many-lines
"""
Measuring and adjusting bias in word embedding by Bolukbasi (2016).
References:
- Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
& Kalai, A. T. (2016).
`Man is to computer programmer as woman is to homemaker?
debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
In Advances in neural information processing systems
(pp. 4349-4357).
- The code and data are based on the GitHub repository:
https://github.com/tolga-b/debiaswe (MIT License).
- Gonen, H., & Goldberg, Y. (2019).
`Lipstick on a Pig:
Debiasing Methods Cover up Systematic Gender Biases
in Word Embeddings But do not Remove Them
<https://arxiv.org/abs/1903.03862>`_.
arXiv preprint arXiv:1903.03862.
- Nissim, M., van Noord, R., van der Goot, R. (2019).
`Fair is Better than Sensational: Man is to Doctor
as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.
Usage
~~~~~
.. code:: python
>>> from responsibly.we import GenderBiasWE
>>> from gensim import downloader
>>> w2v_model = downloader.load('word2vec-google-news-300')
>>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
>>> w2v_gender_bias_we.calc_direct_bias()
0.07307904249481942
>>> w2v_gender_bias_we.debias()
>>> w2v_gender_bias_we.calc_direct_bias()
1.7964246601064155e-09
Types of Bias
~~~~~~~~~~~~~
Direct Bias
^^^^^^^^^^^
1. Associations
Words that are closer to one end (e.g., *he*) than to
the other end (*she*).
For example, occupational stereotypes (page 7).
Calculated by
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_direct_bias`.
2. Analogies
Analogies of *he:x::she:y*.
For example analogies exhibiting stereotypes (page 7).
Generated by
:meth:`~responsibly.we.bias.BiasWordEmbedding.generate_analogies`.
Indirect Bias
^^^^^^^^^^^^^
The projection of a neutral word onto the direction defined by
two other neutral words is explained, to a great extent,
by their shared projection onto the bias direction.
Calculated by
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_indirect_bias`
and
:meth:`~responsibly.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.
"""
import copy
import warnings
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.svm import LinearSVC
from tabulate import tabulate
from tqdm import tqdm
from responsibly.consts import RANDOM_STATE
from responsibly.utils import _warning_setup
from responsibly.we.benchmark import evaluate_word_embedding
from responsibly.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
from responsibly.we.utils import (
assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
generate_words_forms, get_seed_vector, most_similar, normalize,
plot_clustering_as_classification, project_params, project_reject_vector,
project_vector, reject_vector, round_to_extreme,
take_two_sides_extreme_sorted, update_word_vector,
)
# Strategies accepted by ``BiasWordEmbedding._identify_direction``
# for identifying the bias direction.
DIRECTION_METHODS = ['single', 'sum', 'pca']
# Values accepted for the ``method`` argument of ``debias``.
# NOTE(review): only 'neutralize' and 'hard' trigger any work in
# ``BiasWordEmbedding.debias``; 'soft' passes validation but is a no-op there.
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
# Minimal explained-variance ratio required from the first principal
# component when the direction is identified with the 'pca' method.
FIRST_PC_THRESHOLD = 0.5
# Default cap on the number of non-specific words used as training
# examples in ``learn_full_specific_words``.
MAX_NON_SPECIFIC_EXAMPLES = 1000
# Names exported by ``from <module> import *``.
__all__ = ['GenderBiasWE', 'BiasWordEmbedding']
# NOTE(review): presumably configures how warnings are displayed;
# the helper lives in ``responsibly.utils`` - confirm there.
_warning_setup()
[docs]class BiasWordEmbedding:
"""Measure and adjust a bias in English word embedding.
:param model: Word embedding model of ``gensim.model.KeyedVectors``
:param bool only_lower: Whether the word embedding contains
only lower case words
:param bool verbose: Set verbosity
:param bool to_normalize: Whether to normalize all the vectors
(recommended!)
"""
def __init__(self, model, only_lower=False, verbose=False,
             identify_direction=False, to_normalize=True):
    """Wrap a gensim keyed-vectors model for bias measurement.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param identify_direction: Must stay ``False`` when this class itself
                               is instantiated; the parameter exists only
                               so that sub-classes share the interface.
    :param bool to_normalize: Whether to normalize all the vectors
                              (by calling ``model.init_sims(replace=True)``)
    :raises ValueError: If ``identify_direction`` is not ``False``
                        for a direct instance of this class.
    """
    # pylint: disable=undefined-variable
    assert_gensim_keyed_vectors(model)

    # TODO: this is bad Python, ask someone about it
    #       probably should be a better design
    #       identify_direction doesn't have any meaning
    #       for the class BiasWordEmbedding
    # The goal is to force this interface of sub-classes.
    instantiated_directly = self.__class__ == __class__
    if instantiated_directly and identify_direction is not False:
        raise ValueError('identify_direction must be False'
                         ' for an instance of {}'
                         .format(__class__))

    # No direction is known until one is explicitly identified.
    self.direction = None
    self.positive_end = None
    self.negative_end = None

    self._verbose = verbose
    # TODO: write unitest for when it is False
    self.only_lower = only_lower
    self.model = model

    if to_normalize:
        self.model.init_sims(replace=True)
def __copy__(self):
bias_word_embedding = self.__class__(self.model,
self.only_lower,
self._verbose,
identify_direction=False)
bias_word_embedding.direction = copy.deepcopy(self.direction)
bias_word_embedding.positive_end = copy.deepcopy(self.positive_end)
bias_word_embedding.negative_end = copy.deepcopy(self.negative_end)
return bias_word_embedding
def __deepcopy__(self, memo):
bias_word_embedding = copy.copy(self)
bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
return bias_word_embedding
def __getitem__(self, key):
return self.model[key]
def __contains__(self, item):
return item in self.model
def _filter_words_by_model(self, words):
return [word for word in words if word in self]
def _is_direction_identified(self):
if self.direction is None:
raise RuntimeError('The direction was not identified'
' for this {} instance'
.format(self.__class__.__name__))
# There is a mistake in the article.
# It is written (section 5.1):
# "To identify the gender subspace, we took the ten gender pair difference
# vectors and computed its principal components (PCs)"
# However, the original source code centers each pair around its mean
# and runs the PCA over both centered vectors of every pair
# (which is what is done here as well):
# https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
def _identify_subspace_by_pca(self, definitional_pairs, n_components):
    """Fit a PCA on the pair-centered vectors of the definitional pairs.

    Every pair contributes two rows: each normalized word vector
    minus the midpoint of the pair.

    :param definitional_pairs: Iterable of word pairs
    :param int n_components: Number of principal components to fit
    :return: The fitted ``PCA`` object
    """
    centered_vectors = []

    for first_word, second_word in definitional_pairs:
        first = normalize(self[first_word])
        second = normalize(self[second_word])
        midpoint = (first + second) / 2
        centered_vectors.extend([first - midpoint, second - midpoint])

    pca = PCA(n_components=n_components)
    pca.fit(centered_vectors)

    if self._verbose:
        print(tabulate(enumerate(pca.explained_variance_ratio_, start=1),
                       headers=['Principal Component',
                                'Explained Variance Ratio']))

    return pca
# TODO: add the SVD method from section 6 step 1
# It seems there is a mistake there, I think it is the same as PCA
# just with replacing it with SVD
def _identify_direction(self, positive_end, negative_end,
                        definitional, method='pca'):
    """Identify the bias direction and store it on the instance.

    Sets ``self.direction``, ``self.positive_end``
    and ``self.negative_end``.

    :param str positive_end: Word marking the positive side
                             of the direction
    :param str negative_end: Word marking the negative side
                             of the direction
    :param definitional: Definitional words; the expected structure
                         depends on ``method``: a pair of words
                         for ``'single'``, a pair of word groups
                         for ``'sum'`` and an iterable of word pairs
                         for ``'pca'``.
    :param str method: One of ``DIRECTION_METHODS``
    :raises ValueError: If ``method`` is unknown or both ends are equal.
    :raises RuntimeError: If, with the ``'pca'`` method, the first
                          principal component explains less than
                          ``FIRST_PC_THRESHOLD`` of the variance.
    """
    if method not in DIRECTION_METHODS:
        raise ValueError('method should be one of {}, {} was given'.format(
            DIRECTION_METHODS, method))

    if positive_end == negative_end:
        # The pieces are joined by implicit literal concatenation,
        # so each continuation has to start with its own space.
        raise ValueError('positive_end and negative_end'
                         ' should be different, and not the same "{}"'
                         .format(positive_end))

    if self._verbose:
        print('Identify direction using {} method...'.format(method))

    direction = None

    if method == 'single':
        if self._verbose:
            print('Positive definitional end:', definitional[0])
            print('Negative definitional end:', definitional[1])
        direction = normalize(normalize(self[definitional[0]])
                              - normalize(self[definitional[1]]))

    elif method == 'sum':
        group1_sum_vector = np.sum([self[word]
                                    for word in definitional[0]], axis=0)
        group2_sum_vector = np.sum([self[word]
                                    for word in definitional[1]], axis=0)

        diff_vector = (normalize(group1_sum_vector)
                       - normalize(group2_sum_vector))

        direction = normalize(diff_vector)

    elif method == 'pca':
        pca = self._identify_subspace_by_pca(definitional, 10)
        first_pc_ratio = pca.explained_variance_ratio_[0]
        if first_pc_ratio < FIRST_PC_THRESHOLD:
            raise RuntimeError('The Explained variance'
                               ' of the first principal component should be'
                               ' at least {}, but it is {}'
                               .format(FIRST_PC_THRESHOLD,
                                       first_pc_ratio))
        direction = pca.components_[0]

        # if direction is opposite (e.g. we cannot control
        # what the PCA will return)
        ends_diff_projection = cosine_similarity((self[positive_end]
                                                  - self[negative_end]),
                                                 direction)
        if ends_diff_projection < 0:
            direction = -direction  # pylint: disable=invalid-unary-operand-type

    self.direction = direction
    self.positive_end = positive_end
    self.negative_end = negative_end
[docs] def project_on_direction(self, word):
"""Project the normalized vector of the word on the direction.
:param str word: The word tor project
:return float: The projection scalar
"""
self._is_direction_identified()
vector = self[word]
projection_score = self.model.cosine_similarities(self.direction,
[vector])[0]
return projection_score
def _calc_projection_scores(self, words):
self._is_direction_identified()
df = pd.DataFrame({'word': words})
# TODO: maybe using cosine_similarities on all the vectors?
# it might be faster
df['projection'] = df['word'].apply(self.project_on_direction)
df = df.sort_values('projection', ascending=False)
return df
def calc_projection_data(self, words):
    """
    Calculate projection, projected and rejected vectors of a words list.

    The ``vector`` column holds the vector as stored in the model,
    while the projection data is computed from its normalized form.

    :param list words: List of words
    :return: :class:`pandas.DataFrame` of the projection,
             projected and rejected vectors of the words list
    :raises RuntimeError: If the direction was not identified
                          (checked for every word, so an empty list
                          never raises).
    """
    projection_data = []

    for word in words:
        # Only the direction check is needed here; the projection itself
        # comes from ``project_params`` below, so there is no reason
        # to compute (and discard) a cosine similarity per word.
        self._is_direction_identified()

        vector = self[word]
        normalized_vector = normalize(vector)

        (projection,
         projected_vector,
         rejected_vector) = project_params(normalized_vector,
                                           self.direction)

        projection_data.append({'word': word,
                                'vector': vector,
                                'projection': projection,
                                'projected_vector': projected_vector,
                                'rejected_vector': rejected_vector})

    return pd.DataFrame(projection_data)
def plot_projection_scores(self, words, n_extreme=10,
                           ax=None, axis_projection_step=None):
    """Plot the projection scalar of words on the direction.

    :param list words: The words to project
    :param int or None n_extreme: The number of extreme words to show
                                  (``None`` shows all the words)
    :param ax: The axes to draw on; a new figure is created
               when it is ``None``
    :param axis_projection_step: Distance between ticks of the projection
                                 axis (defaults to 0.1)
    :type axis_projection_step: float or None
    :return: The ax object of the plot
    :raises RuntimeError: If the direction was not identified.
    """
    self._is_direction_identified()

    projections_df = self._calc_projection_scores(words)
    projections_df['projection'] = projections_df['projection'].round(2)

    if n_extreme is not None:
        projections_df = take_two_sides_extreme_sorted(projections_df,
                                                       n_extreme=n_extreme)

    if ax is None:
        _, ax = plt.subplots(1)

    if axis_projection_step is None:
        axis_projection_step = 0.1

    # Shift the projections so that zero maps to the middle of the colormap.
    cmap = plt.get_cmap('RdBu')
    projections_df['color'] = ((projections_df['projection'] + 0.5)
                               .apply(cmap))

    most_extreme_projection = np.round(
        projections_df['projection']
        .abs()
        .max(),
        decimals=1)

    # Everything is drawn explicitly on ``ax``; going through ``plt.*``
    # would target the *current* axes, which is not necessarily
    # the one that was passed by the caller.
    sns.barplot(x='projection', y='word', data=projections_df,
                palette=projections_df['color'], ax=ax)

    ax.set_xticks(np.arange(-most_extreme_projection,
                            most_extreme_projection + axis_projection_step,
                            axis_projection_step))

    ax.set_title('← {} {} {} →'.format(self.negative_end,
                                       ' ' * 20,
                                       self.positive_end))
    ax.set_xlabel('Direction Projection')
    ax.set_ylabel('Words')

    return ax
def plot_dist_projections_on_direction(self, word_groups, ax=None):
    """Plot the projection scalars distribution on the direction.

    One density curve is drawn per group, in sorted order of the names.

    :param dict word_groups: The groups to project; names as keys
                             and lists of words as values
    :param ax: The axes to draw on; a new figure is created
               when it is ``None``
    :return: The ax object of the plot
    :raises RuntimeError: If the direction was not identified.
    """
    self._is_direction_identified()

    if ax is None:
        _, ax = plt.subplots(1)

    names = sorted(word_groups.keys())

    for name in names:
        words = word_groups[name]
        label = '{} (#{})'.format(name, len(words))
        vectors = [self[word] for word in words]
        projections = self.model.cosine_similarities(self.direction,
                                                     vectors)
        sns.distplot(projections, hist=False, label=label, ax=ax)

    # Decorate the axes that hold the curves, not whatever axes
    # happen to be the current ones.
    ax.axvline(0, color='k', linestyle='--')

    ax.set_title('← {} {} {} →'.format(self.negative_end,
                                       ' ' * 20,
                                       self.positive_end))
    ax.set_xlabel('Direction Projection')
    ax.set_ylabel('Density')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    return ax
@classmethod
def _calc_bias_across_word_embeddings(cls,
word_embedding_bias_dict,
words):
"""
Calculate to projections and rho of words for two word embeddings.
:param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
as values,
and their names as keys.
:param list words: Words to be projected.
:return tuple: Projections and spearman rho.
"""
# pylint: disable=W0212
assert len(word_embedding_bias_dict) == 2, 'Support only in two'\
'word embeddings'
intersection_words = [word for word in words
if all(word in web
for web in (word_embedding_bias_dict
.values()))]
projections = {name: web._calc_projection_scores(intersection_words)['projection'] # pylint: disable=C0301
for name, web in word_embedding_bias_dict.items()}
df = pd.DataFrame(projections)
df.index = intersection_words
rho, _ = spearmanr(*df.transpose().values)
return df, rho
@classmethod
def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                     words, ax=None, scatter_kwargs=None):
    """
    Plot the projections of the same words of two word embeddings.

    :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                          as values,
                                          and their names as keys.
    :param list words: Words to be projected.
    :param ax: The axes to draw on; a new figure is created
               when it is ``None``
    :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
    :type scatter_kwargs: dict or None
    :return: The ax object of the plot
    """
    # pylint: disable=W0212

    df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                    words)

    if ax is None:
        _, ax = plt.subplots(1)

    if scatter_kwargs is None:
        scatter_kwargs = {}

    name1, name2 = word_embedding_bias_dict.keys()

    ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

    # All the decorations go through ``ax`` so that a caller-supplied
    # axes is the one that gets them (``plt.*`` targets the current axes).
    ax.set_title('Bias Across Word Embeddings'
                 ' (Spearman Rho = {:0.2f})'.format(rho))

    # The ends are taken from the first word embedding only.
    negative_end = word_embedding_bias_dict[name1].negative_end
    positive_end = word_embedding_bias_dict[name1].positive_end
    ax.set_xlabel('← {}     {}     {} →'.format(negative_end,
                                                name1,
                                                positive_end))
    ax.set_ylabel('← {}     {}     {} →'.format(negative_end,
                                                name2,
                                                positive_end))

    # Same limits on both axes.
    ax_min = round_to_extreme(df.values.min())
    ax_max = round_to_extreme(df.values.max())
    ax.set_xlim(ax_min, ax_max)
    ax.set_ylim(ax_min, ax_max)

    return ax
# TODO: refactor for speed and clarity
def generate_analogies(self, n_analogies=100, seed='ends',
                       multiple=False,
                       delta=1., restrict_vocab=30000,
                       unrestricted=False):
    """
    Generate analogies based on a seed vector.

    x - y ~ seed vector.
    or a:x::b:y when a-b ~ seed vector.

    The seed vector can be defined by two word ends,
    or by the bias direction.

    ``delta`` is used for semantic coherence. The default value of 1
    corresponds to an angle <= pi/3.

    There is criticism regarding generating analogies
    when used with `unrestricted=False` and not ignoring analogies
    with `match` column equal to `False`.
    Tolga's technique of generating analogies, as implemented in this
    method, is limited inherently to analogies with x != y, which may
    force "fake" bias analogies.

    See:

    - Nissim, M., van Noord, R., van der Goot, R. (2019).
      `Fair is Better than Sensational: Man is to Doctor
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

    :param seed: The definition of the seed vector.
                 Either by a tuple of two word ends,
                 or by `'ends'` for the pre-defined ends
                 or by `'direction'` for the pre-defined direction vector.
    :param int n_analogies: Number of analogies to generate.
                            Fewer rows are returned when there are
                            not enough candidate pairs.
    :param bool multiple: Whether to allow multiple appearances of a word
                          in the analogies.
    :param float delta: Threshold for semantic similarity.
                        The maximal distance between x and y.
    :param int restrict_vocab: The vocabulary size to use.
    :param bool unrestricted: Whether to validate the generated analogies
                              with unrestricted `most_similar`.
    :return: Data Frame of analogies (x, y), their distances,
             and their cosine similarity scores
    """
    # pylint: disable=C0301,R0914,E1136

    if not unrestricted:
        warnings.warn('Not Using unrestricted most_similar '
                      'may introduce fake biased analogies.')

    (seed_vector,
     positive_end,
     negative_end) = get_seed_vector(seed, self)

    restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

    normalized_vectors = (restrict_vocab_vectors
                          / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

    pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

    # `pairs_distances` must be not-equal to zero
    # otherwise, x-y will be the zero vector, and every cosine similarity
    # will be equal to zero.
    # This causes the **limitation** of this method, which enforces
    # different words for x and y.
    pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

    pairs_indices = np.array(np.nonzero(pairs_mask)).T
    x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
    y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

    x_minus_y_vectors = x_vectors - y_vectors
    normalized_x_minus_y_vectors = (x_minus_y_vectors
                                    / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

    cos_distances = normalized_x_minus_y_vectors @ seed_vector

    # Candidate pairs, from the best aligned with the seed to the worst.
    sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

    analogies = []
    generated_words_x = set()
    generated_words_y = set()

    # A plain ``for`` loop ends quietly when the candidates run out,
    # instead of leaking a ``StopIteration`` out of the method.
    for cos_distance_index in sorted_cos_distances_indices:
        if len(analogies) >= n_analogies:
            break

        pair_index = pairs_indices[cos_distance_index]
        word_x, word_y = [self.model.index2word[index]
                          for index in pair_index]

        if not multiple and (word_x in generated_words_x
                             or word_y in generated_words_y):
            continue

        analogy = {positive_end: word_x,
                   negative_end: word_y,
                   'score': cos_distances[cos_distance_index],
                   'distance': pairs_distances[tuple(pair_index)]}

        generated_words_x.add(word_x)
        generated_words_y.add(word_y)

        if unrestricted:
            most_x = next(word
                          for word, _ in most_similar(self.model,
                                                      [word_y, positive_end],
                                                      [negative_end]))
            most_y = next(word
                          for word, _ in most_similar(self.model,
                                                      [word_x, negative_end],
                                                      [positive_end]))

            analogy['most_x'] = most_x
            analogy['most_y'] = most_y
            analogy['match'] = ((word_x == most_x)
                                and (word_y == most_y))

        analogies.append(analogy)

    columns = [positive_end, negative_end, 'distance', 'score']

    if unrestricted:
        columns.extend(['most_x', 'most_y', 'match'])

    # Passing ``columns`` fixes the column order and also yields
    # a well-formed empty frame when no analogy was generated.
    return pd.DataFrame(analogies, columns=columns)
[docs] def calc_direct_bias(self, neutral_words, c=None):
"""Calculate the direct bias.
Based on the projection of neutral words on the direction.
:param list neutral_words: List of neutral words
:param c: Strictness of bias measuring
:type c: float or None
:return: The direct bias
"""
if c is None:
c = 1
projections = self._calc_projection_scores(neutral_words)['projection']
direct_bias_terms = np.abs(projections) ** c
direct_bias = direct_bias_terms.sum() / len(neutral_words)
return direct_bias
def calc_indirect_bias(self, word1, word2):
    """Calculate the indirect bias between two words.

    Based on the amount of shared projection of the words on the direction.
    It is the relative drop from the inner product of the two normalized
    vectors to the cosine similarity of their components
    that are rejected from the direction.

    Also called PairBias.

    :param str word1: First word
    :param str word2: Second word
    :return: The indirect bias between the two words
    :raises RuntimeError: If the direction was not identified.
    """
    self._is_direction_identified()

    first, second = (normalize(self[word]) for word in (word1, word2))

    similarity = first @ second
    similarity_without_direction = cosine_similarity(
        reject_vector(first, self.direction),
        reject_vector(second, self.direction))

    return (similarity - similarity_without_direction) / similarity
def generate_closest_words_indirect_bias(self,
                                         neutral_positive_end,
                                         neutral_negative_end,
                                         words=None, n_extreme=5):
    """
    Generate closest words to a neutral direction and their indirect bias.

    The direction of the neutral words is used to find
    the most extreme words.
    The indirect bias is calculated between the most extreme words
    and the closest end.

    :param str neutral_positive_end: A word that defines the positive side
                                     of the neutral direction.
    :param str neutral_negative_end: A word that defines the negative side
                                     of the neutral direction.
    :param list words: List of words to project on the neutral direction.
    :param int n_extreme: The number of the most extreme words
                          (positive and negative) to show.
    :return: Data Frame of the most extreme words
             with their projection scores and indirect biases.
    """
    neutral_direction = normalize(self[neutral_positive_end]
                                  - self[neutral_negative_end])

    normalized_vectors = [normalize(self[word]) for word in words]

    rows = [{'word': word, 'projection': vector @ neutral_direction}
            for word, vector in zip(words, normalized_vectors)]
    ranked = pd.DataFrame(rows).sort_values('projection', ascending=False)

    extremes = take_two_sides_extreme_sorted(ranked, n_extreme,
                                             'end',
                                             neutral_positive_end,
                                             neutral_negative_end)

    def row_indirect_bias(row):
        # Indirect bias between the word and the end it is closest to.
        return self.calc_indirect_bias(row['word'], row['end'])

    extremes['indirect_bias'] = extremes.apply(row_indirect_bias, axis=1)

    extremes = extremes.set_index(['end', 'word'])
    return extremes[['projection', 'indirect_bias']]
def _extract_neutral_words(self, specific_words):
extended_specific_words = set()
# because or specific_full data was trained on partial word embedding
for word in specific_words:
extended_specific_words.add(word)
extended_specific_words.add(word.lower())
extended_specific_words.add(word.upper())
extended_specific_words.add(word.title())
neutral_words = [word for word in self.model.vocab
if word not in extended_specific_words]
return neutral_words
def _neutralize(self, neutral_words):
self._is_direction_identified()
if self._verbose:
neutral_words_iter = tqdm(neutral_words)
else:
neutral_words_iter = iter(neutral_words)
for word in neutral_words_iter:
neutralized_vector = reject_vector(self[word],
self.direction)
update_word_vector(self.model, word, neutralized_vector)
self.model.init_sims(replace=True)
def _equalize(self, equality_sets):
# pylint: disable=R0914
self._is_direction_identified()
if self._verbose:
words_data = []
for equality_set_index, equality_set_words in enumerate(equality_sets):
equality_set_vectors = [normalize(self[word])
for word in equality_set_words]
center = np.mean(equality_set_vectors, axis=0)
(projected_center,
rejected_center) = project_reject_vector(center,
self.direction)
scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)
for word, vector in zip(equality_set_words, equality_set_vectors):
projected_vector = project_vector(vector, self.direction)
projected_part = normalize(projected_vector - projected_center)
# In the code it is different of Bolukbasi
# It behaves the same only for equality_sets
# with size of 2 (pairs) - not sure!
# However, my code is the same as the article
# equalized_vector = rejected_center + scaling * self.direction
# https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
# For pairs, projected_part_vector1 == -projected_part_vector2,
# and this is the same as
# projected_part_vector1 == self.direction
equalized_vector = rejected_center + scaling * projected_part
update_word_vector(self.model, word, equalized_vector)
if self._verbose:
words_data.append({
'equality_set_index': equality_set_index,
'word': word,
'scaling': scaling,
'projected_scalar': vector @ self.direction,
'equalized_projected_scalar': (equalized_vector
@ self.direction),
})
if self._verbose:
print('Equalize Words Data '
'(all equal for 1-dim bias space (direction):')
words_data_df = (pd.DataFrame(words_data)
.set_index(['equality_set_index', 'word']))
print(tabulate(words_data_df, headers='keys'))
self.model.init_sims(replace=True)
def _generate_pair_candidates(self, pairs):
# pylint: disable=line-too-long
return {(candidate1, candidate2)
for word1, word2 in pairs
for candidate1, candidate2 in zip(generate_one_word_forms(word1),
generate_one_word_forms(word2))
if candidate1 in self.model and candidate2 in self.model}
def debias(self, method='hard', neutral_words=None, equality_sets=None,
           inplace=True):
    """Debias the word embedding.

    ``'neutralize'`` only neutralizes the neutral words,
    and ``'hard'`` neutralizes and then equalizes the equality sets.
    No step is executed for ``'soft'``, although it is an accepted value.

    :param str method: The method of debiasing.
    :param list neutral_words: List of neutral words
                               for the neutralize step
    :param list equality_sets: List of equality sets,
                               for the equalize step.
                               The sets represent the direction.
    :param bool inplace: Whether to debias the object inplace
                         or return a new one
    :return: ``None`` if ``inplace`` is true,
             otherwise the debiased deep copy.
    :raises ValueError: If ``method`` is not one of ``DEBIAS_METHODS``.

    .. warning::

      After calling `debias`,
      all the vectors of the word embedding
      will be normalized to unit length.

    """
    # pylint: disable=W0212

    # Validate everything up front: before the (expensive) deep copy
    # of the model, and before any vector is modified, so a bad call
    # cannot leave the embedding half-debiased.
    if method not in DEBIAS_METHODS:
        raise ValueError('method should be one of {}, {} was given'.format(
            DEBIAS_METHODS, method))

    if method == 'hard':
        assert all(len(equality_set) == 2
                   for equality_set in equality_sets), \
               'Currently supporting only equality pairs.'

    if inplace:
        bias_word_embedding = self
    else:
        bias_word_embedding = copy.deepcopy(self)

    if method in ['hard', 'neutralize']:
        if self._verbose:
            print('Neutralize...')
        bias_word_embedding._neutralize(neutral_words)

    if method == 'hard':
        if self._verbose:
            print('Equalize...')

        equality_sets = self._generate_pair_candidates(equality_sets)

        bias_word_embedding._equalize(equality_sets)

    if inplace:
        return None
    else:
        return bias_word_embedding
def evaluate_word_embedding(self,
                            kwargs_word_pairs=None,
                            kwargs_word_analogies=None):
    """
    Evaluate word pairs tasks and word analogies tasks.

    Delegates to the module-level ``evaluate_word_embedding``
    with the wrapped model.

    :param kwargs_word_pairs: Kwargs for
                              evaluate_word_pairs
                              method.
    :type kwargs_word_pairs: dict or None
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: Tuple of :class:`pandas.DataFrame`
             for the evaluation results.
    """
    # Inside the method body the name resolves to the imported function,
    # not to this method.
    evaluation_results = evaluate_word_embedding(self.model,
                                                 kwargs_word_pairs,
                                                 kwargs_word_analogies)
    return evaluation_results
def learn_full_specific_words(self, seed_specific_words,
                              max_non_specific_examples=None, debug=None):
    """Learn specific words given a list of seed specific words.

    Using Linear SVM.

    The training set consists of all the vocabulary words that are seed
    specific words, and of the first ``max_non_specific_examples``
    other words in vocabulary order.

    :param list seed_specific_words: List of seed specific words
    :param int max_non_specific_examples: The number of non-specific words
                                          to sample for training
                                          (defaults to
                                          ``MAX_NON_SPECIFIC_EXAMPLES``)
    :param debug: Whether to return also the training data
    :type debug: bool or None
    :return: List of learned specific words and the classifier object
             (and also ``X`` and ``y`` if ``debug`` is true)
    """
    if debug is None:
        debug = False

    if max_non_specific_examples is None:
        max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

    # Membership is tested once per vocabulary word, so hash the seeds
    # instead of scanning a list every time.
    seed_specific_words = set(seed_specific_words)

    data = []
    non_specific_example_count = 0

    for word in self.model.vocab:
        is_specific = word in seed_specific_words

        if not is_specific:
            non_specific_example_count += 1
            # Specific words are always kept; the others are capped.
            if non_specific_example_count > max_non_specific_examples:
                continue

        data.append((self[word], is_specific))

    # NOTE: this seeds NumPy's global random state.
    np.random.seed(RANDOM_STATE)
    np.random.shuffle(data)

    X, y = zip(*data)

    X = np.array(X)
    X /= np.linalg.norm(X, axis=1)[:, None]

    y = np.array(y).astype('int')

    clf = LinearSVC(C=1, class_weight='balanced',
                    random_state=RANDOM_STATE)

    clf.fit(X, y)

    full_specific_words = []
    for word in self.model.vocab:
        vector = [normalize(self[word])]
        if clf.predict(vector):
            full_specific_words.append(word)

    if not debug:
        return full_specific_words, clf

    return full_specific_words, clf, X, y
def _plot_most_biased_one_cluster(self,
                                  most_biased_neutral_words, y_bias,
                                  random_state=1, ax=None):
    """Plot the clustering of the given words as a classification.

    Collects the model vectors of the words and delegates
    to ``plot_clustering_as_classification``.

    :return: Whatever ``plot_clustering_as_classification`` returns
             (it is displayed as the accuracy by
             ``plot_most_biased_clustering``).
    """
    word_vectors = []
    for word in most_biased_neutral_words:
        word_vectors.append(self.model[word])

    return plot_clustering_as_classification(word_vectors,
                                             y_bias,
                                             random_state=random_state,
                                             ax=ax)
[docs] def compute_factual_association(self, factual_properity):
"""Compute association of a factual property to the projection.
Inspired by WEFAT (Word-Embedding Factual Association Test),
but it is not the same:
- Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
`Semantics derived automatically
from language corpora contain human-like biases
<http://opus.bath.ac.uk/55288/>`_.
Science, 356(6334), 183-186.
In a future version, the WEFAT will also be implemented.
If a word doesn't exist in the word embedding,
then it will be filtered out.
For example, in :class:`responsibly.we.bias.GenderBiasWE`,
the defuat factual property is the percentage of female
in various occupations
from the Labor Force Statistics of 2017 Population Survey,
Taken from: https://arxiv.org/abs/1804.06876
:param dict factual_properity: Dictionary of words
and their factual values.
:return: Pearson r, pvalue and the words with their
associated factual values
and their projection on the bias direction.
"""
points = {word: (value, self.project_on_direction(word))
for word, value in factual_properity.items()
if word in self.model}
x, y = zip(*points.values())
return pearsonr(x, y), points
def plot_factual_association(self, factual_properity, ax=None):
    """Plot association of a factual property to the projection.

    See: :meth:`BiasWordEmbedding.compute_factual_association`

    :param dict factual_properity: Dictionary of words
                                   and their factual values.
    :param ax: The axes to draw on; a new figure is created
               when it is ``None``
    :return: The ax object of the plot
    """
    result = self.compute_factual_association(factual_properity)

    (r, pvalue), points = result
    x, y = zip(*points.values())

    if ax is None:
        _, ax = plt.subplots(1)

    ax.scatter(x, y)

    # The title and the labels go on ``ax`` itself; ``plt.*`` would
    # decorate the current axes, which may be a different one.
    ax.set_title('Association between Factual Property'
                 ' and Projection on Direction '
                 '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                 .format(r, pvalue))

    ax.set_xlabel('Factual Property')
    ax.set_ylabel('Projection on Direction')

    return ax
@staticmethod
def plot_most_biased_clustering(biased, debiased,
                                seed='ends', n_extreme=500,
                                random_state=1):
    """Plot clustering as classification of biased neutral words.

    The neutral words of ``biased`` are ranked by their projection
    on the seed vector, and the ``n_extreme`` words of each side
    are clustered once with each of the two word embeddings.

    :param biased: Biased word embedding of
                   :class:`~responsibly.we.bias.BiasWordEmbedding`.
    :param debiased: Debiased word embedding of
                     :class:`~responsibly.we.bias.BiasWordEmbedding`.
    :param seed: The definition of the seed vector.
                 Either by a tuple of two word ends,
                 or by `'ends'` for the pre-defined ends
                 or by `'direction'` for
                 the pre-defined direction vector.
    :param n_extreme: The number of extreme biased
                      neutral words to use.
    :param random_state: Passed to the clustering plot of each embedding.
    :return: Tuple of list of ax objects of the plot,
             and a dictionary with the most positive
             and negative words.

    Based on:

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove
      Them <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - https://github.com/gonenhila/gender_bias_lipstick
    """
    # pylint: disable=protected-access,too-many-locals,line-too-long

    assert biased.positive_end == debiased.positive_end, \
        'Postive ends should be the same.'
    assert biased.negative_end == debiased.negative_end, \
        'Negative ends should be the same.'

    seed_vector, _, _ = get_seed_vector(seed, biased)

    neutral_words = biased._data['neutral_words']

    # (projection, word) tuples, from the most negative projection
    # to the most positive one.
    ranked_words = sorted((normalize(biased[word]) @ seed_vector, word)
                          for word in neutral_words)

    _, most_negative_words = zip(*ranked_words[:n_extreme])
    _, most_positive_words = zip(*ranked_words[-n_extreme:])

    most_biased_neutral_words = most_negative_words + most_positive_words

    y_bias = [False] * n_extreme + [True] * n_extreme

    _, axes = plt.subplots(1, 2, figsize=(20, 5))

    panels = ((biased, axes[0], 'Biased - Accuracy={}'),
              (debiased, axes[1], 'Debiased - Accuracy={}'))

    for word_embedding, panel_ax, title_template in panels:
        accuracy = word_embedding._plot_most_biased_one_cluster(
            most_biased_neutral_words,
            y_bias,
            random_state=random_state,
            ax=panel_ax)
        panel_ax.set_title(title_template.format(accuracy))

    return axes, {biased.positive_end: most_positive_words,
                  biased.negative_end: most_negative_words}
[docs]class GenderBiasWE(BiasWordEmbedding):
"""Measure and adjust the Gender Bias in English Word Embedding.
:param model: Word embedding model of ``gensim.model.KeyedVectors``
:param bool only_lower: Whether the word embedding contains
only lower case words
:param bool verbose: Set verbosity
:param str identify_direction: Set the method of identifying
the gender direction:
`'single'`, `'sum'` or `'pca'`.
:param bool to_normalize: Whether to normalize all the vectors
(recommended!)
"""
def __init__(self, model, only_lower=False, verbose=False,
             identify_direction='pca', to_normalize=True):
    """Load the gender data and, optionally, identify the direction.

    The direction is identified between the ends ``'she'`` (positive)
    and ``'he'`` (negative).

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param str identify_direction: Set the method of identifying
                                   the gender direction:
                                   `'single'`, `'sum'` or `'pca'`.
                                   A false value skips the identification.
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """
    # Forward the caller's choice; it used to be overridden
    # by a hard-coded ``True``.
    super().__init__(model=model,
                     only_lower=only_lower,
                     verbose=verbose,
                     to_normalize=to_normalize)

    self._initialize_data()

    if identify_direction:
        definitional = None

        if identify_direction == 'single':
            definitional = ('she', 'he')
        elif identify_direction == 'sum':
            # Transpose the pairs into two groups of words.
            definitional = list(zip(*self._data['definitional_pairs']))
        elif identify_direction == 'pca':
            definitional = self._data['definitional_pairs']

        self._identify_direction('she', 'he',
                                 definitional,
                                 identify_direction)
def _initialize_data(self):
    """Prepare ``self._data`` from a private copy of the gender data.

    - Unless ``only_lower`` is set, the specific words are expanded
      with ``generate_words_forms``.
    - Every word group is filtered to the words that exist in the model.
    - The neutral words (all the vocabulary words which are not specific)
      are added, sorted, as another word group.
    """
    specific_key = 'specific_full_with_definitional_equalize'

    data = copy.deepcopy(BOLUKBASI_DATA['gender'])
    self._data = data

    if not self.only_lower:
        data[specific_key] = generate_words_forms(data[specific_key])  # pylint: disable=C0301

    for group_key in data['word_group_keys']:
        data[group_key] = self._filter_words_by_model(data[group_key])

    neutral_words = self._extract_neutral_words(data[specific_key])
    neutral_words.sort()
    data['neutral_words'] = neutral_words

    data['word_group_keys'].append('neutral_words')
def plot_projection_scores(self, words='professions', n_extreme=10,
                           ax=None, axis_projection_step=None):
    """Plot the projection scalar of words on the gender direction.

    :param words: The words to project, or ``'professions'``
                  for the profession names of the gender data
    :param int or None n_extreme: The number of extreme words to show
    :param ax: The axes to draw on
    :param axis_projection_step: Distance between ticks
                                 of the projection axis
    :return: The ax object of the plot
    """
    # The isinstance guard keeps array-like inputs from being compared
    # element-wise to the sentinel string.
    if isinstance(words, str) and words == 'professions':
        words = self._data['profession_names']

    return super().plot_projection_scores(words, n_extreme,
                                          ax, axis_projection_step)
def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                       ax=None):
    """Plot the projection scalars distribution on the gender direction.

    :param word_groups: Dictionary of groups of words to project,
                        or ``'bolukbasi'`` for all the word groups
                        of the gender data
    :param ax: The axes to draw on
    :return: The ax object of the plot
    """
    if word_groups == 'bolukbasi':
        word_groups = {}
        for group_key in self._data['word_group_keys']:
            word_groups[group_key] = self._data[group_key]

    return super().plot_dist_projections_on_direction(word_groups, ax)
@classmethod
def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                     ax=None, scatter_kwargs=None):
    """
    Plot the projections of the neutral profession names
    for two word embeddings.

    :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                          as values,
                                          and their names as keys.
    :param ax: The axes to draw on
    :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
    :type scatter_kwargs: dict or None
    :return: The ax object of the plot
    """
    # pylint: disable=W0221

    words = BOLUKBASI_DATA['gender']['neutral_profession_names']

    # Zero-argument ``super()`` always continues from this class
    # in the MRO. ``super(cls, cls)`` would resolve back to this very
    # method whenever it is called through a sub-class.
    # The result is returned so that the caller gets the axes.
    return super().plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                    words,
                                                    ax,
                                                    scatter_kwargs)
def calc_direct_bias(self, neutral_words='professions', c=None):
    """Calculate the direct gender bias.

    :param neutral_words: List of neutral words, or ``'professions'``
                          for the neutral profession names
                          of the gender data
    :param c: Strictness of bias measuring
    :type c: float or None
    :return: The direct bias
    """
    if isinstance(neutral_words, str) and neutral_words == 'professions':
        neutral_words = self._data['neutral_profession_names']

    # ``c`` is forwarded for an explicit words list as well
    # (it used to be dropped silently in that case).
    return super().calc_direct_bias(neutral_words, c)
def generate_closest_words_indirect_bias(self,
                                         neutral_positive_end,
                                         neutral_negative_end,
                                         words='professions', n_extreme=5):
    """
    Generate closest words to a neutral direction and their indirect bias.

    :param str neutral_positive_end: A word that defines the positive side
                                     of the neutral direction.
    :param str neutral_negative_end: A word that defines the negative side
                                     of the neutral direction.
    :param words: List of words to project on the neutral direction,
                  or ``'professions'`` for the profession names
                  of the gender data.
    :param int n_extreme: The number of the most extreme words
                          (positive and negative) to show.
    :return: Data Frame of the most extreme words
             with their projection scores and indirect biases.
    """
    # pylint: disable=C0301

    # The isinstance guard keeps array-like inputs from being compared
    # element-wise to the sentinel string.
    if isinstance(words, str) and words == 'professions':
        words = self._data['profession_names']

    return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                        neutral_negative_end,
                                                        words,
                                                        n_extreme=n_extreme)
def debias(self, method='hard', neutral_words=None, equality_sets=None,
           inplace=True):
    """Debias the word embedding, with defaults from the gender data.

    :param str method: The method of debiasing.
    :param list neutral_words: List of neutral words for the neutralize
                               step; defaults to the neutral words
                               of the gender data.
    :param list equality_sets: List of equality sets for the equalize
                               step; defaults to the union of the
                               equalize pairs and the definitional pairs
                               of the gender data.
    :param bool inplace: Whether to debias the object inplace
                         or return a new one
    """
    # pylint: disable=line-too-long

    needs_neutral_words = method in ['hard', 'neutralize']
    if needs_neutral_words and neutral_words is None:
        neutral_words = self._data['neutral_words']

    if method == 'hard' and equality_sets is None:
        equalize_pairs = set(map(tuple, self._data['equalize_pairs']))
        definitional_pairs = set(map(tuple,
                                     self._data['definitional_pairs']))
        equality_sets = equalize_pairs | definitional_pairs

    return super().debias(method, neutral_words, equality_sets,
                          inplace)
def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                              max_non_specific_examples=None,
                              debug=None):
    """Learn gender specific words given seed specific words.

    :param seed_specific_words: List of seed specific words,
                                or ``'bolukbasi'`` for the seed
                                of the gender data
    :param int max_non_specific_examples: The number of non-specific words
                                          to sample for training
    :param debug: Whether to return also the training data
    :type debug: bool or None
    :return: List of learned specific words and the classifier object
    """
    # The isinstance guard keeps array-like inputs from being compared
    # element-wise to the sentinel string.
    if (isinstance(seed_specific_words, str)
            and seed_specific_words == 'bolukbasi'):
        seed_specific_words = self._data['specific_seed']

    return super().learn_full_specific_words(seed_specific_words,
                                             max_non_specific_examples,
                                             debug)
def compute_factual_association(self,
                                factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
    """Compute association of a factual property to the projection.

    :param dict factual_properity: Dictionary of words and their factual
                                   values; defaults to
                                   ``OCCUPATION_FEMALE_PRECENTAGE``.
    :return: Same as the method of the parent class.
    """
    association = super().compute_factual_association(factual_properity)
    return association
def plot_factual_association(self,
                             factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                             ax=None):
    """Plot association of a factual property to the projection.

    :param dict factual_properity: Dictionary of words and their factual
                                   values; defaults to
                                   ``OCCUPATION_FEMALE_PRECENTAGE``.
    :param ax: The axes to draw on
    :return: Same as the method of the parent class.
    """
    plot_ax = super().plot_factual_association(factual_properity, ax)
    return plot_ax