Skip to content

Commit

Permalink
Initialize position-dependent weights with uniform distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
Witiko committed Nov 15, 2020
1 parent 94a57ff commit fa9dfcf
Showing 1 changed file with 32 additions and 31 deletions.
63 changes: 32 additions & 31 deletions gensim/models/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@
"""

from math import sqrt
import logging
import os

Expand Down Expand Up @@ -482,7 +483,9 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, vector_size=100
# with no eligible char-ngram lengths, no buckets need be allocated
bucket = 0

self.wv = FastTextKeyedVectors(vector_size, position_dependent_vector_size, min_n, max_n, bucket)
self.wv = FastTextKeyedVectors(
vector_size, position_dependent_weights, position_dependent_vector_size, min_n, max_n,
bucket)
self.wv.bucket = bucket

super(FastText, self).__init__(
Expand All @@ -505,9 +508,7 @@ def prepare_weights(self, update=False):
"""
super(FastText, self).prepare_weights(update=update)
if not update:
self.wv.init_ngrams_weights(self.seed)
if self.position_dependent_weights:
self.wv.init_positional_weights(self.window)
self.wv.init_ngrams_weights(self.seed, self.window)
# EXPERIMENTAL lockf feature; create minimal no-op lockf arrays (1 element of 1.0)
# advanced users should directly resize/adjust as necessary
self.wv.vectors_vocab_lockf = ones(1, dtype=REAL)
Expand Down Expand Up @@ -598,9 +599,7 @@ def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, prog
"""
if not update:
self.wv.init_ngrams_weights(self.seed)
if self.position_dependent_weights:
self.wv.init_positional_weights(self.window)
self.wv.init_ngrams_weights(self.seed, self.window)
elif not len(self.wv):
raise RuntimeError(
"You cannot do an online vocabulary-update of a model which has no prior vocabulary. "
Expand Down Expand Up @@ -1172,7 +1171,7 @@ def save_facebook_model(model, path, encoding="utf-8", lr_update_rate=100, word_


class FastTextKeyedVectors(KeyedVectors):
def __init__(self, vector_size, position_dependent_vector_size, min_n, max_n, bucket):
def __init__(self, vector_size, position_dependent_weights, position_dependent_vector_size, min_n, max_n, bucket):
"""Vectors and vocab for :class:`~gensim.models.fasttext.FastText`.
Implements significant parts of the FastText algorithm. For example,
Expand All @@ -1188,6 +1187,8 @@ def __init__(self, vector_size, position_dependent_vector_size, min_n, max_n, bu
----------
vector_size : int
The dimensionality of all vectors.
position_dependent_weights : bool
Whether position-dependent weight vectors will also be stored.
position_dependent_vector_size : int
How many features of the trained vector features should be
position-dependent. Decreasing the number of position-dependent
Expand Down Expand Up @@ -1222,11 +1223,12 @@ def __init__(self, vector_size, position_dependent_vector_size, min_n, max_n, bu
"""
super(FastTextKeyedVectors, self).__init__(vector_size=vector_size)
self.position_dependent_weights = position_dependent_weights
self.position_dependent_vector_size = position_dependent_vector_size # fka pdw_size
self.vectors_vocab = None # fka syn0_vocab
self.vectors_ngrams = None # fka syn0_ngrams
self.vectors_positions = None # fka syn0_positions
self.buckets_word = None
self.vectors_positions = None
self.min_n = min_n
self.max_n = max_n
self.bucket = bucket # count of buckets, fka num_ngram_vectors
Expand Down Expand Up @@ -1343,7 +1345,7 @@ def get_vector(self, word, use_norm=False):
else:
return word_vec

def init_ngrams_weights(self, seed):
def init_ngrams_weights(self, seed, window):
"""Initialize the vocabulary and ngrams weights prior to training.
Creates the weight matrices and initializes them with uniform random values.
Expand All @@ -1352,6 +1354,8 @@ def init_ngrams_weights(self, seed):
----------
seed : float
The seed for the PRNG.
window : int
The size of the window used during the training.
Note
----
Expand All @@ -1362,10 +1366,9 @@ def init_ngrams_weights(self, seed):

rand_obj = np.random.default_rng(seed=seed) # use new instance of numpy's recommended generator/algorithm

lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
vocab_shape = (len(self), self.vector_size)
ngrams_shape = (self.bucket, self.vector_size)
self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
positions_shape = (2 * window, self.position_dependent_vector_size)
#
# We could have initialized vectors_ngrams at construction time, but we
# do it here for two reasons:
Expand All @@ -1375,23 +1378,15 @@ def init_ngrams_weights(self, seed):
# vectors_ngrams, and vectors_vocab cannot happen at construction
# time because the vocab is not initialized at that stage.
#
if self.position_dependent_weights:
hi = sqrt(sqrt(3.0) / self.vector_size)
lo = -hi
self.vectors_positions = rand_obj.uniform(lo, hi, positions_shape).astype(REAL)
else:
lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)

def init_positional_weights(self, window):
    """Initialize the positional weights prior to training.

    Creates the positional weight matrix and fills it with ones (it is NOT
    randomly initialized here), storing it in `self.vectors_positions`.

    Parameters
    ----------
    window : int
        The size of the window used during the training. The matrix gets
        ``2 * window`` rows.

    """
    # Columns: only `position_dependent_vector_size` features are position-dependent.
    # NOTE(review): 2 * window rows presumably means one row per context
    # position on either side of the center word -- confirm against the trainer.
    positional_shape = (2 * window, self.position_dependent_vector_size)
    self.vectors_positions = np.ones(positional_shape, dtype=REAL)

def update_ngrams_weights(self, seed, old_vocab_len):
"""Update the vocabulary weights for training continuation.
Expand All @@ -1413,7 +1408,8 @@ def update_ngrams_weights(self, seed, old_vocab_len):
rand_obj.seed(seed)

new_vocab = len(self) - old_vocab_len
self.vectors_vocab = _pad_random(self.vectors_vocab, new_vocab, rand_obj)
self.vectors_vocab = _pad_random(self.vectors_vocab, new_vocab, rand_obj,
squared=self.position_dependent_weights)

def init_post_load(self, fb_vectors):
"""Perform initialization after loading a native Facebook model.
Expand Down Expand Up @@ -1480,11 +1476,16 @@ def recalc_char_ngram_buckets(self):
)


def _pad_random(m, new_rows, rand, squared=False):
    """Pad a matrix with additional rows filled with random values.

    Parameters
    ----------
    m : numpy.ndarray
        Two-dimensional matrix to extend. Its column count fixes both the
        width and the sampling range of the new rows.
    new_rows : int
        Number of random rows to append below `m`.
    rand : object
        Random generator exposing ``uniform(low, high, shape)``.
    squared : bool, optional
        If False (the default), new values are drawn uniformly between
        ``-1 / columns`` and ``1 / columns``.
        If True, they are drawn uniformly between
        ``-sqrt(sqrt(3) / columns)`` and ``sqrt(sqrt(3) / columns)``.

    Returns
    -------
    numpy.ndarray
        `m` stacked on top of the freshly sampled rows (which are cast to REAL).

    """
    _, columns = m.shape
    shape = (new_rows, columns)
    if squared:
        # With this wider range, the product of two independent draws has
        # variance 1 / (3 * columns ** 2), the same as one draw from the
        # default range below.
        # NOTE(review): presumably used because vectors are multiplied by
        # positional weights when position-dependent weighting is on -- confirm.
        high = sqrt(sqrt(3.0) / columns)
        low = -high
    else:
        low, high = -1.0 / columns, 1.0 / columns
    suffix = rand.uniform(low, high, shape).astype(REAL)
    return vstack([m, suffix])


Expand Down

0 comments on commit fa9dfcf

Please sign in to comment.