Module ktrain.text.ner.anago.models
Model definition.
Expand source code
"""
Model definition.
"""
from .... import utils as U
from ....imports import *
# if U.is_tf_keras():
#     from .layers import CRF
# else:
#     from .layers_standalone import CRF

def save_model(model, weights_file, params_file):
    with open(params_file, "w") as f:
        params = model.to_json()
        json.dump(json.loads(params), f, sort_keys=True, indent=4)
    model.save_weights(weights_file)

def load_model(weights_file, params_file):
    # the custom CRF layer must be importable so model_from_json can deserialize it
    from .layers import CRF

    with open(params_file) as f:
        model = keras.models.model_from_json(f.read(), custom_objects={"CRF": CRF})
        model.load_weights(weights_file)
    return model

class BiLSTMCRF(object):
    """A Keras implementation of BiLSTM-CRF for sequence labeling.

    References
    ----------
    Guillaume Lample, Miguel Ballesteros, Sandeep Subramanian, Kazuya Kawakami, Chris Dyer.
    "Neural Architectures for Named Entity Recognition". Proceedings of NAACL 2016.
    https://arxiv.org/abs/1603.01360
    """

    def __init__(
        self,
        num_labels,
        word_vocab_size,
        char_vocab_size=None,
        word_embedding_dim=100,
        char_embedding_dim=25,
        word_lstm_size=100,
        char_lstm_size=25,
        fc_dim=100,
        dropout=0.5,
        embeddings=None,
        use_char=True,
        use_crf=True,
        char_mask_zero=True,
        use_elmo=False,
        use_transformer_with_dim=None,
    ):
"""Build a Bi-LSTM CRF model.
Args:
word_vocab_size (int): word vocabulary size.
char_vocab_size (int): character vocabulary size.
num_labels (int): number of entity labels.
word_embedding_dim (int): word embedding dimensions.
char_embedding_dim (int): character embedding dimensions.
word_lstm_size (int): character LSTM feature extractor output dimensions.
char_lstm_size (int): word tagger LSTM output dimensions.
fc_dim (int): output fully-connected layer size.
dropout (float): dropout rate.
embeddings (numpy array): word embedding matrix.
use_char (boolean): add char feature.
use_crf (boolean): use crf as last layer.
char_mask_zero(boolean): mask zero for character embedding (see TF2 isse #33148 and #33069)
use_elmo(boolean): If True, model will be configured to accept Elmo embeddings
as an additional input to word and character embeddings
use_transformer_with_dim(int): If not None, model will be configured to accept
transformer embeddings of given dimension
"""
        super().__init__()
        self._char_embedding_dim = char_embedding_dim
        self._word_embedding_dim = word_embedding_dim
        self._char_lstm_size = char_lstm_size
        self._word_lstm_size = word_lstm_size
        self._char_vocab_size = char_vocab_size
        self._word_vocab_size = word_vocab_size
        self._fc_dim = fc_dim
        self._dropout = dropout
        self._use_char = use_char
        self._use_crf = use_crf
        self._embeddings = embeddings
        self._num_labels = num_labels
        self._char_mask_zero = char_mask_zero
        self._use_elmo = use_elmo
        self._use_transformer_with_dim = use_transformer_with_dim

    def build(self):
        # build word embedding
        word_ids = keras.layers.Input(
            batch_shape=(None, None), dtype="int32", name="word_input"
        )
        inputs = [word_ids]
        embedding_list = []
        if self._embeddings is None:
            word_embeddings = keras.layers.Embedding(
                input_dim=self._word_vocab_size,
                output_dim=self._word_embedding_dim,
                mask_zero=True,
                name="word_embedding",
            )(word_ids)
        else:
            word_embeddings = keras.layers.Embedding(
                input_dim=self._embeddings.shape[0],
                output_dim=self._embeddings.shape[1],
                mask_zero=True,
                weights=[self._embeddings],
                name="word_embedding",
            )(word_ids)
        embedding_list.append(word_embeddings)

        # build character based word embedding
        if self._use_char:
            char_ids = keras.layers.Input(
                batch_shape=(None, None, None), dtype="int32", name="char_input"
            )
            inputs.append(char_ids)
            char_embeddings = keras.layers.Embedding(
                input_dim=self._char_vocab_size,
                output_dim=self._char_embedding_dim,
                mask_zero=self._char_mask_zero,
                name="char_embedding",
            )(char_ids)
            char_embeddings = keras.layers.TimeDistributed(
                keras.layers.Bidirectional(keras.layers.LSTM(self._char_lstm_size))
            )(char_embeddings)
            embedding_list.append(char_embeddings)

        # add elmo embedding
        if self._use_elmo:
            elmo_embeddings = keras.layers.Input(shape=(None, 1024), dtype="float32")
            inputs.append(elmo_embeddings)
            embedding_list.append(elmo_embeddings)

        # add transformer embedding
        if self._use_transformer_with_dim is not None:
            transformer_embeddings = keras.layers.Input(
                shape=(None, self._use_transformer_with_dim), dtype="float32"
            )
            inputs.append(transformer_embeddings)
            embedding_list.append(transformer_embeddings)

        # concatenate embeddings
        word_embeddings = (
            keras.layers.Concatenate()(embedding_list)
            if len(embedding_list) > 1
            else embedding_list[0]
        )

        # build model
        word_embeddings = keras.layers.Dropout(self._dropout)(word_embeddings)
        z = keras.layers.Bidirectional(
            keras.layers.LSTM(units=self._word_lstm_size, return_sequences=True)
        )(word_embeddings)
        z = keras.layers.Dense(self._fc_dim, activation="tanh")(z)
        if self._use_crf:
            from .layers import CRF

            crf = CRF(self._num_labels, sparse_target=False)
            loss = crf.loss_function
            pred = crf(z)
        else:
            loss = "categorical_crossentropy"
            pred = keras.layers.Dense(self._num_labels, activation="softmax")(z)
        model = keras.Model(inputs=inputs, outputs=pred)
        return model, loss
Functions
def load_model(weights_file, params_file)
-
Recreates a model from its JSON architecture file, deserializing the custom CRF layer, and loads its weights from the given weights file.
def save_model(model, weights_file, params_file)
-
Writes the model architecture to params_file as formatted JSON and saves the model weights to weights_file.
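A minimal usage sketch for the two helpers above. It assumes ktrain is installed and that model is a Keras model produced by BiLSTMCRF.build() (documented below); the file names are placeholders chosen only for illustration.

from ktrain.text.ner.anago.models import save_model, load_model

# hypothetical paths used only for this sketch
weights_file = "ner_weights.h5"
params_file = "ner_params.json"

# write the architecture as formatted JSON and the weights to the weights file
save_model(model, weights_file, params_file)

# rebuild the same architecture and restore the weights
restored_model = load_model(weights_file, params_file)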
Classes
class BiLSTMCRF (num_labels, word_vocab_size, char_vocab_size=None, word_embedding_dim=100, char_embedding_dim=25, word_lstm_size=100, char_lstm_size=25, fc_dim=100, dropout=0.5, embeddings=None, use_char=True, use_crf=True, char_mask_zero=True, use_elmo=False, use_transformer_with_dim=None)
-
A Keras implementation of BiLSTM-CRF for sequence labeling.
References
Guillaume Lample, Miguel Ballesteros, Sandeep Subramanian, Kazuya Kawakami, Chris Dyer. "Neural Architectures for Named Entity Recognition". Proceedings of NAACL 2016. https://arxiv.org/abs/1603.01360
Build a Bi-LSTM CRF model.
Args
word_vocab_size : int
- word vocabulary size.
char_vocab_size : int
- character vocabulary size.
num_labels : int
- number of entity labels.
word_embedding_dim : int
- word embedding dimensions.
char_embedding_dim : int
- character embedding dimensions.
word_lstm_size : int
- word tagger LSTM output dimensions.
char_lstm_size : int
- character LSTM feature extractor output dimensions.
fc_dim : int
- output fully-connected layer size.
dropout : float
- dropout rate.
embeddings : numpy array
- word embedding matrix.
use_char : boolean
- add character features.
use_crf : boolean
- use a CRF as the last layer.
char_mask_zero : boolean
- mask zero for the character embedding (see TF2 issues #33148 and #33069).
use_elmo : boolean
- if True, the model is configured to accept ELMo embeddings as an additional input to the word and character embeddings.
use_transformer_with_dim : int
- if not None, the model is configured to accept transformer embeddings of the given dimension.
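A construction sketch for the arguments above. The label count and vocabulary sizes are made-up placeholders, and use_crf=False is chosen so the sketch does not depend on the optional CRF layer; with use_crf=True, build() would instead return the CRF layer's loss function.

from ktrain.text.ner.anago.models import BiLSTMCRF

# placeholder sizes, for illustration only
builder = BiLSTMCRF(
    num_labels=10,          # e.g. the number of BIO tags
    word_vocab_size=20000,
    char_vocab_size=100,
    use_char=True,
    use_crf=False,          # keeps the sketch independent of the CRF layer
)
model, loss = builder.build()   # loss is "categorical_crossentropy" in this configuration
model.compile(optimizer="adam", loss=loss)
model.summary()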
Methods
def build(self)
-
Assembles the Keras computation graph from the configured options and returns a (model, loss) tuple, where loss is either the CRF layer's loss function or "categorical_crossentropy".
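Continuing the sketch above, the shapes below illustrate what the built model expects when use_char=True: word ids of shape (batch, sequence_length) and character ids of shape (batch, sequence_length, word_length), producing per-token label probabilities of shape (batch, sequence_length, num_labels). The concrete sizes are arbitrary assumptions.

import numpy as np

# arbitrary illustrative shapes: 2 sentences, 7 tokens each, up to 12 characters per token
word_ids = np.random.randint(1, 20000, size=(2, 7)).astype("int32")
char_ids = np.random.randint(1, 100, size=(2, 7, 12)).astype("int32")

# inputs are passed in the order build() appended them: [word_input, char_input]
probs = model.predict([word_ids, char_ids])
print(probs.shape)  # (2, 7, 10) -> (batch, sequence_length, num_labels)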