The following 50 code examples, extracted from open-source Python projects, illustrate how to use keras.layers.Bidirectional().
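Before the project-specific examples, here is a minimal sketch of the wrapper's basic usage; the layer sizes and the input shape (100 timesteps, 32 features) are arbitrary values chosen only for illustration:

from keras.models import Sequential
from keras.layers import Bidirectional, LSTM, Dense

# Wrap an LSTM so the sequence is read in both directions; with the default
# merge_mode='concat' the output dimension is doubled (2 * 64 here).
model = Sequential()
model.add(Bidirectional(LSTM(64), input_shape=(100, 32)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')
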
def __call__(self, inputs):
    x = self._merge_inputs(inputs)
    shape = getattr(x, '_keras_shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(128, kernel_regularizer=kernel_regularizer,
                                return_sequences=True),
                         merge_mode='concat')(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def HAN1(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    #model = Sequential()
    wordInputs = Input(shape=(MAX_WORDS,), name='word1', dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='emb1')(wordInputs)
    #Assuming all the sentences have same number of words. Check for input_length again.
    hij = Bidirectional(GRU(WORDGRU, name='gru1', return_sequences=True))(wordEmbedding)
    wordDrop = Dropout(DROPOUTPER, name='drop1')(hij)
    alpha_its, Si = AttentionLayer(name='att1')(wordDrop)
    v6 = Dense(1, activation="sigmoid", name="dense")(Si)
    #model.add(Dense(1, activation="sigmoid", name="documentOut3"))
    model = Model(inputs=[wordInputs], outputs=[v6])
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

def fGRU_avg(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_SENTS+1, MAX_WORDS), name="wordInputs", dtype='float32')
    wordInp = Flatten()(wordInputs)
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInp)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    head = GlobalAveragePooling1D()(hij)
    v6 = Dense(1, activation="sigmoid", name="dense")(head)
    model = Model(inputs=[wordInputs], outputs=[v6])
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

def __init__(self, n_classes, vocab_size, max_len, num_units=128,
             useBiDirection=False, useAttention=False,
             learning_rate=0.001, dropout=0, embedding_size=300):
    self.model = Sequential()
    self.model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_len))
    lstm_model = LSTM(num_units, dropout=dropout)
    if useBiDirection:
        lstm_model = Bidirectional(lstm_model)
    if useAttention:
        lstm_model = lstm_model
        print("Attention not implement yet ... ")
    self.model.add(lstm_model)
    self.model.add(Dense(n_classes, activation='softmax'))
    self.model.summary()
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=Adam(lr=learning_rate),
                       metrics=['accuracy'])

def create_BiLSTM(wordvecs, lstm_dim=300, output_dim=2, dropout=.5,
                  weights=None, train=True):
    model = Sequential()
    if weights != None:
        model.add(Embedding(len(wordvecs)+1, len(wordvecs['the']),
                            weights=[weights], trainable=train))
    else:
        model.add(Embedding(len(wordvecs)+1, len(wordvecs['the']),
                            trainable=train))
    model.add(Dropout(dropout))
    model.add(Bidirectional(LSTM(lstm_dim)))
    model.add(Dropout(dropout))
    model.add(Dense(output_dim, activation='softmax'))
    if output_dim == 2:
        model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
    else:
        model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
    return model

def print_results(bi, file, out_file, file_type):
    names, results, std_devs, dim = test_embeddings(bi, file, file_type)
    rr = [[u'{0:.3f} \u00B1{1:.3f}'.format(r, s) for r, s in zip(result, std_dev)]
          for result, std_dev in zip(results, std_devs)]
    table_data = [[name] + result for name, result in zip(names, rr)]
    table = tabulate.tabulate(table_data, headers=['dataset', 'acc', 'prec', 'rec', 'f1'],
                              tablefmt='simple', floatfmt='.3f')
    if out_file:
        with open(out_file, 'a') as f:
            f.write('\n')
            if bi:
                f.write('+++Bidirectional LSTM+++\n')
            else:
                f.write('+++LSTM+++\n')
            f.write(table)
            f.write('\n')
    else:
        print()
        if bi:
            print('Bidirectional LSTM')
        else:
            print('LSTM')
        print(table)

def build(self) -> None:
    """
    Build the model.
    """
    inp = Input(shape=(None,))
    emb = Embedding(len(self.grapheme_alphabet), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units1, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = TimeDistributed(Dense(self.units2, activation="relu"))(encoded)
    predictions = TimeDistributed(Dense(len(self.phonetic_alphabet), activation="softmax"))(decoded)
    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model

def build(self) -> None:
    """
    Build the model.
    """
    inp = Input(shape=(None,))
    emb = Embedding(len(self.grapheme_set), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = TimeDistributed(Dense(self.units, activation="relu"))(encoded)
    predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)
    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model

def build(self) -> None:
    """
    Build the model.
    """
    inp = Input(shape=(None,))
    emb = Embedding(len(self.phonetic_alphabet), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(encoded)
    decoded = Dropout(self.dropout)(decoded)
    predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)
    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model

def __call__(self, inputs):
    x = inputs[0]
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(256, 7,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.recurrent.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def __init__(self, nlp, expressions):
    self.nlp = nlp
    self.all_expressions = expressions
    self.num_features = nlp.vocab.vectors_length
    self.categories = list(set([expression.intent().name for expression in expressions]))
    intents = [expression.intent().name for expression in expressions]
    intents_indices = [self.categories.index(intent) for intent in intents]

    self.x = np.zeros(shape=(len(expressions), DOCUMENT_MAX_NUM_WORDS, self.num_features)).astype('float32')
    self.__init_x(self.x, expressions)
    self.y = np_utils.to_categorical(intents_indices)

    self.model = Sequential()
    self.model.add(Bidirectional(LSTM(int(DOCUMENT_MAX_NUM_WORDS * 1.5)),
                                 input_shape=(DOCUMENT_MAX_NUM_WORDS, self.num_features)))
    self.model.add(Dropout(0.3))
    self.model.add(Dense(len(self.categories), activation='sigmoid'))
    self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

def bidirectional_lstm(len_output):
    # sequence_input is a matrix of glove vectors (one for each input word)
    sequence_input = Input(
        shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM,), dtype='float32')
    l_lstm = Bidirectional(LSTM(100))(sequence_input)
    preds = Dense(len_output, activation='softmax')(l_lstm)
    model = Model(sequence_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=[utils.f1_score, 'categorical_accuracy'])

    """
    model.add(Bidirectional(LSTM(shape['nr_hidden'])))
    # dropout to avoid overfitting
    model.add(Dropout(settings['dropout']))
    model.add(Dense(shape['nr_class'], activation='sigmoid'))
    model.compile(optimizer=Adam(lr=settings['lr']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    """
    return model

def bidirectional_gru(len_output):
    # sequence_input is a matrix of glove vectors (one for each input word)
    sequence_input = Input(
        shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM,), dtype='float32')
    l_lstm = Bidirectional(GRU(100))(sequence_input)
    # TODO look call(input_at_t, states_at_t) method, returning (output_at_t, states_at_t_plus_1)
    # also look at switch(condition, then_expression, else_expression) for deciding when to feed previous state
    preds = Dense(len_output, activation='softmax')(l_lstm)
    model = Model(sequence_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=[utils.f1_score, 'categorical_accuracy'])
    return model


# required, see values below

def BiGRU(X_train, y_train, X_test, y_test, gru_units, dense_units, input_shape, \
          batch_size, epochs, drop_out, patience):
    model = Sequential()
    reg = L1L2(l1=0.2, l2=0.2)

    model.add(Bidirectional(GRU(units=gru_units, dropout=drop_out, activation='relu',
                                recurrent_regularizer=reg, return_sequences=True),
                            input_shape=input_shape, merge_mode="concat"))
    model.add(BatchNormalization())
    model.add(TimeDistributed(Dense(dense_units, activation='relu')))
    model.add(BatchNormalization())
    model.add(Bidirectional(GRU(units=gru_units, dropout=drop_out, activation='relu',
                                recurrent_regularizer=reg, return_sequences=True),
                            merge_mode="concat"))
    model.add(BatchNormalization())
    model.add(Dense(units=1))
    model.add(GlobalAveragePooling1D())
    print(model.summary())

    early_stopping = EarlyStopping(monitor="val_loss", patience=patience)
    model.compile(loss='mse', optimizer='adam')
    history_callback = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, \
                                 verbose=2, callbacks=[early_stopping],
                                 validation_data=[X_test, y_test], shuffle=True)
    return model, history_callback

def _get_encoder_layer(self):
    if self.encoder_layer is None:
        self.encoder_layer = LSTM(input_dim=self.embed_dim, output_dim=self.embed_dim,
                                  return_sequences=self.return_sequences, name="encoder")
        if self.bidirectional:
            self.encoder_layer = Bidirectional(self.encoder_layer, name="encoder")
    return self.encoder_layer

def _get_encoder_layer(self):
    if self.encoder_layer is None:
        self.encoder_layer = OntoAttentionLSTM(input_dim=self.embed_dim, output_dim=self.embed_dim,
                                               num_senses=self.num_senses, num_hyps=self.num_hyps,
                                               use_attention=self.use_attention, consume_less="gpu",
                                               return_sequences=self.return_sequences, name="onto_lstm")
        if self.bidirectional:
            self.encoder_layer = Bidirectional(self.encoder_layer, name="onto_lstm")
    return self.encoder_layer

def define_attention_model(self):
    '''
    Take necessary parts out of the model to get OntoLSTM attention.
    '''
    if not self.model:
        raise RuntimeError("Model not trained yet!")
    input_shape = self.model.get_input_shape_at(0)
    input_layer = Input(input_shape[1:], dtype='int32')  # removing batch size
    embedding_layer = None
    encoder_layer = None
    for layer in self.model.layers:
        if layer.name == "embedding":
            embedding_layer = layer
        elif layer.name == "onto_lstm":
            # We need to redefine the OntoLSTM layer with the learned weights and set return attention to True.
            # Assuming we'll want attention values for all words (return_sequences = True)
            if isinstance(layer, Bidirectional):
                onto_lstm = OntoAttentionLSTM(input_dim=self.embed_dim, output_dim=self.embed_dim,
                                              num_senses=self.num_senses, num_hyps=self.num_hyps,
                                              use_attention=True, return_attention=True,
                                              return_sequences=True, consume_less='gpu')
                encoder_layer = Bidirectional(onto_lstm, weights=layer.get_weights())
            else:
                encoder_layer = OntoAttentionLSTM(input_dim=self.embed_dim,
                                                  output_dim=self.embed_dim,
                                                  num_senses=self.num_senses,
                                                  num_hyps=self.num_hyps,
                                                  use_attention=True,
                                                  return_attention=True,
                                                  return_sequences=True,
                                                  consume_less='gpu',
                                                  weights=layer.get_weights())
            break
    if not embedding_layer or not encoder_layer:
        raise RuntimeError("Required layers not found!")
    attention_output = encoder_layer(embedding_layer(input_layer))
    self.attention_model = Model(inputs=input_layer, outputs=attention_output)
    print >>sys.stderr, "Attention model summary:"
    self.attention_model.summary()
    self.attention_model.compile(loss="mse", optimizer="sgd")  # Loss and optimizer do not matter!

def bi_lstm(input_shape, n_classes, layers=2, neurons=80, dropout=0.3):
    """
    just replace ANN by RNNs
    """
    model = Sequential(name='pure_rnn')
    model.add(Bidirectional(LSTM(neurons, return_sequences=False if layers==1 else True,
                                 dropout=dropout, recurrent_dropout=dropout),
                            input_shape=input_shape))
    model.add(LSTM(neurons, return_sequences=False if layers==1 else True,
                   input_shape=input_shape, dropout=dropout, recurrent_dropout=dropout))
    for i in range(layers-1):
        model.add(Bidirectional(LSTM(neurons, return_sequences=False if i==layers-2 else True,
                                     dropout=dropout, recurrent_dropout=dropout)))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001),
                  metrics=[keras.metrics.categorical_accuracy])
    return model

def GRUf(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_SENTS+1, MAX_WORDS), name="wordInputs", dtype='float32')
    wordInp = Flatten()(wordInputs)
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInp)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=False), name='gru1')(wordEmbedding)
    v6 = Dense(1, activation="sigmoid", name="dense")(hij)
    model = Model(inputs=[wordInputs], outputs=[v6])
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

def fhan2_avg(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    Si = GlobalAveragePooling1D()(hij)
    wordEncoder = Model(wordInputs, Si)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    #sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    Vb = GlobalAveragePooling1D()(hi)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='glorot_uniform', name="dense")(Vb)

    model = Model(inputs=[docInputs], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def han2(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    #print 'in han2 max-nb-words'
    #print MAX_NB_WORDS
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    alpha_its, Si = AttentionLayer(name='att1')(hij)
    #wordDrop = Dropout(DROPOUTPER, name='wordDrop')(Si)
    wordEncoder = Model(wordInputs, Si)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    alpha_s, Vb = AttentionLayer(name='att2')(hi)
    #sentDrop = Dropout(DROPOUTPER, name='sentDrop')(Vb)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vb)

    model = Model(inputs=[docInputs], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def biLSTM_encoder(input, units, dropout, recurrent_dropout, num_layers):
    """Question and context encoder. Just Bi-LSTM from keras."""
    encoder = input
    for i in range(num_layers):
        encoder = Bidirectional(LSTM(units=units,
                                     activation='tanh',
                                     recurrent_activation='hard_sigmoid',
                                     use_bias=True,
                                     kernel_initializer='glorot_uniform',
                                     recurrent_initializer='orthogonal',
                                     bias_initializer='zeros',
                                     unit_forget_bias=True,
                                     kernel_regularizer=None,
                                     recurrent_regularizer=None,
                                     bias_regularizer=None,
                                     activity_regularizer=None,
                                     kernel_constraint=None,
                                     recurrent_constraint=None,
                                     bias_constraint=None,
                                     return_sequences=True,
                                     dropout=dropout,
                                     recurrent_dropout=recurrent_dropout,
                                     unroll=False))(encoder)
    return encoder

def build_model(self, x):
    for i, n in enumerate(self.hidden_dims):
        is_last_layer = i == len(self.hidden_dims) - 1
        rnn = self.rnn_class(n, return_sequences=not is_last_layer, **self.rnn_kwargs)
        if self.bidirectional:
            x = Bidirectional(rnn)(x)
        else:
            x = rnn(x)
    return x

def build_model(self, x):
    rnn = self.rnn_class(self.encoder_dims, return_sequences=True, **self.rnn_kwargs)
    if self.bidirectional:
        word_activations = Bidirectional(rnn)(x)
    else:
        word_activations = rnn(x)
    attention_layer = AttentionLayer()
    doc_vector = attention_layer(word_activations)
    self.attention_tensor = attention_layer.get_attention_tensor()
    return doc_vector

def get_lstm_model():
    input = Input(shape=(maxlen, 128))
    output = Bidirectional(LSTM(64, return_sequences=True))(input)
    return Model(input, output)

def __init__(self, training, sequence_length=None, vocabulary_size=None,
             train_embeddings=SequentialTextEmbeddingClassifier.TRAIN_EMBEDDINGS,
             language_model=LANGUAGE_MODEL, rnn_type=RNN_TYPE, rnn_units=RNN_UNITS,
             bidirectional=BIDIRECTIONAL, dropout=DROPOUT, learning_rate=LEARNING_RATE):
    from keras.models import Sequential
    from keras.layers import Bidirectional, Dense, Dropout, GRU, LSTM
    from keras.optimizers import Adam

    label_names, sequence_length, vocabulary_size = self.parameters_from_training(
        sequence_length, vocabulary_size, training, language_model)
    embedder = TextSequenceEmbedder(vocabulary_size, sequence_length, language_model)

    model = Sequential()
    model.add(self.embedding_layer(embedder, sequence_length, train_embeddings,
                                   mask_zero=True, name="embedding"))
    rnn_class = {"lstm": LSTM, "gru": GRU}[rnn_type]
    for i, units in enumerate(rnn_units, 1):
        name = "rnn-%d" % i
        return_sequences = i < len(rnn_units)
        if bidirectional:
            rnn = Bidirectional(rnn_class(units, return_sequences=return_sequences), name=name)
        else:
            rnn = rnn_class(units, return_sequences=return_sequences, name=name)
        model.add(rnn)
        model.add(Dropout(dropout, name="dropout-%d" % i))
    model.add(Dense(len(label_names), activation="softmax", name="softmax"))
    optimizer = Adam(lr=learning_rate)
    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    self.rnn_units = rnn_units
    self.bidirectional = bidirectional
    self.dropout = dropout
    super().__init__(model, embedder, label_names)

def Make_model(self):
    model = Sequential()
    model.add(Bidirectional(LSTM(self.n_hidden), input_shape=(n_time, n_in)))
    model.add(Dense(n_out, kernel_initializer=self.weight))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999),
                  metrics=['accuracy'])
    return model

def building_ner(num_lstm_layer, num_hidden_node, dropout, time_step, vector_length, output_lenght):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(time_step, vector_length)))
    for i in range(num_lstm_layer-1):
        model.add(Bidirectional(LSTM(units=num_hidden_node, return_sequences=True,
                                     dropout=dropout, recurrent_dropout=dropout)))
    model.add(Bidirectional(LSTM(units=num_hidden_node, return_sequences=True,
                                 dropout=dropout, recurrent_dropout=dropout),
                            merge_mode='concat'))
    model.add(TimeDistributed(Dense(output_lenght)))
    model.add(Activation('softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_simpleCnnRnn(image_shape, max_caption_len, vocab_size):
    image_model = Sequential()
    # image_shape : C,W,H
    # input: 100x100 images with 3 channels -> (3, 100, 100) tensors.
    # this applies 32 convolution filters of size 3x3 each.
    image_model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=image_shape))
    image_model.add(BatchNormalization())
    image_model.add(Activation('relu'))
    image_model.add(Convolution2D(32, 3, 3))
    image_model.add(BatchNormalization())
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    image_model.add(Dropout(0.25))
    image_model.add(Convolution2D(64, 3, 3, border_mode='valid'))
    image_model.add(BatchNormalization())
    image_model.add(Activation('relu'))
    image_model.add(Convolution2D(64, 3, 3))
    image_model.add(BatchNormalization())
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    image_model.add(Dropout(0.25))
    image_model.add(Flatten())
    # Note: Keras does automatic shape inference.
    image_model.add(Dense(128))
    image_model.add(RepeatVector(max_caption_len))
    image_model.add(Bidirectional(GRU(output_dim=128, return_sequences=True)))
    #image_model.add(GRU(output_dim=128, return_sequences=True))
    image_model.add(TimeDistributed(Dense(vocab_size)))
    image_model.add(Activation('softmax'))
    return image_model

def __call__(self, inputs):
    x = self._merge_inputs(inputs)
    shape = getattr(x, '_keras_shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def create_two_level_bi_lstm(input_4d, embedding_layer,
                             max_sentence_length, max_n_analyses, max_word_root_length,
                             lstm_dim, embedding_dim):
    r = Reshape((max_sentence_length * max_n_analyses * max_word_root_length,))
    # input_4d = Lambda(lambda x: x, output_shape=lambda s: s)(input_4d)
    rr = r(input_4d)
    input_embeddings = embedding_layer(rr)
    print input_embeddings
    r = MaskedReshape(
        (max_sentence_length * max_n_analyses, max_word_root_length, embedding_dim),
        (max_sentence_length * max_n_analyses, max_word_root_length))
    # input_embeddings = Lambda(lambda x: x, output_shape=lambda s: s)(input_embeddings)
    rr = r(input_embeddings)
    lstm_layer = Bidirectional(LSTM(lstm_dim, input_shape=(max_word_root_length, embedding_dim)))
    td_lstm_layer = TimeDistributed(lstm_layer, input_shape=(max_word_root_length, embedding_dim))
    lstm_layer_output = td_lstm_layer(rr)
    lstm_layer_output_relu = Activation('relu')(lstm_layer_output)
    print "lstm_layer_output_relu", lstm_layer_output_relu
    r = Reshape((max_sentence_length, max_n_analyses, 2 * lstm_dim))
    lstm_layer_output_relu = Lambda(lambda x: x, output_shape=lambda s: s)(lstm_layer_output_relu)
    lstm_layer_output_relu_reshaped = r(lstm_layer_output_relu)
    print "lstm_layer_output_relu_reshaped", lstm_layer_output_relu_reshaped
    return input_embeddings, lstm_layer_output_relu_reshaped

def build_models(params, index_embedding):
    in_layer = Input(shape=(params['max_len'],), dtype='int32')
    mid_layer = Embedding(input_dim=params['num_words'],
                          output_dim=params['embedding_len'],
                          weights=[index_embedding])(in_layer)
    # mid_layer = LSTM(params['lstm_output_dim'], return_sequences=True,
    #                  dropout=0.5, recurrent_dropout=0.5)(mid_layer)
    mid_layer = Bidirectional(LSTM(params['lstm_output_dim'],
                                   return_sequences=True,
                                   dropout=0.5, recurrent_dropout=0.5))(mid_layer)
    mid_layer = Dense(params['dense_units'])(mid_layer)
    mid_layer = Dropout(0.3)(mid_layer)
    mid_layer = Flatten()(mid_layer)

    if (params['num_class'] == 2):
        loss = 'binary_crossentropy'
        out_layer = Dense(units=1, activation='sigmoid')(mid_layer)
    else:
        loss = 'categorical_crossentropy'
        out_layer = Dense(units=params['num_class'], activation='softmax')(mid_layer)

    single_model = Model(inputs=in_layer, outputs=out_layer)
    single_model.compile(loss=loss, optimizer='rmsprop', metrics=['accuracy'])
    return single_model

def lstm_units(self):
    lstm = self.model.get_layer("lstm")
    if isinstance(lstm, Bidirectional):
        lstm = lstm.layer
    return lstm.units

def bidirectional(self):
    return isinstance(self.model.get_layer("lstm"), Bidirectional)

def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C, trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q, trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A, trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)

    l_lstm_c = Bidirectional(LSTM(60))(embedded_context)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)

    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='relu')(concat_c_q)
    relu_c_q = Dropout(0.25)(relu_c_q)
    concat_c_q_a = concatenate([l_lstm_a, relu_c_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)

    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C, trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q, trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A, trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)

    l_lstm_c = Bidirectional(LSTM(60, return_sequences=True))(embedded_context)
    l_lstm_c = Bidirectional(LSTM(60))(l_lstm_c)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)

    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='relu')(concat_c_q)
    relu_c_q = Dropout(0.25)(relu_c_q)
    concat_c_q_a = concatenate([l_lstm_a, relu_c_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)

    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

def __init__(self, word_index, embedding_matrix):
    embedding_layer_q = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q, trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A, trainable=False)
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)

    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)

    concat_c_q_a = concatenate([l_lstm_a, l_lstm_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)

    self.model = Model([question, answer], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C, trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q, trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A, trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)

    l_lstm_c = Bidirectional(LSTM(60))(embedded_context)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)

    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='tanh')(concat_c_q)
    concat_c_a = concatenate([l_lstm_a, l_lstm_c], axis=1)
    relu_c_a = Dense(100, activation='tanh')(concat_c_a)
    relu_c_q = Dropout(0.5)(relu_c_q)
    relu_c_a = Dropout(0.5)(relu_c_a)
    concat_c_q_a = merge([relu_c_a, relu_c_q], mode='cos')
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)

    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

def build_model(data, word_weights, tag_window=5, embed_dim=100):
    batch_size = 32
    nb_epoch = 16
    nb_class = 4
    hidden_dim = 128

    train_x = np.array(list(data['x']))
    train_y = np.array(list(data['y']))
    train_y = np_utils.to_categorical(train_y, nb_class)
    print(train_x.shape)
    print(train_y.shape)

    input_x = Input(shape=(tag_window, ), dtype='float32', name='input_x')
    embed_x = Embedding(output_dim=embed_dim,
                        input_dim=word_weights.shape[0],
                        input_length=tag_window,
                        weights=[word_weights],
                        name='embed_x')(input_x)
    bi_lstm = Bidirectional(LSTM(hidden_dim, return_sequences=False), merge_mode='sum')(embed_x)
    x_dropout = Dropout(0.5)(bi_lstm)
    x_output = Dense(nb_class,
                     # kernel_regularizer=regularizers.l2(0.01),
                     # kernel_constraint=maxnorm(3.0),
                     # activity_regularizer=regularizers.l2(0.01),
                     activation='softmax')(x_dropout)
    model = Model(input=[input_x], output=[x_output])
    model.compile(optimizer='adamax', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit([train_x], [train_y], validation_split=0.2,
              batch_size=batch_size, epochs=nb_epoch, shuffle=True)

def LSTMLayer(embed_matrix, embed_input, sequence_length, dropout_prob, hidden_dims,
              embedding_dim=300, lstm_dim=100):
    model = Sequential()
    model.add(Embedding(embed_input, embedding_dim,
                        input_length=sequence_length, weights=[embed_matrix]))
    model.add(Bidirectional(MGU(lstm_dim, return_sequences=True)))
    #model.add(AttentionLayer(lstm_dim))
    model.add(GlobalMaxPooling1D())
    # 3. Hidden Layer
    model.add(Dense(hidden_dims))
    model.add(Dropout(dropout_prob[1]))
    model.add(Activation('relu'))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
    return model

def HierarchicalRNN(embed_matrix, max_words, ans_cnt, sequence_length, embedding_dim, lstm_dim=100):
    ''' Hierachical RNN model
        Input: (batch_size, answers, answer words)
    Args:
        embed_matrix: word embedding
        max words: word dict size of embedding layer
        ans_cnt: answer count
        sequence_length: answer words count
        embedding_dim: embedding dimention
        lstm_dim:
    '''
    hnn = Sequential()
    x = Input(shape=(ans_cnt, sequence_length))
    # 1. time distributed word embedding: (None, steps, words, embed_dim)
    words_embed = TimeDistributed(Embedding(max_words, embedding_dim,
                                            input_length=sequence_length,
                                            weights=[embed_matrix]))(x)
    # 2. word level lstm embedding: --> (None, steps/sentence_num, hidden/sent_words, hidden_dim)
    word_lstm = TimeDistributed(Bidirectional(MGU(lstm_dim, return_sequences=True)))(words_embed)

    # 3. average pooling : --> (None,steps,dim)
    word_avg = TimeDistributed(GlobalMaxPooling1D())(word_lstm)
    #word_avg = TimeDistributed(AttentionLayer(lstm_dim*2))(word_lstm)

    # 4. sentence lstm: --> (None, hidden, hidden_dim)
    sent_lstm = Bidirectional(MGU(lstm_dim, return_sequences=True))(word_avg)

    # 5. pooling: --> (None, hidden_dim)
    sent_avg = GlobalMaxPooling1D()(sent_lstm)
    #sent_avg = AttentionLayer(lstm_dim*2)(sent_lstm)
    model = Model(input=x, output=sent_avg)
    hnn.add(model)
    return hnn


# vim: set expandtab ts=4 sw=4 sts=4 tw=100:

def get_attention(self, inputs):
    '''
    Takes inputs and returns pairs of synsets and corresponding attention values.
    '''
    if not self.attention_model:
        self.define_attention_model()
    attention_outputs = self.attention_model.predict(inputs)
    sent_attention_values = []
    for sentence_input, sentence_attention in zip(inputs, attention_outputs):
        word_attention_values = []
        for word_input, word_attention in zip(sentence_input, sentence_attention):
            # Size of word input is (senses, hyps+1)
            # Ignoring the last hyp index because that is just the word index pt there by
            # OntoAwareEmbedding for sense priors.
            if word_input.sum() == 0:
                # This is just padding
                continue
            word_input = word_input[:, :-1]  # removing last hyp index.
            sense_hyp_prod = self.num_senses * self.num_hyps
            assert len(word_attention) == sense_hyp_prod or len(word_attention) == 2 * sense_hyp_prod
            attention_per_sense = []
            if len(word_attention) == 2 * sense_hyp_prod:
                # The encoder is Bidirectional. We have attentions from both directions.
                forward_sense_attention = word_attention[:len(word_attention) // 2]
                backward_sense_attention = word_attention[len(word_attention) // 2:]
                processed_attention = zip(forward_sense_attention, backward_sense_attention)
            else:
                # Encoder is not bidirectional
                processed_attention = word_attention
            hyp_ind = 0
            while hyp_ind < len(processed_attention):
                attention_per_sense.append(processed_attention[hyp_ind:hyp_ind+self.num_hyps])
                hyp_ind += self.num_hyps
            sense_attention_values = []
            for sense_input, attention_per_hyp in zip(word_input, attention_per_sense):
                hyp_attention_values = []
                for hyp_input, hyp_attention in zip(sense_input, attention_per_hyp):
                    if hyp_input == 0:
                        continue
                    hyp_attention_values.append((self.data_processor.get_token_from_index(hyp_input,
                                                                                          onto_aware=True),
                                                 hyp_attention))
                sense_attention_values.append(hyp_attention_values)
            word_attention_values.append(sense_attention_values)
        sent_attention_values.append(word_attention_values)
    return sent_attention_values

def fhan3_avg(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    #alpha_its, Si = AttentionLayer(name='att1')(hij)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    word_pool = GlobalAveragePooling1D()(wordDrop)
    wordEncoder = Model(wordInputs, word_pool)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    #sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    #alpha_s, Vb = AttentionLayer(name='att2')(hi)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    sent_pool = GlobalAveragePooling1D()(sentDrop)
    Vb = Reshape((1, sent_pool._keras_shape[1]))(sent_pool)

    #-----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=False, name='headlineEmb')(headlineInput)
    #Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    #a3, Vn = AttentionLayer(name='att3')(h3)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    head_pool = GlobalAveragePooling1D()(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(head_pool)

    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def fhan3_max(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    #alpha_its, Si = AttentionLayer(name='att1')(hij)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    word_max = GlobalMaxPooling1D()(wordDrop)
    wordEncoder = Model(wordInputs, word_max)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    #sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    #alpha_s, Vb = AttentionLayer(name='att2')(hi)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    sent_max = GlobalMaxPooling1D()(sentDrop)
    Vb = Reshape((1, sent_max._keras_shape[1]))(sent_max)

    #-----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=False, name='headlineEmb')(headlineInput)
    #Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    #a3, Vn = AttentionLayer(name='att3')(h3)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    head_max = GlobalMaxPooling1D()(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(head_max)

    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def fhan3_pretrain(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name='word1', dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='emb1')(wordInputs)
    #Assuming all the sentences have same number of words. Check for input_length again.
    hij = Bidirectional(GRU(WORDGRU, name='gru1', return_sequences=True))(wordEmbedding)
    wordDrop = Dropout(DROPOUTPER, name='drop1')(hij)
    alpha_its, Si = AttentionLayer(name='att1')(wordDrop)
    wordEncoder = Model(wordInputs, Si)
    wordEncoder.load_weights('han1_pretrain.h5', by_name=True)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    alpha_s, Vb = AttentionLayer(name='att2')(sentDrop)
    Vb = Reshape((1, Vb._keras_shape[1]))(Vb)

    #-----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=True, name='headlineEmb')(headlineInput)
    Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    h3Drop = Dropout(DROPOUTPER, name='h3drop')(h3)
    a3, Vn = AttentionLayer(name='att3')(h3Drop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vn)

    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def HAN(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    alpha_its, Si = AttentionLayer(name='att1')(wordDrop)
    wordEncoder = Model(wordInputs, Si)

    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    alpha_s, Vb = AttentionLayer(name='att2')(sentDrop)
    Vb = Reshape((1, Vb._keras_shape[1]))(Vb)

    #-----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=True, name='headlineEmb')(headlineInput)
    Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    a3, Vn = AttentionLayer(name='att3')(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vn)

    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder

def biLSTM_encoder2(input, units, dropout=0.0, recurrent_dropout=0.0, num_layers=3,
                    input_dropout=0.3, output_dropout=0.3, concat_layers=True):
    """Question and context encoder. Just Bi-LSTM from keras.
    Added optional dropout between layers.
    Added optional concatenation of each layer outputs into one output representation."""

    outputs = [input]
    for i in range(num_layers):
        rnn_input = outputs[-1]

        if input_dropout > 0:
            rnn_input = Dropout(rate=input_dropout)(rnn_input)

        rnn_output = Bidirectional(LSTM(units=units,
                                        activation='tanh',
                                        recurrent_activation='hard_sigmoid',
                                        use_bias=True,
                                        kernel_initializer='glorot_uniform',
                                        recurrent_initializer='orthogonal',
                                        bias_initializer='zeros',
                                        unit_forget_bias=True,
                                        kernel_regularizer=None,
                                        recurrent_regularizer=None,
                                        bias_regularizer=None,
                                        activity_regularizer=None,
                                        kernel_constraint=None,
                                        recurrent_constraint=None,
                                        bias_constraint=None,
                                        return_sequences=True,
                                        dropout=dropout,
                                        recurrent_dropout=recurrent_dropout,
                                        unroll=False))(rnn_input)
        outputs.append(rnn_output)

    # Concat hidden layers
    if concat_layers:
        output = concatenate(outputs[1:])
    else:
        output = outputs[-1]

    if output_dropout > 0:
        output = Dropout(rate=input_dropout)(output)

    return output

def __init__(self, lstm_num_layers, lstm_layer_size, trainable_embeddings, **kw):
    """Initializes the Keras LSTM question processing component.

    Args:
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_layer_size: Dimensionality of each LSTM unit.

    Keyword Args:
        max_sentence_length: Maximum number of words to consider in each
                             question, necessary at train time.
        bidirectional: Whether to use bidirectional LSTM layers.
    """
    print('Loading GloVe data... ', end='', flush=True)
    self._nlp = English()
    print('Done.')
    #embedding_dims = 300
    embeddings = get_embeddings(self._nlp.vocab)
    embedding_dims = embeddings.shape[1]

    # TODO(Bernhard): Investigate how the LSTM parameters influence the
    # overall performance.
    self._max_len = kw.get('max_sentence_length', 15)
    self._bidirectional = kw.get('bidirectional', False)

    self._model = Sequential()
    shallow = lstm_num_layers == 1  # marks a one layer LSTM

    if trainable_embeddings:
        # if embeddings are trainable we have to enforce CPU usage in order to not run out of memory.
        # this is device dependent.
        # TODO(Bernhard): preprocess questions ans vocab and try if we can get rid of enough words to make
        # this run on gpu anyway
        with tf.device("/cpu:0"):
            self._model.add(Embedding(embeddings.shape[0], embeddings.shape[1],
                                      input_length=self._max_len, trainable=True,
                                      weights=[embeddings]))
    else:
        # a non-trainable embedding layer can run on GPU without exhausting all the memory
        self._model.add(Embedding(embeddings.shape[0], embeddings.shape[1],
                                  input_length=self._max_len, trainable=False,
                                  weights=[embeddings]))

    lstm = LSTM(output_dim=lstm_layer_size, return_sequences=not shallow,
                input_shape=(self._max_len, embedding_dims))
    if self._bidirectional:
        lstm = Bidirectional(lstm)
    self._model.add(lstm)
    if not shallow:
        for i in range(lstm_num_layers-2):
            lstm = LSTM(output_dim=lstm_layer_size, return_sequences=True)
            if self._bidirectional:
                lstm = Bidirectional(lstm)
            self._model.add(lstm)

        lstm = LSTM(output_dim=lstm_layer_size, return_sequences=False)
        if self._bidirectional:
            lstm = Bidirectional(lstm)
        self._model.add(lstm)

def modelConstruction(self):
    """
    Construct the melody and rhythm model.
    """
    # self.melody.add(GRU(128, consume_less = 'mem', return_sequences = True,
    #                     input_shape = (self.timestep, 56)))
    # # self.melody.add(Dropout(0.5))
    # self.rhythm.add(GRU(128, consume_less = 'mem', return_sequences = True,
    #                     input_shape = (self.timestep, 56)))
    # self.rhythm.add(Dropout(0.5))
    #
    # for i in range(2):
    #     self.melody.add(GRU(128, return_sequences = True))
    #     # self.melody.add(Dropout(0.5))
    #     self.rhythm.add(GRU(128, return_sequences = True))
    #     self.rhythm.add(Dropout(0.5))
    self.melody.add(Dense(128, input_shape = (self.timestep, 56)))
    self.rhythm.add(Dense(128, input_shape = (self.timestep, 56)))
    for i in range(4):
        self.melody.add(Bidirectional(LSTM(128, return_sequences = True)))
        self.melody.add(Dropout(0.5))
        self.rhythm.add(GRU(128, return_sequences = True))
        # self.rhythm.add(Dropout(0.5))
    self.melody.add(Dense(128, input_shape = (self.timestep, 56)))
    self.rhythm.add(Dense(128, input_shape = (self.timestep, 56)))
    # for i in range(3):
    #     self.melody.add(Bidirectional(GRU(128, return_sequences = True)))
    #     self.melody.add(Dropout(0.5))
    #     self.rhythm.add(GRU(128, return_sequences = True))
    self.melody.add(Dense(33, activation = 'softmax'))
    self.rhythm.add(Dense(23, activation = 'softmax'))

    # compile part
    self.melody.compile(optimizer = 'adam',
                        #loss = self.my_loss_function()
                        loss = 'categorical_crossentropy')
    self.rhythm.compile(optimizer = 'adam',
                        #loss = self.my_loss_function()
                        loss = 'categorical_crossentropy')

def sbrt2017(num_hiddens, var_dropout, dropout, weight_decay, num_features=39, num_classes=28):
    """ SBRT model
    Reference:
        [1] Gal, Y, "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks", 2015.
        [2] Graves, Alex, Abdel-rahman Mohamed, and Geoffrey Hinton. "Speech recognition with deep recurrent neural networks", 2013.
        [6] Wu, Yonghui, et al. "Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation.", 2016.
    """
    x = Input(name='inputs', shape=(None, num_features))
    o = x

    if dropout > 0.0:
        o = Dropout(dropout)(o)

    o = Bidirectional(LSTM(num_hiddens,
                           return_sequences=True,
                           W_regularizer=l2(weight_decay),
                           U_regularizer=l2(weight_decay),
                           dropout_W=var_dropout,
                           dropout_U=var_dropout,
                           consume_less='gpu'))(o)

    if dropout > 0.0:
        o = Dropout(dropout)(o)

    o = TimeDistributed(Dense(num_classes, W_regularizer=l2(weight_decay)))(o)

    # Define placeholders
    labels = Input(name='labels', shape=(None,), dtype='int32', sparse=True)
    inputs_length = Input(name='inputs_length', shape=(None,), dtype='int32')

    # Define a decoder
    dec = Lambda(decode, output_shape=decode_output_shape,
                 arguments={'is_greedy': True}, name='decoder')
    y_pred = dec([o, inputs_length])

    ctc = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")
    # Define loss as a layer
    loss = ctc([o, labels, inputs_length])

    return Model(input=[x, labels, inputs_length], output=[loss, y_pred])