The following 50 code examples, extracted from open-source Python projects, illustrate how to use keras.layers.TimeDistributed().
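Before the project examples, here is a minimal, self-contained sketch (not drawn from any of the projects below; the layer sizes are arbitrary) of what TimeDistributed does: it applies one and the same inner layer to every timestep of a 3D (batch, timesteps, features) input, sharing the wrapped layer's weights across timesteps.

from keras.models import Sequential
from keras.layers import Dense, TimeDistributed

# Toy model: the same Dense(8) layer is applied independently to each of the
# 10 timesteps of a (10, 16) input, so the output shape is (batch, 10, 8).
model = Sequential()
model.add(TimeDistributed(Dense(8, activation='relu'), input_shape=(10, 16)))
model.compile(loss='mse', optimizer='adam')
print(model.output_shape)  # (None, 10, 8)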
def create_model(self, ret_model=False):
    image_model = Sequential()
    image_model.add(Dense(EMBEDDING_DIM, input_dim=4096, activation='relu'))
    image_model.add(RepeatVector(self.max_length))

    lang_model = Sequential()
    lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_length))
    lang_model.add(LSTM(256, return_sequences=True))
    lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))

    model = Sequential()
    model.add(Merge([image_model, lang_model], mode='concat'))
    model.add(LSTM(1000, return_sequences=False))
    model.add(Dense(self.vocab_size))
    model.add(Activation('softmax'))

    print("Model created!")

    if ret_model:
        return model

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def __call__(self, inputs):
    x = self._merge_inputs(inputs)

    shape = getattr(x, '_keras_shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(128, kernel_regularizer=kernel_regularizer,
                                return_sequences=True),
                         merge_mode='concat')(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def simple_cnn(agent, env, dropout=0, learning_rate=1e-3, **args):
    with tf.device("/cpu:0"):
        state = tf.placeholder('float', [None, agent.input_dim])

        S = Input(shape=[agent.input_dim])
        h = Reshape(agent.input_dim_orig)(S)
        h = TimeDistributed(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode='same',
                                          activation='relu', dim_ordering='tf'))(h)
        # h = Dropout(dropout)(h)
        h = TimeDistributed(Convolution2D(32, 4, 4, subsample=(2, 2), border_mode='same',
                                          activation='relu', dim_ordering='tf'))(h)
        h = Flatten()(h)
        # h = Dropout(dropout)(h)
        h = Dense(256, activation='relu')(h)
        # h = Dropout(dropout)(h)
        h = Dense(128, activation='relu')(h)
        V = Dense(env.action_space.n, activation='linear', init='zero')(h)

        model = Model(S, V)
        model.compile(loss='mse', optimizer=RMSprop(lr=learning_rate))

    return state, model
def answer_start_pred(context_encoding, question_attention_vector, context_mask, W, dropout_rate):
    """Answer start prediction layer."""
    answer_start = Lambda(lambda arg: concatenate([arg[0], arg[1], arg[2]]))([
        context_encoding, question_attention_vector,
        multiply([context_encoding, question_attention_vector])])

    answer_start = TimeDistributed(Dense(W, activation='relu'))(answer_start)
    answer_start = Dropout(rate=dropout_rate)(answer_start)
    answer_start = TimeDistributed(Dense(1))(answer_start)

    # apply masking
    answer_start = Lambda(lambda q: masked_softmax(q[0], q[1]))([answer_start, context_mask])
    answer_start = Lambda(lambda q: flatten(q))(answer_start)
    return answer_start
def classifier(base_layers, input_rois, batch_size, nb_classes=3, trainable=False):
    # compile times tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (batch_size, 14, 14, 2048)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (batch_size, 2048, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])

    out = TimeDistributed(Flatten())(out_roi_pool)
    # out = TimeDistributed(Dropout(0.4))(out)
    # out = TimeDistributed(Dense(2048, activation='relu'))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def classifier(base_layers, input_rois, batch_size, nb_classes=3, trainable=False):
    # compile times tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (batch_size, 14, 14, 512)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (batch_size, 512, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])

    out = TimeDistributed(Flatten())(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    out = TimeDistributed(Dense(4096, activation='relu'))(out)
    out = TimeDistributed(Dropout(0.5))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def classifier(base_layers, input_rois, batch_size, nb_classes=3, trainable=False):
    # compile times tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (batch_size, 14, 14, 1024)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (batch_size, 1024, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])

    out = TimeDistributed(Flatten())(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    out = TimeDistributed(Dense(4096, activation='relu'))(out)
    out = TimeDistributed(Dropout(0.5))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 7
        input_shape = (num_rois, 7, 7, 512)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 512, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)
    out = TimeDistributed(Dropout(0.5))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def build(self) -> None:
    """Build the model."""
    inp = Input(shape=(None,))

    emb = Embedding(len(self.grapheme_alphabet), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units1, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = TimeDistributed(Dense(self.units2, activation="relu"))(encoded)
    predictions = TimeDistributed(Dense(len(self.phonetic_alphabet), activation="softmax"))(decoded)

    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model
def build(self) -> None:
    """Build the model."""
    inp = Input(shape=(None,))

    emb = Embedding(len(self.grapheme_set), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = TimeDistributed(Dense(self.units, activation="relu"))(encoded)
    predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)

    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model
def build(self) -> None:
    """Build the model."""
    inp = Input(shape=(None,))

    emb = Embedding(len(self.phonetic_alphabet), self.emb_dimension)(inp)
    encoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(emb)
    encoded = Dropout(self.dropout)(encoded)
    decoded = Bidirectional(self.rnn(self.units, return_sequences=True,
                                     recurrent_dropout=self.dropout))(encoded)
    decoded = Dropout(self.dropout)(decoded)
    predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)

    model = Model(inputs=inp, outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    self.model = model
def prep_model(inputs, N, s0pad, s1pad, c):
    # Word-level projection before averaging
    inputs[0] = TimeDistributed(Dense(N, activation='relu'))(inputs[0])
    inputs[0] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[0])
    inputs[1] = TimeDistributed(Dense(N, activation='relu'))(inputs[1])
    inputs[1] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[1])
    merged = concatenate([inputs[0], inputs[1]])

    # Deep
    for i in range(c['deep']):
        merged = Dense(c['nndim'], activation=c['nnact'])(merged)
        merged = Dropout(c['nndropout'])(merged)
        merged = BatchNormalization()(merged)

    is_duplicate = Dense(1, activation='sigmoid')(merged)
    return [is_duplicate], N
def build(self, inputs_shape):
    # Import dimensions
    (max_atoms, max_degree, num_atom_features, num_bond_features,
     num_samples) = mol_shapes_to_dims(mol_shapes=inputs_shape)

    # Add the dense layer that contains the trainable parameters:
    # initialise dense layer with specified params (kwargs) and name
    inner_layer = self.create_inner_layer_fn()
    inner_layer_type = inner_layer.__class__.__name__.lower()
    inner_layer.name = self.name + '_inner_' + inner_layer_type

    # Initialise TimeDistributed layer wrapper in order to parallelise
    # the dense layer across atoms
    inner_3D_layer_name = self.name + '_inner_timedistributed'
    self.inner_3D_layer = layers.TimeDistributed(inner_layer, name=inner_3D_layer_name)

    # Build the TimeDistributed layer (which will build the Dense layer)
    self.inner_3D_layer.build((None, max_atoms, num_atom_features + num_bond_features))

    # Store dense_3D_layer and its weights
    self.trainable_weights = self.inner_3D_layer.trainable_weights
def ResidualBlock1D_helper(layers, kernel_size, filters, final_stride=1):
    def f(_input):
        basic = _input
        for ln in range(layers):
            # basic = BatchNormalization()(basic)  # triggers known keras bug w/ TimeDistributed: https://github.com/fchollet/keras/issues/5221
            basic = ELU()(basic)
            basic = Conv1D(filters, kernel_size, kernel_initializer='he_normal',
                           kernel_regularizer=l2(1.e-4), padding='same')(basic)

        # note that this strides without averaging
        return AveragePooling1D(pool_size=1, strides=final_stride)(Add()([_input, basic]))
    return f
def classifier_layers(x, input_shape, stage_num, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    # (hence a smaller stride in the region that follows the ROI pool)
    if K.backend() == 'tensorflow':
        x = conv_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='a',
                          input_shape=input_shape, strides=(1, 2), trainable=trainable)
    elif K.backend() == 'theano':
        x = conv_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='a',
                          input_shape=input_shape, strides=(1, 1), trainable=trainable)
    print 'INFO: Classifier layers x block a: ', x
    x = identity_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='c', trainable=trainable)
    print 'INFO: Classifier layers x block b: ', x
    x = identity_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='d', trainable=trainable)
    print 'INFO: Classifier layers x block c: ', x

    # x = TimeDistributed(AveragePooling2D((2, 1)), name='avg_pool')(x)
    return x
def BidirLSTM(n_nodes, n_classes, n_feat, max_len=None, causal=True,
              loss='categorical_crossentropy', optimizer="adam", return_param_str=False):
    inputs = Input(shape=(None, n_feat))
    model = LSTM(n_nodes, return_sequences=True)(inputs)

    # Bidirectional LSTM
    if not causal:
        model_backwards = LSTM(n_nodes, return_sequences=True, go_backwards=True)(inputs)
        model = Merge(mode="concat")([model, model_backwards])

    model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)

    model = Model(input=inputs, output=model)
    model.compile(optimizer=optimizer, loss=loss, sample_weight_mode="temporal", metrics=['accuracy'])

    if return_param_str:
        param_str = "LSTM_N{}".format(n_nodes)
        if causal:
            param_str += "_causal"
        return model, param_str
    else:
        return model
def global_handle(self, emb_layer, flag):
    fw_lstm_out = self.forward_lstm(emb_layer)
    bw_lstm_out = self.backward_lstm(emb_layer)
    conv_out = self.conv_dropout(self.conv(emb_layer))

    fw_lstm_out = TimeDistributed(Dense(self.params['attention_dim']), name='fw_tb_' + flag)(fw_lstm_out)
    fw_lstm_att = Attention()(fw_lstm_out)
    # fw_lstm_att = Reshape((self.params['lstm_output_dim'], 1))(fw_lstm_att)

    conv_out = TimeDistributed(Dense(self.params['attention_dim']), name='conv_tb_' + flag)(conv_out)
    conv_att = Attention()(conv_out)
    # conv_att = Reshape((self.params['filters'], 1))(conv_att)

    bw_lstm_out = TimeDistributed(Dense(self.params['attention_dim']), name='bw_tb_' + flag)(bw_lstm_out)
    bw_lstm_att = Attention()(bw_lstm_out)
    # bw_lstm_att = Reshape((self.params['lstm_output_dim'], 1))(bw_lstm_att)

    return concatenate([fw_lstm_att, conv_att, bw_lstm_att], axis=2)
def SimpleRecurrentModel(params):
    model = Sequential()

    # Incorporating leakiness in the neurons
    model.add(leak_recurrent(input_dim=2, output_dim=params['N_rec'], return_sequences=True,
                             activation='relu', noise=params['rec_noise'], consume_less='mem',
                             tau=params['tau'], dale_ratio=params['dale_ratio']))

    # Before going directly to the output, we apply a relu to the signal FIRST and THEN sum THOSE signals.
    # So this is the difference between W * [x]_+ (what we want) and [W * x]_+ (what we would have gotten).
    model.add(Activation('relu'))

    # Output neuron
    model.add(TimeDistributed(dense_output_with_mask(output_dim=1, activation='linear',
                                                     dale_ratio=params['dale_ratio'],
                                                     input_dim=params['N_rec'])))

    # Using mse, like in Daniel's example. Training is slow, for some reason, when using binary_crossentropy.
    model.compile(loss='mse', optimizer='Adam', sample_weight_mode="temporal")
    return model
def arch_attention(embedding_layer, sequence_length, classes):
    tweet_input = Input(shape=(sequence_length,), dtype='int32')
    embedded_tweet = embedding_layer(tweet_input)

    activations = LSTM(128, return_sequences=True, name='recurrent_layer')(embedded_tweet)

    attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(128)(attention)
    attention = Permute([2, 1], name='attention_layer')(attention)

    sent_representation = merge([activations, attention], mode='mul')
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=1), name='merged_layer')(sent_representation)

    tweet_output = Dense(classes, activation='softmax', name='predictions')(sent_representation)

    tweetnet = Model(tweet_input, tweet_output)
    tweetnet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return tweetnet
def arch_attention36(embedding_layer, sequence_length, classes):
    tweet_input = Input(shape=(sequence_length,), dtype='int32')
    embedded_tweet = embedding_layer(tweet_input)

    activations = LSTM(36, return_sequences=True, name='recurrent_layer')(embedded_tweet)

    attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(36)(attention)
    attention = Permute([2, 1], name='attention_layer')(attention)

    sent_representation = merge([activations, attention], mode='mul')
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=1), name='merged_layer')(sent_representation)

    tweet_output = Dense(classes, activation='softmax', name='output_layer')(sent_representation)

    tweetnet = Model(tweet_input, tweet_output)
    tweetnet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return tweetnet
def BiGRU(X_train, y_train, X_test, y_test, gru_units, dense_units, input_shape,
          batch_size, epochs, drop_out, patience):
    model = Sequential()
    reg = L1L2(l1=0.2, l2=0.2)

    model.add(Bidirectional(GRU(units=gru_units, dropout=drop_out, activation='relu',
                                recurrent_regularizer=reg, return_sequences=True),
                            input_shape=input_shape, merge_mode="concat"))
    model.add(BatchNormalization())
    model.add(TimeDistributed(Dense(dense_units, activation='relu')))
    model.add(BatchNormalization())
    model.add(Bidirectional(GRU(units=gru_units, dropout=drop_out, activation='relu',
                                recurrent_regularizer=reg, return_sequences=True),
                            merge_mode="concat"))
    model.add(BatchNormalization())
    model.add(Dense(units=1))
    model.add(GlobalAveragePooling1D())
    print(model.summary())

    early_stopping = EarlyStopping(monitor="val_loss", patience=patience)
    model.compile(loss='mse', optimizer='adam')
    history_callback = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                                 verbose=2, callbacks=[early_stopping],
                                 validation_data=[X_test, y_test], shuffle=True)
    return model, history_callback
def generate_model(output_len, chars=None):
    """Generate the model."""
    print('Build model...')
    chars = chars or CHARS
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
    # Note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    for layer_number in range(INPUT_LAYERS):
        model.add(recurrent.LSTM(HIDDEN_SIZE, input_shape=(None, len(chars)), init=INITIALIZATION,
                                 return_sequences=layer_number + 1 < INPUT_LAYERS))
        model.add(Dropout(AMOUNT_OF_DROPOUT))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(output_len))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(OUTPUT_LAYERS):
        model.add(recurrent.LSTM(HIDDEN_SIZE, return_sequences=True, init=INITIALIZATION))
        model.add(Dropout(AMOUNT_OF_DROPOUT))

    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(len(chars), init=INITIALIZATION)))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def generate_model(args, nb_features, input_length, nb_repeats=1):
    """Generate the model."""
    emb_weights = np.eye(nb_features)

    model = Sequential()
    model.add(Embedding(input_dim=nb_features, output_dim=nb_features, input_length=input_length,
                        weights=[emb_weights], trainable=False))
    for layer_id in range(args.input_layers):
        model.add(args.cell_type(args.hidden_layers,
                                 return_sequences=layer_id + 1 < args.input_layers))
        model.add(Dropout(args.dropout))

    model.add(RepeatVector(nb_repeats))
    for _ in range(args.output_layers):
        model.add(args.cell_type(args.hidden_layers, return_sequences=True))
        model.add(Dropout(args.dropout))

    model.add(TimeDistributed(Dense(nb_features)))
    model.add(Activation("softmax"))

    model.compile(loss="sparse_categorical_crossentropy", optimizer=args.optimizer,
                  metrics=["accuracy"])
    return model
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name, maxlen):
    db = read_data(path_test)
    X = create_sequences(db, maxlen, maxlen)
    y = create_sequences(db, maxlen, maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    # build the model: 1 layer LSTM
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
    # Note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(maxlen))
    # The decoder RNN could be multiple layers stacked or a single layer
    model.add(LSTM(hidden_size, return_sequences=True))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(1)))

    model.load_weights(save_dir + model_name)

    model.compile(loss='mae', optimizer='adam')
    model.summary()

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    # prediction_container = np.array(prediction).flatten()
    plt.plot(prediction.flatten()[:4000], label='prediction')
    plt.plot(y.flatten()[maxlen:4000 + maxlen], label='true')
    plt.legend()
    plt.show()

    store_prediction_and_ground_truth(model)
def train_normal_model(path_train, input_size, hidden_size, batch_size, early_stopping_patience,
                       val_percentage, save_dir, model_name, maxlen):
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    db = read_data(path_train)
    train_x = db[:-maxlen]
    train_y = db[maxlen:]
    X = create_sequences(train_x, maxlen, maxlen)
    y = create_sequences(train_y, maxlen, maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    # preparing the callbacks
    check_pointer = callbacks.ModelCheckpoint(filepath=save_dir + model_name, verbose=1,
                                              save_best_only=True)
    early_stop = callbacks.EarlyStopping(patience=early_stopping_patience, verbose=1)

    # build the model: 1 layer LSTM
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
    # Note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(maxlen))
    # The decoder RNN could be multiple layers stacked or a single layer
    model.add(LSTM(hidden_size, return_sequences=True))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(1)))

    model.compile(loss='mae', optimizer='adam')
    model.summary()

    model.fit(X, y, batch_size=batch_size, nb_epoch=50, validation_split=val_percentage,
              callbacks=[check_pointer, early_stop])

    return model
def ann_rnn(input_shape, n_classes):
    """
    for working with extracted features
    """
    model = Sequential(name='ann_rnn')
    model.add(TimeDistributed(Dense(80, activation='elu', kernel_initializer='he_normal'),
                              input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.35))
    model.add(TimeDistributed(Dense(80, activation='elu', kernel_initializer='he_normal')))
    model.add(BatchNormalization())
    model.add(Dropout(0.35))
    model.add(LSTM(50))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(),
                  metrics=[keras.metrics.categorical_accuracy])
    return model
def __init__(self, rnn_dim, rnn_unit='gru', input_shape=(0,), dropout=0.0,
             highway=False, return_sequences=False, dense_dim=0):
    if rnn_unit == 'gru':
        rnn = GRU
    else:
        rnn = LSTM
    self.model = Sequential()
    self.model.add(Bidirectional(rnn(rnn_dim,
                                     dropout=dropout,
                                     recurrent_dropout=dropout,
                                     return_sequences=return_sequences),
                                 input_shape=input_shape))
    # self.model.add(rnn(rnn_dim,
    #                    dropout=dropout,
    #                    recurrent_dropout=dropout,
    #                    return_sequences=return_sequences,
    #                    input_shape=input_shape))
    if highway:
        if return_sequences:
            self.model.add(TimeDistributed(Highway(activation='tanh')))
        else:
            self.model.add(Highway(activation='tanh'))
    if dense_dim > 0:
        self.model.add(TimeDistributed(Dense(dense_dim, activation='relu')))
        self.model.add(TimeDistributed(Dropout(dropout)))
        self.model.add(TimeDistributed(BatchNormalization()))
def fhan2_max(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')

    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)

    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    Si = GlobalMaxPooling1D()(hij)

    wordEncoder = Model(wordInputs, Si)

    # -----------------------------------------------------------------------------------------------

    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')

    # sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)

    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    Vb = GlobalMaxPooling1D()(hi)

    v6 = Dense(1, activation="sigmoid", kernel_initializer='glorot_uniform', name="dense")(Vb)
    model = Model(inputs=[docInputs], outputs=[v6])

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
def han2(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    # print 'in han2 max-nb-words'
    # print MAX_NB_WORDS

    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='wordEmbedding')(wordInputs)

    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    alpha_its, Si = AttentionLayer(name='att1')(hij)
    # wordDrop = Dropout(DROPOUTPER, name='wordDrop')(Si)

    wordEncoder = Model(wordInputs, Si)

    # -----------------------------------------------------------------------------------------------

    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)

    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    alpha_s, Vb = AttentionLayer(name='att2')(hi)
    # sentDrop = Dropout(DROPOUTPER, name='sentDrop')(Vb)

    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vb)
    model = Model(inputs=[docInputs], outputs=[v6])

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize + 2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers - 1):
        y = GCNN(hidden_dim, window_size=window_size, name='gcnn{}'.format(i + 1))(y)
    y = GCNN(hidden_dim, window_size=window_size, name='gcnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize + 2, activation='softmax',
                              name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize + 2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers - 1):
        y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(i + 1))(y)
    y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize + 2, activation='softmax',
                              name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize + 2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers - 1):
        y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(i + 1))(y)
    y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize + 2, activation='softmax',
                              name='dense{}'.format(nb_layers)))(y)
    model = Model(input=x, output=y)
    return model
def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize + 2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers - 1):
        y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(i + 1))(y)
    y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize + 2, activation='softmax',
                              name='dense{}'.format(nb_layers)))(y)
    model = Model(input=x, output=y)
    return model
def learnable_wiq(context, question, question_mask, layer_dim):
    """Aligned question embedding. Same as in the DrQA paper."""
    question_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(question)
    context_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(context)
    question_enc = Lambda(lambda q: tf.transpose(q, [0, 2, 1]))(question_enc)
    matrix = Lambda(lambda q: tf.matmul(q[0], q[1]))([context_enc, question_enc])
    coefs = Lambda(lambda q: masked_softmax(matrix, question_mask, axis=2, expand=1))([matrix, question_mask])
    aligned_question_enc = Lambda(lambda q: tf.matmul(q[0], q[1]))([coefs, question])
    return aligned_question_enc
def projection(encoding, W, dropout_rate):
    """Projection layer (a Dense layer from keras).

    In FastQA it is applied after the encoder, to project context and
    question representations into different spaces."""
    proj = TimeDistributed(
        Dense(W,
              trainable=True,
              weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(encoding)
    proj = Dropout(rate=dropout_rate)(proj)
    return proj
def question_attn_vector(question_encoding, question_mask, context_encoding, repeat=True):
    """Attention over question."""
    question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
    # apply masking
    question_attention_vector = Lambda(lambda q: masked_softmax(q[0], q[1]))([question_attention_vector, question_mask])
    # apply the attention
    question_attention_vector = Lambda(lambda q: q[0] * q[1])([question_encoding, question_attention_vector])
    question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
    if repeat:
        question_attention_vector = Lambda(lambda q: repeat_vector(q[0], q[1]))([question_attention_vector, context_encoding])
    return question_attention_vector
def answer_end_pred(context_encoding, question_attention_vector, context_mask,
                    answer_start_distribution, W, dropout_rate):
    """Answer end prediction layer."""
    # Answer end prediction depends on the start prediction
    def s_answer_feature(x):
        maxind = K.argmax(x, axis=1)
        return maxind

    x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start_distribution)
    start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
        [tf.range(K.tf.shape(arg[1])[0]), tf.cast(arg[1], K.tf.int32)], axis=1)))([context_encoding, x])

    start_feature = Lambda(lambda q: repeat_vector(q[0], q[1]))([start_feature, context_encoding])

    # Answer end prediction
    answer_end = Lambda(lambda arg: concatenate([
        arg[0],
        arg[1],
        arg[2],
        multiply([arg[0], arg[1]]),
        multiply([arg[0], arg[2]])
    ]))([context_encoding, question_attention_vector, start_feature])

    answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
    answer_end = Dropout(rate=dropout_rate)(answer_end)
    answer_end = TimeDistributed(Dense(1))(answer_end)

    # apply masking
    answer_end = Lambda(lambda q: masked_softmax(q[0], q[1]))([answer_end, context_mask])
    answer_end = Lambda(lambda q: flatten(q))(answer_end)
    return answer_end
def conv_unit(inp, n_gram, no_word=200, window=2):
    out = Conv1D(no_word, window, strides=1, padding="valid", activation='relu')(inp)
    out = TimeDistributed(Dense(5, input_shape=(n_gram, no_word)))(out)
    out = ZeroPadding1D(padding=(0, window - 1))(out)
    return out
def __init__(self, num_classes, token_index, max_sents, max_tokens,
             embedding_type='glove.6B.100d', embedding_dims=100):
    """Creates a `SentenceModelFactory` instance for building various models
    that operate over (samples, max_sentences, max_tokens) input.

    Args:
        num_classes: The number of output classes.
        token_index: The dictionary of token and its corresponding integer index value.
        max_sents: The max number of sentences in a document.
        max_tokens: The max number of tokens in a sentence.
        embedding_type: The embedding type to use. Set to None to use random embeddings.
            (Default value: 'glove.6B.100d')
        embedding_dims: The number of embedding dims to use for representing a word.
            This argument will be ignored when `embedding_type` is set. (Default value: 100)
    """
    self.num_classes = num_classes
    self.token_index = token_index
    self.max_sents = max_sents
    self.max_tokens = max_tokens

    # This is required to make TimeDistributed(word_encoder_model) work.
    # TODO: Get rid of this restriction when https://github.com/fchollet/keras/issues/6917 resolves.
    if self.max_tokens is None:
        raise ValueError('`max_tokens` should be provided.')

    if embedding_type is not None:
        self.embeddings_index = get_embeddings_index(embedding_type)
        self.embedding_dims = self.embeddings_index.values()[0].shape[-1]
    else:
        self.embeddings_index = None
        self.embedding_dims = embedding_dims
def test_sequential_model_saving():
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    new_model = load_model(fname)
    os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def build_hcnn_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                     embedding_weights=None, verbose=False):
    N = maxnum
    L = maxlen

    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, filter2_len = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.filter2_len, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(Convolution1D(opts.nbfilters, opts.filter1_len, border_mode='valid'), name='z')(resh_W)
    avg_z = TimeDistributed(AveragePooling1D(pool_length=L - opts.filter1_len + 1), name='avg_z')(z)  # shape = (N, 1, nbfilters)

    resh_z = Reshape((N, opts.nbfilters), name='resh_z')(avg_z)  # shape = (N, nbfilters)

    hz = Convolution1D(opts.nbfilters, opts.filter2_len, border_mode='valid', name='hz')(resh_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # TODO: randomly drop sentences
    drop_hz = Dropout(opts.dropout, name='drop_hz')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(drop_hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_bidirectional_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                              embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build bidirectional model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z_fwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z_fwd')(resh_W)
    z_bwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True, go_backwards=True), name='z_bwd')(resh_W)
    z_merged = merge([z_fwd, z_bwd], mode='concat', name='z_merged')

    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z_merged)

    hz_fwd = LSTM(opts.lstm_units, return_sequences=True, name='hz_fwd')(avg_z)
    hz_bwd = LSTM(opts.lstm_units, return_sequences=True, go_backwards=True, name='hz_bwd')(avg_z)
    hz_merged = merge([hz_fwd, hz_bwd], mode='concat', name='hz_merged')
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz_merged)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                          embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                           embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    att_z = TimeDistributed(Attention(name='att_z'))(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(att_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def __init__(self, dense_dim, sequence_length=0, input_dim=0, dropout=0.0):
    self.dense_dim = dense_dim
    self.sequence_length = sequence_length
    self.input_dim = input_dim

    model = Sequential()
    model.add(Dense(dense_dim, activation='relu', input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model)
def __init__(self, dense_dim, sequence_length=0, input_dim=0, dropout=0.0):
    model = Sequential()
    model.add(Dense(dense_dim, activation='relu', input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model, input_shape=(sequence_length, input_dim,))