The following 46 code examples, extracted from open-source Python projects, illustrate how to use keras.layers.recurrent.GRU.
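Before the individual snippets, here is a minimal, self-contained sketch of the pattern most of them share (an illustrative example only, assuming the Keras 1.x-style `output_dim`/`input_dim` API used by the snippets below; the layer size, data shapes, and optimizer are arbitrary choices):

import numpy as np
from keras.models import Sequential
from keras.layers.recurrent import GRU

# A single GRU layer: maps each (timesteps, 32)-shaped input sequence
# to one 64-dimensional output vector (return_sequences=False).
model = Sequential()
model.add(GRU(output_dim=64, input_dim=32, return_sequences=False))
model.compile(loss='mse', optimizer='rmsprop')

x = np.random.randn(10, 20, 32)   # (samples, timesteps, features)
y = np.random.randn(10, 64)       # one target vector per sample
model.fit(x, y, nb_epoch=1, verbose=0)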
def build_model(layers):
    model = Sequential()

    model.add(GRU(input_dim=layers[0], output_dim=layers[1], activation='tanh', return_sequences=True))
    model.add(Dropout(0.15))  # Dropout to reduce overfitting

    # model.add(GRU(layers[2], activation='tanh', return_sequences=True))
    # model.add(Dropout(0.2))  # Dropout to reduce overfitting

    model.add(GRU(layers[2], activation='tanh', return_sequences=False))
    model.add(Dropout(0.15))  # Dropout to reduce overfitting

    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # model.compile(loss="mse", optimizer=sgd)
    model.compile(loss="mse", optimizer="rmsprop")  # Nadam rmsprop
    print "Compilation Time : ", time.time() - start
    return model
def understand_variable_length_handle():
    """Understand how a recurrent layer handles variable-length input sequences."""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict_proba(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict_proba(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequences=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================')
def try_variable_length_train():
    """Try training on variable-length sequences.

    Note that the train_x and train_y built this way have dtype object
    and shape (100,).
    """
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])
        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(np.array(train_x), np.array(train_y))
def test_temporal_clf(self):
    print('temporal classification data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10),
                                                         classification=True, nb_class=2)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
    self.assertTrue(history.validation_accuracy[-1] > 0.9)
def test_regularizer(layer_class):
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.build(shape)
    output = layer(K.variable(np.ones(shape)))
    K.eval(output)
    if layer_class == recurrent.SimpleRNN:
        assert len(layer.losses) == 3
    if layer_class == recurrent.GRU:
        assert len(layer.losses) == 9
    if layer_class == recurrent.LSTM:
        assert len(layer.losses) == 12
def fit(self, X, y):
    assert isinstance(X, list)  # TODO: this should not be an assert
    assert len(y) > 0
    assert len(X) == len(y)
    X = pad_sequences(X)
    print X.shape, y.shape
    n_features = X.shape[2]
    self.n_labels_ = y.shape[1]
    print n_features, self.n_labels_

    model = Sequential()
    model.add(GRU(n_features, 128))
    model.add(Dropout(0.1))
    model.add(BatchNormalization(128))
    model.add(Dense(128, self.n_labels_))
    model.add(Activation('sigmoid'))

    sgd = opt.SGD(lr=0.005, decay=1e-6, momentum=0., nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, class_mode='categorical')
    model.fit(X, y, batch_size=self.n_batch_size, nb_epoch=self.n_epochs, show_accuracy=True)
    self.model_ = model
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=4))

    model.add(GRU(128))
    # model.add(GRU(128, return_sequences=False))

    # Add dropout if overfitting
    # model.add(Dropout(0.5))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=model.output_shape[1]))
    model.add(Flatten())
    model.add(Dense(128))

    # model.add(GRU(128, return_sequences=False))

    # Add dropout if overfitting
    # model.add(Dropout(0.5))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def test_temporal_clf(self):
    print('temporal classification data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(3, 5),
                                                         classification=True, nb_class=2)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = Sequential()
    model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adadelta')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
    self.assertTrue(history.history['val_acc'][-1] > 0.9)
def create_model(self):
    model = Sequential()
    model.add(Embedding(output_dim=self.n_embedding_nodes,
                        input_dim=self.lexicon_size + 1,
                        input_length=self.n_timesteps,
                        mask_zero=True,
                        name='embedding_layer'))

    for layer_num in range(self.n_hidden_layers):
        if layer_num == self.n_hidden_layers - 1:
            return_sequences = False
        else:
            # add extra hidden layers
            return_sequences = True
        model.add(GRU(output_dim=self.n_hidden_nodes,
                      return_sequences=return_sequences,
                      name='hidden_layer' + str(layer_num + 1)))

    model.add(Dense(output_dim=self.n_output_classes,
                    activation='softmax',
                    name='output_layer'))

    # if emb_weights is not None:
    #     # initialize weights with lm weights
    #     model.layers[0].set_weights(emb_weights)  # set embeddings
    # if layer1_weights is not None:
    #     model.layers[1].set_weights(layer1_weights)  # set recurrent layer 1

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(Dense(charset_length, activation='softmax'), name='decoded_mean')(h)
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
def build_model(input_size, seq_len, hidden_size):
    """Build a seq2seq model."""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="softmax")))
    model.compile(loss="categorical_crossentropy", optimizer='adam')
    return model
def build_model(input_size, seq_len, hidden_size):
    """Build a sequence-to-sequence model."""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')
    return model
def understand_return_sequence():
    """Understand the `return_sequences` argument of a recurrent layer."""
    model_1 = Sequential()
    model_1.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(input_dim=256, output_dim=256, return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict_proba(inz, verbose=0)
    rez_2 = model_2.predict_proba(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================')
def config(c):
    c['dropout'] = 4/5
    c['dropoutfix_inp'] = 0
    c['dropoutfix_rec'] = 0
    c['l2reg'] = 1e-4

    c['rnnbidi'] = True
    c['rnn'] = GRU
    c['rnnbidi_mode'] = 'sum'
    c['rnnact'] = 'tanh'
    c['rnninit'] = 'glorot_uniform'
    c['sdim'] = 2
    c['rnnlevels'] = 1

    c['project'] = True
    c['pdim'] = 2
    c['pact'] = 'tanh'
    c['pinit'] = 'glorot_uniform'

    # model-external:
    c['inp_e_dropout'] = 4/5
    c['inp_w_dropout'] = 0
    # anssel-specific:
    c['ptscorer'] = B.mlp_ptscorer
    c['mlpsum'] = 'sum'
    c['Ddim'] = 2
def load_model(self, frm_modelweights=''):
    frm_model = Sequential()
    frm_model.add(GRU(2048,
                      input_shape=(None, self.feature_size),
                      return_sequences=True,
                      activation='relu',
                      name='fc1'))
    frm_model.add(Dropout(0.5))
    frm_model.add(GRU(2048, return_sequences=True, activation='relu', name='fc2'))
    frm_model.add(Dropout(0.5))
    frm_model.add(GRU(2048, return_sequences=False, activation='relu', name='fc3'))
    frm_model.add(Dropout(0.5))
    frm_model.add(Dense(self.numclasses, activation='softmax', name='prediction'))

    if frm_modelweights:
        frm_model.load_weights(frm_modelweights, by_name=True)
        print("Frame model loaded with weights from %s." % frm_modelweights)
    else:
        print "Empty frame model loaded."

    return frm_model
def load_model(self, frm_modelweights='', frmdiff_modelweights=''):
    frm_model = Sequential()
    frm_model.add(GRU(4096,
                      return_sequences=True,
                      input_dim=self.feature_size,
                      input_length=MAX_FRAMES,
                      activation='relu',
                      name='fc1'))
    frm_model.add(Dropout(0.3))
    frm_model.add(GRU(4096, return_sequences=False, activation='relu', name='fc2'))
    frm_model.add(Dropout(0.3))
    frm_model.add(Dense(self.numclasses, activation='softmax', name='frm_prediction'))

    if frm_modelweights:
        frm_model.load_weights(frm_modelweights, by_name=True)
        print("Frame model loaded with weights from %s." % frm_modelweights)
    else:
        print "Empty frame model loaded."

    '''
    frmdiff_model = Sequential()
    frmdiff_model.add(GRU(4096, input_dim=self.feature_size, activation='relu', name='fc1'))
    frmdiff_model.add(Dropout(0.3))
    frmdiff_model.add(GRU(4096, activation='relu', name='fc2'))
    frmdiff_model.add(Dropout(0.3))
    frmdiff_model.add(Dense(self.numclasses, activation='softmax', name='frmdiff_feature'))

    if frmdiff_modelweights:
        frmdiff_model.load_weights(frmdiff_modelweights, by_name=True)
        print('Frame model loaded with weights from %s.' % frmdiff_modelweights)
    else:
        print "Empty frame model loaded."

    model = Sequential()
    model.add(Merge([frm_model, frmdiff_model], mode='concat'))
    model.add(Dense(self.numclasses, activation='softmax', name='predictions'))
    '''

    return frm_model
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10),
                                                         output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.compile(loss='hinge', optimizer='rmsprop')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.validation_loss[-1] < 0.75)
def build(self, input_shape):
    bs, input_length, input_dim = input_shape

    self.controller_input_dim, self.controller_output_dim = controller_input_output_shape(
        input_dim, self.units, self.m_depth, self.n_slots, self.shift_range,
        self.read_heads, self.write_heads)

    # Now that we've calculated the shape of the controller, we have to add it to the layer/model.
    if self.controller is None:
        self.controller = Dense(
            name="controller",
            activation='linear',
            bias_initializer='zeros',
            units=self.controller_output_dim,
            input_shape=(bs, input_length, self.controller_input_dim))
        self.controller.build(input_shape=(self.batch_size, input_length, self.controller_input_dim))
        self.controller_with_state = False

    # This is a fixed shift matrix
    self.C = _circulant(self.n_slots, self.shift_range)

    self.trainable_weights = self.controller.trainable_weights

    # We need to declare the number of states we want to carry around.
    # In our case the dimension seems to be 6 (LSTM) or 5 (GRU) or 4 (FF);
    # see self.get_initial_states. Those correspond to:
    # [old_ntm_output] + [init_M, init_wr, init_ww] + [init_h] (LSTM and GRU) + [init_c] (LSTM only).
    # old_ntm_output does not make sense in our world, but is required by the definition
    # of the step function we intend to use.
    # WARNING: What self.state_spec does is only poorly understood,
    # I only copied it from keras/recurrent.py.
    self.states = [None, None, None, None]
    self.state_spec = [InputSpec(shape=(None, self.output_dim)),                # old_ntm_output
                       InputSpec(shape=(None, self.n_slots, self.m_depth)),     # Memory
                       InputSpec(shape=(None, self.read_heads, self.n_slots)),  # weights_read
                       InputSpec(shape=(None, self.write_heads, self.n_slots))] # weights_write

    super(NeuralTuringMachine, self).build(input_shape)
def get_nets(name):
    if name == 'LSTM':
        return recurrent.LSTM
    elif name == 'GRU':
        return recurrent.GRU
    else:
        return recurrent.SimpleRNN
def fit(self, x, y):
    input_dim = x.shape[1]
    output_dim = y.shape[1]
    self.x_train = x

    start = len(x) % (self.batch_size * self.sequence_length)
    x_seq = self.sliding_window(x.iloc[start:])
    y_seq = self.sliding_window(y.iloc[start:])

    model = Sequential()
    model.add(GRU(1024,
                  batch_input_shape=(self.batch_size, self.sequence_length, input_dim),
                  return_sequences=True,
                  stateful=True))
    model.add(Activation("tanh"))
    model.add(GRU(1024, return_sequences=True))
    model.add(Activation("tanh"))
    model.add(GRU(512, return_sequences=True))
    model.add(Activation("tanh"))
    # model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(output_dim)))
    model.add(Activation("linear"))

    optimizer = keras.optimizers.RMSprop(lr=0.002)
    optimizer = keras.optimizers.Nadam(lr=0.002)
    model.compile(loss='mse', optimizer=optimizer)

    model.fit(x_seq, y_seq, batch_size=self.batch_size, verbose=1,
              nb_epoch=self.n_epochs, shuffle=False)

    self.model = model
    return self
def make_model(embedding_weights, input_length=50):
    """Build a recurrent net based off the input parameters and return it compiled.

    Args:
    ----
        embedding_weights: 2d np.ndarray
        input_length (optional): int
            Holds how many words each article body will hold

    Return:
    ------
        model: keras.model.Sequential compiled model
    """
    dict_size = embedding_weights.shape[0]      # Num words in corpus
    embedding_dim = embedding_weights.shape[1]  # Num dims in vec representation

    bodies = Input(shape=(input_length,), dtype='int32')

    embeddings = Embedding(input_dim=dict_size, output_dim=embedding_dim,
                           weights=[embedding_weights], dropout=0.5)(bodies)

    layer = GRU(1024, return_sequences=True, dropout_W=0.5, dropout_U=0.5)(embeddings)
    layer = GRU(1024, return_sequences=False, dropout_W=0.5, dropout_U=0.5)(layer)
    layer = Dense(dict_size, activation='softmax')(layer)

    model = Model(input=bodies, output=layer)
    model.compile(loss='categorical_crossentropy', optimizer='adagrad')

    return model
def buildModel(self):
    '''
    :Description: Build neural network model
    '''
    self.model = Sequential()
    self.model.add(Embedding(self.embedding, 16, input_length=self.max_len))
    for l in range(self.layers - 1):
        self.model.add(GRU(self.neurons, activation=self.activation, return_sequences=True,
                           dropout_W=self.dropout, dropout_U=self.dropout))
    self.model.add(GRU(self.neurons, activation=self.activation, return_sequences=False,
                       dropout_W=self.dropout, dropout_U=self.dropout))
    self.model.add(Dense(self.n_songs))
    self.model.add(Activation('softmax'))
def _generate_model(self, lembedding, num_classes=2, unit='gru',
                    rnn_size=128, train_vectors=True):
    model = Sequential()
    if lembedding.vector_box.W is None:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        W_constraint=None)
    else:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        weights=[lembedding.vector_box.W],
                        W_constraint=None)
    emb.trainable = train_vectors
    model.add(emb)

    if unit == 'gru':
        model.add(GRU(rnn_size))
    else:
        model.add(LSTM(rnn_size))
    model.add(Dropout(0.2))

    if num_classes == 2:
        model.add(Dense(1, activation='sigmoid'))
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])
    else:
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])

    return model
def _generate_model(self, lembedding, num_classes=2, unit='gru',
                    rnn_size=128, train_vectors=True):
    input = Input(shape=(lembedding.size,), dtype='int32')
    if lembedding.vector_box.W is None:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        W_constraint=None)(input)
    else:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        weights=[lembedding.vector_box.W],
                        W_constraint=None)(input)
    emb.trainable = train_vectors

    if unit == 'gru':
        forward = GRU(rnn_size)(emb)
        backward = GRU(rnn_size, go_backwards=True)(emb)
    else:
        forward = LSTM(rnn_size)(emb)
        backward = LSTM(rnn_size, go_backwards=True)(emb)

    merged_rnn = merge([forward, backward], mode='concat')
    dropped = Dropout(0.5)(merged_rnn)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])

    return model
def __init__(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
             nfilters=64, rnn_type=GRU, rnn_dim=80,
             train_vectors=True, optimizer=None):
    if not isinstance(lembedding, TwoLevelsEmbedding):
        raise LanguageClassifierException(
            "The model only accepts two-level language embeddings")
    if num_classes < 2:
        raise LanguageClassifierException("Classes must be 2 or more")

    self.optimizer = optimizer
    model = self._generate_model(lembedding, num_classes, ngrams,
                                 nfilters, rnn_type, rnn_dim, train_vectors)
    super(RCNNClassifier, self).__init__(model, self.optimizer)
def build_lstm(input_shape):
    model = Sequential()
    model.add(Masking(input_shape=input_shape, mask_value=-1.))

    # model.add(GRU(128, return_sequences=True))
    model.add(GRU(128, return_sequences=False))

    # Add dropout if overfitting
    # model.add(Dropout(0.5))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def test_gru(self):
    _runner(recurrent.GRU)
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(3, 5),
                                                         output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
    model.compile(loss='hinge', optimizer='adam')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.history['val_loss'][-1] < 0.8)
def LSTMModel(self, nHidden=150, lr=0.01):
    # print('nHidden: %i\tlr: %.3f' % (nHidden, lr))
    self.rnnModel.add(GRU(nHidden, activation='sigmoid',
                          input_shape=(None, self.maxFeatures), return_sequences=True))
    # self.rnnModel.add(LSTM(nHidden, activation='sigmoid',
    #                        input_shape=(None, nHidden), return_sequences=True))
    self.rnnModel.add(TimeDistributedDense(nHidden))
    self.rnnModel.add(Activation('relu'))
    self.rnnModel.add(TimeDistributedDense(self.maxFeatures))
    self.rnnModel.add(Activation('softmax'))

    rmsprop = RMSprop(lr=lr, rho=0.9, epsilon=1e-06)
    self.rnnModel.compile(loss='categorical_crossentropy', optimizer=rmsprop)
def BuildModel():
    # global dataset_storage, model_storage, history_storage
    dataset = makeData(Variables=o.Variables)
    # dataset_storage = dataset

    model = None
    history = None
    modelname = ""

    print o.Model
    if "LSTM" in o.Model or "GRU" in o.Model:
        model, history = buildModel_1hidden(dataset, True)
    if o.Model == "RNNSV1":
        model, history = buildModel_RNNSV1(dataset, True)
    if o.Model == "DenseIP3D":
        model, history = buildModel_SimpleDense(dataset, False)

    print ' ------------------------------------------'
    print o.Model
    if o.Model == "RNNPlusMV2" or o.Model == "RNNPlusSV1":
        model, history = buildModel_RNNPlus(dataset, useAdam=True)

    modelname = o.Version + "_" + o.Model + "_" + o.Variables + "_" + o.nEpoch + "epoch_" + \
        str(n_events / 1000) + 'kEvts_' + str(o.nTrackCut) + 'nTrackCut_' + \
        o.nMaxTrack + "nMaxTrack_" + o.nLSTMClass + "nLSTMClass_" + \
        o.nLSTMNodes + "nLSTMNodes_" + o.nLayers + "nLayers"

    model = evalModel(dataset, model, o.Model)

    if o.TrackOrder == 'pT':
        modelname += "_SortpT"
    if o.TrackOrder == 'Reverse':
        modelname += "_ReverseOrder"
    if o.TrackOrder == 'SL0':
        modelname += "_SL0"
    if o.doTrainC == 'y':
        modelname += "_CMix"
    if o.AddJetpT == 'y':
        modelname += '_AddJetpT'
    if int(o.EmbedSize) != 2:
        modelname += "_" + o.EmbedSize + "EmbedSize"
    if o.Mode == "R":
        modelname = o.filebase + "_Retrain_" + o.nEpoch
    if o.doLessC == "y":
        modelname += "_LessC"
    if o.doJetpTReweight == "y":
        modelname += "_JetpTReweight"
    # modelname = "test"

    saveModel(modelname, model, history)
def train_breaker(datafilename, sentence_num=1000, puncs=u',?.?!???',
                  RNN=recurrent.GRU, HIDDEN_SIZE=128, EPOCH_SIZE=10, validate=True):
    wordtable = WordTable()
    wordtable.parse(datafilename, sentence_num)

    X, Y = [], []
    for line in open(datafilename).readlines()[:sentence_num]:
        line = line.strip().decode('utf-8')
        line = re.sub(ur'(^[{0}]+)|([{0}]+$)'.format(puncs), '', line)
        words = wordtable.encode(re.sub(ur'[{0}]'.format(puncs), '', line))
        breaks = re.sub(ur'0[{0}]+'.format(puncs), '1',
                        re.sub(ur'[^{0}]'.format(puncs), '0', line))
        if len(words) >= 30 and len(words) <= 50 and breaks.count('1') >= 4:
            x = np.zeros((len(words), wordtable.capacity), dtype=np.bool)
            y = np.zeros((len(breaks), 2), dtype=np.bool)
            for idx in xrange(len(words)):
                x[idx][words[idx]] = True
                y[idx][int(breaks[idx])] = True
            X.append(x)
            Y.append(y)
    print 'total sentence: ', len(X)

    if validate:
        # Set apart 10% for validation
        split_at = len(X) - len(X) / 10
        X_train, X_val = X[:split_at], X[split_at:]
        y_train, y_val = Y[:split_at], Y[split_at:]
    else:
        X_train, y_train = X, Y

    model = Graph()
    model.add_input(name='input', input_shape=(None, wordtable.capacity))
    model.add_node(RNN(HIDDEN_SIZE, return_sequences=True), name='forward', input='input')
    model.add_node(TimeDistributedDense(2, activation='softmax'), name='softmax', input='forward')
    model.add_output(name='output', input='softmax')
    model.compile('adam', {'output': 'categorical_crossentropy'})

    for epoch in xrange(EPOCH_SIZE):
        print "epoch: ", epoch
        for idx, (seq, label) in enumerate(zip(X_train, y_train)):
            loss, accuracy = model.train_on_batch({'input': np.array([seq]),
                                                   'output': np.array([label])},
                                                  accuracy=True)
            if idx % 20 == 0:
                print "\tidx={0}, loss={1}, accuracy={2}".format(idx, loss, accuracy)

    if validate:
        _Y, _P = [], []
        for (seq, label) in zip(X_val, y_val):
            y = label.argmax(axis=-1)
            p = model.predict({'input': np.array([seq])})['output'][0].argmax(axis=-1)
            _Y.extend(list(y))
            _P.extend(list(p))
        _Y, _P = np.array(_Y), np.array(_P)
        print "should break right: ", ((_P == 1) * (_Y == 1)).sum()
        print "should break wrong: ", ((_P == 0) * (_Y == 1)).sum()
        print "should not break right: ", ((_P == 0) * (_Y == 0)).sum()
        print "should not break wrong: ", ((_P == 1) * (_Y == 0)).sum()

    with open('wordtable_json.txt', 'w') as wordtable_file:
        wordtable_file.write(wordtable.to_json())
    with open('model_json.txt', 'w') as model_file:
        model_file.write(model.to_json())
    model.save_weights('model_weights.h5', overwrite=True)
def get_state_transfer_rnn(RNN):
    '''Converts a given Recurrent subclass (e.g., LSTM, GRU) to its state-transferable version.

    A state transfer RNN can transfer its hidden state to another one
    of the same type and compatible dimensions.
    '''

    class StateTransferRNN(RNN):

        def __init__(self, state_input=True, **kwargs):
            self.state_outputs = []
            self.state_input = state_input
            super(StateTransferRNN, self).__init__(**kwargs)

        def reset_states(self):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            if self.stateful:
                super(StateTransferRNN, self).reset_states()
            self.stateful = stateful

        def build(self, input_shape):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            super(StateTransferRNN, self).build(input_shape)
            self.stateful = stateful

        def broadcast_state(self, rnns):
            rnns = (set if type(rnns) in [list, tuple] else lambda a: {a})(rnns)
            rnns -= set(self.state_outputs)
            self.state_outputs.extend(rnns)
            for rnn in rnns:
                rnn.state_input = self
                rnn.updates = getattr(rnn, 'updates', [])
                rnn.updates.extend(zip(rnn.states, self.states_to_transfer))

        def call(self, x, mask=None):
            last_output, outputs, states = K.rnn(
                self.step,
                self.preprocess_input(x),
                self.states or self.get_initial_states(x),
                go_backwards=self.go_backwards,
                mask=mask,
                constants=self.get_constants(x),
                unroll=self.unroll,
                input_length=self.input_spec[0].shape[1])
            self.updates = zip(self.states, states)
            self.states_to_transfer = states
            return outputs if self.return_sequences else last_output

    return StateTransferRNN
def __init__(self, embedding_mat=None, maxlen_doc=7, maxlen_sent=50,
             filter_length=[3, 4, 5, 6], nb_filters=200, n_vocab=10000,
             embedding_dims=300, hidden_gru=64, n_classes=5):
    if embedding_mat is not None:
        self.n_vocab, self.embedding_dims = embedding_mat.shape
    else:
        self.n_vocab = n_vocab
        self.embedding_dims = embedding_dims
    self.maxlen_doc = maxlen_doc
    self.maxlen_sent = maxlen_sent
    self.filter_length = filter_length
    self.nb_filters = nb_filters
    self.hidden_gru = hidden_gru

    print "Building the model"
    # graph model
    model = Graph()
    model.add_input(name='input',
                    input_shape=(self.maxlen_doc * self.maxlen_sent,), dtype='int')

    # Model embedding layer, for word index -> word embedding transformation
    model.add_node(Embedding(self.n_vocab, self.embedding_dims,
                             weights=[self.embedding_mat],
                             input_length=self.maxlen_sent * self.maxlen_doc),
                   name='embedding', input='input')
    model.add_node(Reshape((self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                   name='reshape_5d', input='embedding')

    # define the different filters
    conv_layer = []
    for each_length in filter_length:
        model.add_node(TimeDistributedConvolution2D(self.nb_filters / len(filter_length),
                                                    each_length, self.embedding_dims,
                                                    border_mode='valid',
                                                    input_shape=(self.maxlen_doc, 1,
                                                                 self.maxlen_sent,
                                                                 self.embedding_dims)),
                       name='conv_{}'.format(each_length), input='reshape_5d')
        model.add_node(Activation('relu'),
                       name='relu_conv_{}'.format(each_length),
                       input='conv_{}'.format(each_length))
        model.add_node(TimeDistributedMaxPooling2D(pool_size=(int(self.maxlen_sent - each_length + 1), 1),
                                                   border_mode='valid'),
                       name='pool_conv_{}'.format(each_length),
                       input='relu_conv_{}'.format(each_length))
        model.add_node(TimeDistributedFlatten(),
                       name='flatten_conv_{}'.format(each_length),
                       input='pool_conv_{}'.format(each_length))
        conv_layer.append('flatten_conv_{}'.format(each_length))

    # model.add_node(Activation('relu'), name='relu', inputs=conv_layer)
    print conv_layer
    model.add_node(GRU(self.hidden_gru), name='gru_forward', inputs=conv_layer)
    model.add_node(GRU(self.hidden_gru, go_backwards=True), name='gru_backward', inputs=conv_layer)
    model.add_node(Dropout(0.5), name='gru_outputs', inputs=['gru_forward', 'gru_backward'])
    model.add_node(Dense(n_classes), name='full_con', input='gru_outputs')
    model.add_node(Activation('softmax'), name='prob', input='full_con')
    model.add_output(name='pred', input='prob')

    model.compile('rmsprop', loss={'pred': 'categorical_crossentropy'})
def _generate_model(self, lembedding, num_classes=2, rnn_dim=32):
    WORD_PER_SENTENCES = lembedding.size_level1
    SENTENCES_PER_DOCUMENT = lembedding.size_level2
    EMBEDDING_DIM = lembedding.vector_box.vector_dim

    INPUT_SHAPE = (WORD_PER_SENTENCES * SENTENCES_PER_DOCUMENT, )
    EMBEDDING_SHAPE = (SENTENCES_PER_DOCUMENT, WORD_PER_SENTENCES, EMBEDDING_DIM)

    doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

    embedded = Sequential([
        Embedding(
            input_dim=lembedding.vector_box.size,
            output_dim=EMBEDDING_DIM,
            input_length=INPUT_SHAPE[0]
        ),
        Reshape(EMBEDDING_SHAPE)
    ])(doc)

    out = TimeDistributed(GRU(rnn_dim))(embedded)
    next = Dropout(0.5)(out)
    out = GRU(rnn_dim)(next)
    out = Dropout(0.5)(out)

    mapping = [
        Dense(64, activation='relu'),  # Maybe add more layers
    ]

    for f in mapping:
        out = f(out)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy',
                      optimizer=self.optimizer,
                      metrics=["accuracy"])

    return model
def lstm_memory_train(X_train_list, y_train, vocab_size):
    N = len(X_train_list)
    X_train_list = [sequence.pad_sequences(x_train, maxlen=MAX_LEN) for x_train in X_train_list]

    input_list = []
    out_list = []
    for i in range(N):
        input, out = get_embedding_input_output('f%d' % i, vocab_size)
        input_list.append(input)
        out_list.append(out)

    x = merge(out_list, mode='concat')
    lstm_out = LSTM(HIDDEN_SIZE, return_sequences=True)(x)
    lstm_share = GRU(HIDDEN_SIZE, return_sequences=True)

    x = lstm_out
    for i in range(2):
        att = TimeDistributed(Dense(1))(x)
        att = Flatten()(att)
        att = Activation(activation="softmax")(att)
        att = RepeatVector(HIDDEN_SIZE)(att)
        att = Permute((2, 1))(att)

        mer = merge([att, lstm_out], "mul")
        mer = merge([mer, out_list[-1]], 'mul')

        z = merge([lstm_out, mer], 'sum')
        z = lstm_share(z)
        x = z

    hid = AveragePooling1D(pool_length=2)(x)
    hid = Flatten()(hid)
    # hid = merge([hid, out_list[-1]], mode='concat')

    main_loss = Dense(1, activation='sigmoid', name='main_output')(hid)

    model = Model(input=input_list, output=main_loss)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.fit(X_train_list, y_train, batch_size=BATCH_SIZE, nb_epoch=EPOCHS)
    return model
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size), name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size), name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_1_a'))(text_mask_1)
    lstm_1_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_b'))(lstm_1_a)
    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_2_a'))(text_mask_2)
    lstm_2_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_b'))(lstm_2_a)
    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True,
                            name='cosine_similarity')([lstm_1_b, lstm_2_b])

    model = Model(inputs=[text_input_1, text_input_2], outputs=cosine_similarity)
    return model
def create_model(self, n_timesteps=None, batch_size=1, include_pred_layer=True):
    input_layers = []

    seq_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="seq_input_layer")
    input_layers.append(seq_input_layer)

    seq_embedding_layer = Embedding(input_dim=self.lexicon_size + 1,
                                    output_dim=self.n_embedding_nodes,
                                    mask_zero=True,
                                    name='seq_embedding_layer')(seq_input_layer)

    for layer_num in range(self.n_hidden_layers):
        if layer_num == 0:
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes,
                                   return_sequences=True, stateful=True,
                                   name='seq_hidden_layer1')(seq_embedding_layer)
        else:
            # add extra hidden layers
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes,
                                   return_sequences=True, stateful=True,
                                   name='seq_hidden_layer' + str(layer_num + 1))(seq_hidden_layer)

    if self.use_pos:
        pos_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="pos_input_layer")
        input_layers.append(pos_input_layer)

        pos_embedding_layer = Embedding(input_dim=self.n_pos_tags + 1,
                                        output_dim=self.n_pos_embedding_nodes,
                                        mask_zero=True,
                                        name='pos_embedding_layer')(pos_input_layer)

        pos_hidden_layer = GRU(output_dim=self.n_pos_nodes,
                               return_sequences=True, stateful=True,
                               name='pos_hidden_layer')(pos_embedding_layer)

        seq_hidden_layer = merge([seq_hidden_layer, pos_hidden_layer],
                                 mode='concat', concat_axis=-1,
                                 name='pos_merge_hidden_layer')

    if self.use_features:
        feature_input_layer = Input(batch_shape=(batch_size, self.lexicon_size + 1),
                                    name="feature_input_layer")
        input_layers.append(feature_input_layer)
        feature_hidden_layer = Dense(output_dim=self.n_feature_nodes,
                                     activation='sigmoid',
                                     name='feature_hidden_layer')(feature_input_layer)
        feature_hidden_layer = RepeatVector(n_timesteps)(feature_hidden_layer)

        seq_hidden_layer = merge([seq_hidden_layer, feature_hidden_layer],
                                 mode='concat', concat_axis=-1,
                                 name='feature_merge_hidden_layer')

    output_layers = []
    if include_pred_layer:
        pred_layer = TimeDistributed(Dense(self.lexicon_size + 1,
                                           activation="softmax",
                                           name='pred_layer'))(seq_hidden_layer)
        output_layers.append(pred_layer)
        if self.use_pos:
            pred_pos_layer = TimeDistributed(Dense(self.n_pos_tags + 1,
                                                   activation="softmax",
                                                   name='pred_pos_layer'))(seq_hidden_layer)
            output_layers.append(pred_pos_layer)

    model = Model(input=input_layers, output=output_layers)

    # select optimizer and compile
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=eval(self.optimizer)(clipvalue=self.clipvalue,
                                                 lr=self.lr, decay=self.decay))

    return model