The following 11 code examples, extracted from open-source Python projects, illustrate how to use the lasagne.regularization module.
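As a quick orientation before the project-specific examples, here is a minimal, self-contained sketch of the module's basic pattern; the toy network and coefficients below are illustrative assumptions, not taken from any of the projects. The idea is to build a scalar penalty expression over a network's parameters and add it to the training loss.

import lasagne
from lasagne.regularization import l1, l2, regularize_layer_params, regularize_network_params

# an arbitrary toy network
l_in = lasagne.layers.InputLayer(shape=(None, 100))
l_hid = lasagne.layers.DenseLayer(l_in, num_units=200)
l_out = lasagne.layers.DenseLayer(l_hid, num_units=10,
                                  nonlinearity=lasagne.nonlinearities.softmax)

# scalar Theano expression penalizing all trainable weights in the network
l2_penalty = 1e-4 * regularize_network_params(l_out, l2)
# scalar Theano expression penalizing only the hidden layer's weights
l1_penalty = 1e-6 * regularize_layer_params(l_hid, l1)

# the penalties are added to the task loss before computing gradients/updates:
# loss = loss + l2_penalty + l1_penalty

The examples below follow the same pattern with project-specific networks and coefficients.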
def parse_args(argv):
    """
    Parse commandline arguments.

    Arguments:
    argv -- An argument list without the program name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--dataset', metavar='str', help='dataset for dialectology', type=str, default='na')
    parser.add_argument('-bucket', '--bucket', metavar='int', help='discretisation bucket size', type=int, default=300)
    parser.add_argument('-batch', '--batch', metavar='int', help='SGD batch size', type=int, default=0)
    parser.add_argument('-hid', '--hidden', metavar='int', help='Hidden layer size', type=int, default=500)
    parser.add_argument('-mindf', '--mindf', metavar='int', help='minimum document frequency in BoW', type=int, default=10)
    parser.add_argument('-d', '--dir', metavar='str', help='home directory', type=str, default='./data')
    parser.add_argument('-enc', '--encoding', metavar='str', help='Data Encoding (e.g. latin1, utf-8)', type=str, default='utf-8')
    parser.add_argument('-reg', '--regularization', metavar='float', help='regularization coefficient', type=float, default=1e-6)
    parser.add_argument('-drop', '--dropout', metavar='float', help='dropout coef default 0.5', type=float, default=0.5)
    parser.add_argument('-cel', '--celebrity', metavar='int', help='celebrity threshold', type=int, default=10)
    parser.add_argument('-conv', '--convolution', action='store_true', help='if true do convolution')
    parser.add_argument('-map', '--map', action='store_true', help='if true just draw maps from pre-trained model')
    parser.add_argument('-sqerror', '--sqerror', action='store_true', help='if exists use squared error regression instead of gaussian mixture model')
    parser.add_argument('-autoencoder', '--autoencoder', type=int, help='if not zero pre-trains the model with input lat/lon and output lat/lon for n steps', default=0)
    parser.add_argument('-grid', '--grid', action='store_true', help='if exists transforms the input from lat/lon to distance from grids on map')
    parser.add_argument('-rbf', '--rbf', action='store_true', help='if exists transforms the input from lat/lon to rbf probabilities and learns centers and sigmas as well.')
    parser.add_argument('-ncomp', '--ncomp', type=int, help='the number of bivariate gaussians whose parameters are going to be learned.', default=100)
    parser.add_argument('-toy', action='store_true', help='if exists use the toy dataset instead of geolocation datasets.')
    parser.add_argument('-tune', action='store_true', help='if exists tune hyperparameters')
    parser.add_argument('-m', '--message', type=str)
    args = parser.parse_args(argv)
    return args
def tune(data, dataset_name, args, num_iter=100):
    logging.info('tuning over %s' % dataset_name)
    param_scores = []
    random.seed()
    for i in xrange(num_iter):
        logging.info('tuning iter %d' % i)
        np.random.seed(77)
        hidden_size = random.choice([300, 600, 900])
        ncomp = random.choice([250, 500, 1000])
        if args.nomdn:
            ncomp = 0
        logging.info('hidden %d ncomp %d' % (hidden_size, ncomp))
        try:
            perplexity_test, perplexity_dev = train(data, regul_coef=args.regularization,
                                                    dropout_coef=args.dropout,
                                                    hidden_size=hidden_size,
                                                    autoencoder=args.autoencoder,
                                                    ncomp=ncomp,
                                                    dataset_name=dataset_name,
                                                    tune=True, nomdn=args.nomdn)
        except:
            logging.info('exception occurred')
            continue
        scores = OrderedDict()
        scores['perplexity_test'], scores['perplexity_dev'] = perplexity_test, perplexity_dev
        params = OrderedDict()
        params['hidden'], params['ncomp'] = hidden_size, ncomp
        param_scores.append([params, scores])
        logging.info(params)
        logging.info(scores)
    for param_score in param_scores:
        logging.info(param_score)
def parse_args(argv):
    """
    Parse commandline arguments.

    Arguments:
    argv -- An argument list without the program name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--dataset', metavar='str', help='dataset for dialectology', type=str, default='na')
    parser.add_argument('-bucket', '--bucket', metavar='int', help='discretisation bucket size', type=int, default=300)
    parser.add_argument('-batch', '--batch', metavar='int', help='SGD batch size', type=int, default=1000)
    parser.add_argument('-hid', '--hidden', metavar='int', help='Hidden layer size after bigaus layer', type=int, default=500)
    parser.add_argument('-mindf', '--mindf', metavar='int', help='minimum document frequency in BoW', type=int, default=10)
    parser.add_argument('-d', '--dir', metavar='str', help='home directory', type=str, default='./data')
    parser.add_argument('-enc', '--encoding', metavar='str', help='Data Encoding (e.g. latin1, utf-8)', type=str, default='utf-8')
    parser.add_argument('-reg', '--regularization', metavar='float', help='regularization coefficient', type=float, default=1e-6)
    parser.add_argument('-drop', '--dropout', metavar='float', help='dropout coef default 0.5', type=float, default=0.5)
    parser.add_argument('-cel', '--celebrity', metavar='int', help='celebrity threshold', type=int, default=10)
    parser.add_argument('-conv', '--convolution', action='store_true', help='if true do convolution')
    parser.add_argument('-map', '--map', action='store_true', help='if true just draw maps from pre-trained model')
    parser.add_argument('-tune', '--tune', action='store_true', help='if true tune the hyper-parameters')
    parser.add_argument('-tf', '--tensorflow', action='store_true', help='if exists run with tensorflow')
    parser.add_argument('-autoencoder', '--autoencoder', type=int, help='the number of autoencoder steps before training', default=0)
    parser.add_argument('-grid', '--grid', action='store_true', help='if exists transforms the input from lat/lon to distance from grids on map')
    parser.add_argument('-ncomp', type=int, help='the number of bivariate gaussians after the input layer', default=500)
    parser.add_argument('-m', '--message', type=str)
    parser.add_argument('-vbi', '--vbi', type=str, help='if exists load params from vbi file and visualize bivariate gaussians on a map', default=None)
    parser.add_argument('-nomdn', '--nomdn', action='store_true', help='if true use tanh layer instead of MDN')
    args = parser.parse_args(argv)
    return args
def parse_args(argv):
    """
    Parse commandline arguments.

    Arguments:
    argv -- An argument list without the program name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--dataset', metavar='str', help='dataset for dialectology', type=str, default='na')
    parser.add_argument('-bucket', '--bucket', metavar='int', help='discretisation bucket size', type=int, default=300)
    parser.add_argument('-batch', '--batch', metavar='int', help='SGD batch size', type=int, default=0)
    parser.add_argument('-hid', '--hidden', metavar='int', help='Hidden layer size', type=int, default=500)
    parser.add_argument('-mindf', '--mindf', metavar='int', help='minimum document frequency in BoW', type=int, default=10)
    parser.add_argument('-d', '--dir', metavar='str', help='home directory', type=str, default='./data')
    parser.add_argument('-enc', '--encoding', metavar='str', help='Data Encoding (e.g. latin1, utf-8)', type=str, default='utf-8')
    parser.add_argument('-reg', '--regularization', metavar='float', help='regularization coefficient', type=float, default=1e-6)
    parser.add_argument('-drop', '--dropout', metavar='float', help='dropout coef default 0.5', type=float, default=0.5)
    parser.add_argument('-cel', '--celebrity', metavar='int', help='celebrity threshold', type=int, default=10)
    parser.add_argument('-conv', '--convolution', action='store_true', help='if true do convolution')
    parser.add_argument('-map', '--map', action='store_true', help='if true just draw maps from pre-trained model')
    parser.add_argument('-sqerror', '--sqerror', action='store_true', help='if exists use squared error regression instead of gaussian mixture model')
    parser.add_argument('-autoencoder', '--autoencoder', type=int, help='if not zero pre-trains the model with input lat/lon and output lat/lon for n steps', default=0)
    parser.add_argument('-grid', '--grid', action='store_true', help='if exists transforms the input from lat/lon to distance from grids on map')
    parser.add_argument('-rbf', '--rbf', action='store_true', help='if exists transforms the input from lat/lon to rbf probabilities and learns centers and sigmas as well.')
    parser.add_argument('-ncomp', '--ncomp', type=int, help='the number of bivariate gaussians whose parameters are going to be learned.', default=100)
    parser.add_argument('-toy', action='store_true', help='if exists use the toy dataset instead of geolocation datasets.')
    parser.add_argument('-tune', action='store_true', help='if exists tune hyperparameters')
    parser.add_argument('-m', '--message', type=str)
    args = parser.parse_args(argv)
    return args
def loss_fn(model_predict, target_var, reg=None, network=None, layers=None):
    """
    Create a loss expression for training, i.e., a scalar objective we want
    to minimize (for our multi-class problem, it is the cross-entropy loss).
    """
    loss_temp = lasagne.objectives.categorical_crossentropy(model_predict, target_var)
    loss_temp = loss_temp.mean()
    # Optional regularization, e.g.:
    # layers = {layer_1: 1e-7, layer_2: 1e-7, network: 1e-7}
    # l2_penalty = lasagne.regularization.regularize_layer_params_weighted(layers, l2)
    if reg == 'l2':
        if layers is not None:
            layer_1 = layers[0]
            layer_2 = layers[1]  # currently unused; only layer_1 is penalized
            layer_dict = {layer_1: 1e-7}
            l2_penalty = lasagne.regularization.regularize_layer_params_weighted(
                layer_dict, l2)
        else:
            l2_penalty = (1e-7 *
                          lasagne.regularization.regularize_network_params(network, l2))
        loss_temp = loss_temp + l2_penalty
    elif reg == 'l1':
        l1_penalty = lasagne.regularization.regularize_network_params(network, l1)
        loss_temp = loss_temp + 1e-7 * l1_penalty
    return loss_temp

#------------------------------------------------------------------------------#
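For context, here is one way a helper like loss_fn might be used to compile a Theano training function. This is a minimal sketch with an invented toy classifier; it assumes l1 and l2 are imported from lasagne.regularization in the module that defines loss_fn, since the function body refers to them by name.

import theano
import theano.tensor as T
import lasagne

# symbolic inputs and integer class targets (toy shapes, for illustration only)
input_var = T.matrix('inputs')
target_var = T.ivector('targets')

l_in = lasagne.layers.InputLayer(shape=(None, 50), input_var=input_var)
network = lasagne.layers.DenseLayer(l_in, num_units=10,
                                    nonlinearity=lasagne.nonlinearities.softmax)

# cross-entropy plus the L2 penalty over all network weights
prediction = lasagne.layers.get_output(network)
loss = loss_fn(prediction, target_var, reg='l2', network=network)

params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params)
train_step = theano.function([input_var, target_var], loss, updates=updates)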
def __init__(self, input_shape, batch_size, num_actions, num_hidden, discount,
             learning_rate, regularization, update_rule, freeze_interval, rng):
    self.input_shape = input_shape
    self.batch_size = batch_size
    self.num_actions = num_actions
    self.num_hidden = num_hidden
    self.discount = discount
    self.learning_rate = learning_rate
    self.regularization = regularization
    self.update_rule = update_rule
    self.freeze_interval = freeze_interval
    self.rng = rng if rng else np.random.RandomState()
    self.initialize_network()
    self.update_counter = 0
def main():
    parser = argparse.ArgumentParser(description='Unet model training')
    parser.add_argument('-dataset', default='camvid', help='Dataset.')
    parser.add_argument('-learning_rate', default=0.0001, help='Learning Rate')
    parser.add_argument('-penal_cst', default=0.0, help='regularization constant')
    parser.add_argument('--num_epochs', '-ne', type=int, default=750,
                        help='Optional. Int to indicate the max number of epochs.')
    parser.add_argument('-max_patience', type=int, default=100, help='Max patience')
    parser.add_argument('-batch_size', type=int, default=[10, 1, 1],
                        help='Batch size [train, val, test]')
    parser.add_argument('-data_augmentation', type=dict,
                        default={'crop_size': (224, 224),
                                 'horizontal_flip': True,
                                 'fill_mode': 'constant'},
                        help='use data augmentation')
    parser.add_argument('-early_stop_class', type=int, default=None,
                        help='class to early stop on')
    parser.add_argument('-train_from_0_255', type=bool, default=False,
                        help='Whether to train from images within 0-255 range')
    args = parser.parse_args()

    train(args.dataset, float(args.learning_rate), float(args.penal_cst),
          int(args.num_epochs), int(args.max_patience),
          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
          train_from_0_255=args.train_from_0_255, loadpath=LOADPATH)
def build(self):
    """
    Build the MDN network with shared Gaussian parameters.
    Input is sparse text and output is the parameters of the mixture of Gaussians.
    """
    self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
    self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)
    l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                          input_var=self.X_sym)
    if self.drop_out and self.dropout_coef > 0:
        l_in_text = lasagne_layers.SparseInputDropoutLayer(l_in_text, p=self.dropout_coef)
    l_hid_text = SparseInputDenseLayer(l_in_text, num_units=self.hid_size,
                                       nonlinearity=lasagne.nonlinearities.tanh,
                                       W=lasagne.init.GlorotUniform())
    # if self.drop_out and self.dropout_coef > 0:
    #     l_hid_text = lasagne.layers.dropout(l_hid_text, p=self.dropout_coef)
    self.l_pi_out = lasagne_layers.MDNSharedParams(l_hid_text, num_units=self.n_bigaus_comp,
                                                   mus=self.mus, sigmas=self.sigmas,
                                                   corxy=self.corxy,
                                                   nonlinearity=lasagne.nonlinearities.softmax,
                                                   W=lasagne.init.GlorotUniform())
    pis = lasagne.layers.get_output(self.l_pi_out, self.X_sym)
    # use the shared Gaussian parameters of the layer
    mus, sigmas, corxy = self.l_pi_out.mus, self.l_pi_out.sigmas, self.l_pi_out.corxy
    sigmas = T.nnet.softplus(sigmas)
    corxy = T.nnet.nnet.softsign(corxy)
    loss = self.nll_loss_sharedparams(mus, sigmas, corxy, pis, self.Y_sym)
    # an autoencoder loss could be added here if desired:
    # sq_error_coef = 0.01
    # predicted_mu = self.get_symb_mus(mus, sigmas, corxy, pis, prediction_method="pi")
    # loss += lasagne.objectives.squared_error(predicted_mu, self.Y_sym).mean() * sq_error_coef
    # if regul_coef is greater than zero, apply L1/L2 regularization
    if self.regul_coef:
        l1_share_out = 0.5
        l1_share_hid = 0.5
        regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
        logging.info('regul coefficient for output and hidden lasagne_layers is ' + str(self.regul_coef))
        l1_penalty = lasagne.regularization.regularize_layer_params(self.l_pi_out, l1) * regul_coef_out * l1_share_out
        l2_penalty = lasagne.regularization.regularize_layer_params(self.l_pi_out, l2) * regul_coef_out * (1 - l1_share_out)
        l1_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l1) * regul_coef_hid * l1_share_hid
        l2_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)
        loss += l1_penalty + l2_penalty
    parameters = lasagne.layers.get_all_params(self.l_pi_out, trainable=True)
    updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3,
                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
    self.f_train = theano.function([self.X_sym, self.Y_sym], loss, updates=updates,
                                   on_unused_input='warn')
    # (a theano.compile.MonitorMode with inspect_inputs/inspect_outputs can be
    # passed via mode= above for debugging)
    self.f_val = theano.function([self.X_sym, self.Y_sym], loss, on_unused_input='warn')
    self.f_predict = theano.function([self.X_sym], [mus, sigmas, corxy, pis],
                                     on_unused_input='warn')
def build_squarederror_regression(self):
    """
    This is only used if we want to build a regression model.
    """
    self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
    self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)
    self.X_autoencoder_sym = T.matrix(name='x_autoencoder', dtype=self.dtype)
    self.Y_autoencoder_sym = T.matrix(name='y_autoencoder', dtype=self.dtype)
    l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                          input_var=self.X_sym)
    if self.drop_out and self.dropout_coef > 0:
        l_in_text = lasagne_layers.SparseInputDropoutLayer(l_in_text, p=self.dropout_coef)
    l_hid_text = SparseInputDenseLayer(l_in_text, num_units=self.hid_size,
                                       nonlinearity=lasagne.nonlinearities.tanh,
                                       W=lasagne.init.GlorotUniform())
    # if self.drop_out and self.dropout_coef > 0:
    #     l_hid_text = lasagne.layers.dropout(l_hid_text, p=self.dropout_coef)
    self.l_out = lasagne.layers.DenseLayer(l_hid_text, num_units=2,
                                           nonlinearity=lasagne.nonlinearities.linear,
                                           W=lasagne.init.GlorotUniform())
    output = lasagne.layers.get_output(self.l_out, self.X_sym)
    loss = lasagne.objectives.squared_error(output, self.Y_sym).mean()
    output_eval = lasagne.layers.get_output(self.l_out, self.X_sym, deterministic=True)
    if self.regul_coef:
        l1_share_out = 0.5
        l1_share_hid = 0.5
        regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
        logging.info('regul coefficient for output and hidden lasagne_layers is ' + str(self.regul_coef))
        l1_penalty = lasagne.regularization.regularize_layer_params(self.l_out, l1) * regul_coef_out * l1_share_out
        l2_penalty = lasagne.regularization.regularize_layer_params(self.l_out, l2) * regul_coef_out * (1 - l1_share_out)
        l1_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l1) * regul_coef_hid * l1_share_hid
        l2_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)
        loss = loss + l1_penalty + l2_penalty
    parameters = lasagne.layers.get_all_params(self.l_out, trainable=True)
    updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3,
                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
    self.f_train = theano.function([self.X_sym, self.Y_sym], loss, updates=updates,
                                   on_unused_input='warn')
    self.f_val = theano.function([self.X_sym, self.Y_sym], loss, on_unused_input='warn')
    self.f_predict = theano.function([self.X_sym], output_eval, on_unused_input='warn')
def __init__(self, input_shape, batch_size, num_hidden_layers, num_actions,
             num_hidden, discount, learning_rate, regularization, update_rule,
             freeze_interval, rng):
    """
    :type input_shape: int
    :param input_shape: the dimension of the input representation of the state

    :type batch_size: int
    :param batch_size: number of samples to use in computing the loss / updates

    :type num_hidden_layers: int
    :param num_hidden_layers: number of hidden layers to use in the network

    :type num_actions: int
    :param num_actions: the output dimension of the network measured in number of possible actions

    :type num_hidden: int
    :param num_hidden: number of hidden nodes to use in each layer (const across layers)

    :type discount: float
    :param discount: discount factor to use in computing Q-learning target values

    :type learning_rate: float
    :param learning_rate: the learning rate to use (no decay schedule since ADAM update assumed)

    :type regularization: float
    :param regularization: l2 regularization constant applied to weights

    :type update_rule: string
    :param update_rule: the type of update rule to use, suggest using 'adam'

    :type freeze_interval: int
    :param freeze_interval: the number of updates between updating the target network weights

    :type rng: rng
    :param rng: rng for running deterministically, o/w just leave as None

    :example call:
    network = qnetwork.QNetwork(input_shape=20, batch_size=64, num_hidden_layers=2,
                                num_actions=4, num_hidden=4, discount=1, learning_rate=1e-3,
                                regularization=1e-4, update_rule='adam', freeze_interval=1e5,
                                rng=None)
    """
    self.input_shape = input_shape
    self.batch_size = batch_size
    self.num_hidden_layers = num_hidden_layers
    self.num_actions = num_actions
    self.num_hidden = num_hidden
    self.discount = discount
    self.learning_rate = learning_rate
    self.regularization = regularization
    self.update_rule = update_rule
    self.freeze_interval = freeze_interval
    self.rng = rng if rng else np.random.RandomState()
    self.initialize_network()
    self.update_counter = 0
def __init__(self, input_shape, sequence_length, batch_size, num_actions,
             num_hidden, discount, learning_rate, regularization, update_rule,
             freeze_interval, network_type, rng):
    """
    :type input_shape: int
    :param input_shape: the dimension of the input representation of the state

    :type sequence_length: int
    :param sequence_length: the length to back propagate through time

    :type batch_size: int
    :param batch_size: number of samples to use in computing the loss / updates

    :type num_actions: int
    :param num_actions: the output dimension of the network measured in number of possible actions

    :type num_hidden: int
    :param num_hidden: number of hidden nodes to use in each layer (const across layers)

    :type discount: float
    :param discount: discount factor to use in computing Q-learning target values

    :type learning_rate: float
    :param learning_rate: the learning rate to use (no decay schedule since ADAM update assumed)

    :type regularization: float
    :param regularization: l2 regularization constant applied to weights

    :type update_rule: string
    :param update_rule: the type of update rule to use, suggest using 'adam'

    :type freeze_interval: int
    :param freeze_interval: the number of updates between updating the target network weights

    :type rng: rng
    :param rng: rng for running deterministically, o/w just leave as None

    :example call:
    network = qnetwork.QNetwork(input_shape=20, batch_size=64, num_hidden_layers=2,
                                num_actions=4, num_hidden=4, discount=1, learning_rate=1e-3,
                                regularization=1e-4, update_rule='adam', freeze_interval=1e5,
                                rng=None)
    """
    self.input_shape = input_shape
    self.sequence_length = sequence_length
    self.batch_size = batch_size
    self.num_actions = num_actions
    self.num_hidden = num_hidden
    self.discount = discount
    self.learning_rate = learning_rate
    self.regularization = regularization
    self.update_rule = update_rule
    self.freeze_interval = freeze_interval
    self.network_type = network_type
    self.rng = rng if rng else np.random.RandomState()
    self.initialize_network()
    self.update_counter = 0