我们从 Python 开源项目中,提取了以下 21 个代码示例,用于说明如何使用 tensorflow.learn()。
def test_dask_iris_classification(self):
    """Fit a LinearClassifier on dask-partitioned iris data and check accuracy.

    The body only runs when both HAS_DASK and HAS_PANDAS are truthy;
    otherwise the test returns without asserting anything.
    """
    if not (HAS_DASK and HAS_PANDAS):
        return

    import pandas as pd  # pylint: disable=g-import-not-at-top
    import dask.dataframe as dd  # pylint: disable=g-import-not-at-top

    random.seed(42)
    iris = datasets.load_iris()

    # Wrap the iris arrays in pandas frames, then split each into two
    # dask partitions.
    features = pd.DataFrame(iris.data)
    features = dd.from_pandas(features, npartitions=2)
    targets = pd.DataFrame(iris.target)
    targets = dd.from_pandas(targets, npartitions=2)

    columns = learn.infer_real_valued_columns_from_input(features)
    classifier = learn.LinearClassifier(feature_columns=columns, n_classes=3)
    classifier.fit(features, targets, steps=100)

    # Predict partition by partition and materialise the result.
    predicted = features.map_partitions(classifier.predict).compute()
    score = accuracy_score(targets.compute(), predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
    """Creates an InputPipeline object from a dictionary definition.

    Args:
      def_dict: A dictionary defining the input pipeline. It must have a
        "class" entry naming an attribute of this module. An optional
        "params" entry holds constructor parameters; a missing or `None`
        value is treated as "no parameters".
      mode: A value in tf.contrib.learn.ModeKeys
      **kwargs: Extra constructor parameters. They are merged on top of
        `def_dict["params"]`, so they win on key collisions.

    Returns:
      A new InputPipeline object, built as
      `pipeline_class(params=params, mode=mode)`.

    Raises:
      ValueError: If `def_dict` has no "class" entry, or if this module has
        no attribute with that name.
    """
    if "class" not in def_dict:
        raise ValueError("Input Pipeline definition must have a class property.")

    class_ = def_dict["class"]
    # A private sentinel lets us resolve the class with a single lookup while
    # still telling "attribute missing" apart from any real attribute value.
    missing = object()
    pipeline_class = getattr(sys.modules[__name__], class_, missing)
    if pipeline_class is missing:
        raise ValueError("Invalid Input Pipeline class: {}".format(class_))

    # Constructor arguments. Copy so the caller's definition is never mutated;
    # `or {}` tolerates an explicit `"params": None`.
    params = dict(def_dict.get("params") or {})
    params.update(kwargs)
    return pipeline_class(params=params, mode=mode)
def get_feature_columns(mode):
    """Return the set of int64 real-valued feature columns for `mode`.

    Args:
      mode: Compared against tf.contrib.learn.ModeKeys.TRAIN and .EVAL to
        decide which extra columns are added.

    Returns:
      A set of columns: "context", "context_len", "utterance" and
      "utterance_len" always; "label" in TRAIN mode; nine
      "distractor_{i}" / "distractor_{i}_len" pairs in EVAL mode.
    """

    def int64_column(name, dimension):
        # Every column here shares the same dtype; only name/size vary.
        return tf.contrib.layers.real_valued_column(
            column_name=name, dimension=dimension, dtype=tf.int64)

    columns = [
        int64_column("context", TEXT_FEATURE_SIZE),
        int64_column("context_len", 1),
        int64_column("utterance", TEXT_FEATURE_SIZE),
        int64_column("utterance_len", 1),
    ]

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        # During training we have a label feature
        columns.append(int64_column("label", 1))

    if mode == tf.contrib.learn.ModeKeys.EVAL:
        # During evaluation we have distractors
        for index in range(9):
            columns.append(
                int64_column("distractor_{}".format(index), TEXT_FEATURE_SIZE))
            columns.append(
                int64_column("distractor_{}_len".format(index), 1))

    return set(columns)
def create_input_fn(mode, input_files, batch_size, num_epochs):
    """Build an input function that reads batched features from TFRecords.

    Args:
      mode: A tf.contrib.learn.ModeKeys value; selects the feature columns
        and how the target tensor is produced.
      input_files: Passed as `file_pattern` to `read_batch_features`.
      batch_size: Number of examples per batch.
      num_epochs: Passed through to `read_batch_features`.

    Returns:
      A zero-argument function returning a `(feature_map, target)` tuple.
    """

    def input_fn():
        parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            get_feature_columns(mode))

        batch_tensors = tf.contrib.learn.io.read_batch_features(
            file_pattern=input_files,
            batch_size=batch_size,
            features=parsing_spec,
            reader=tf.TFRecordReader,
            randomize_input=True,
            num_epochs=num_epochs,
            queue_capacity=200000 + batch_size * 10,
            name="read_batch_features_{}".format(mode))

        if mode != tf.contrib.learn.ModeKeys.TRAIN:
            # In evaluation we have 10 classes (utterances).
            # The first one (index 0) is always the correct one
            zero_target = tf.zeros([batch_size, 1], dtype=tf.int64)
            return batch_tensors, zero_target

        # This is an ugly hack because of a current bug in tf.learn:
        # during evaluation TF tries to restore the epoch variable, which
        # isn't defined during training, so we define it manually here.
        tf.get_variable(
            "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
            initializer=tf.constant(0, dtype=tf.int64))

        # The label travels with the features on disk; split it off.
        label_target = batch_tensors.pop("label")
        return batch_tensors, label_target

    return input_fn
def test_pandas_dataframe(self):
    """Fit a LinearClassifier on iris data held in pandas DataFrames.

    When HAS_PANDAS is falsy a notice is printed and nothing is asserted.
    """
    if not HAS_PANDAS:
        print("No pandas installed. pandas-related tests are skipped.")
        return

    import pandas as pd  # pylint: disable=g-import-not-at-top

    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.DataFrame(iris.target)

    columns = learn.infer_real_valued_columns_from_input(features)
    classifier = learn.LinearClassifier(feature_columns=columns, n_classes=3)
    classifier.fit(features, targets, steps=100)

    # The targets frame has a single column, addressed as column 0.
    predicted = classifier.predict(features)
    score = accuracy_score(targets[0], predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_pandas_series(self):
    """Fit a LinearClassifier on iris data with labels in a pandas Series.

    The body only runs when HAS_PANDAS is truthy; otherwise the test
    returns without asserting anything.
    """
    if not HAS_PANDAS:
        return

    import pandas as pd  # pylint: disable=g-import-not-at-top

    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.Series(iris.target)

    columns = learn.infer_real_valued_columns_from_input(features)
    classifier = learn.LinearClassifier(feature_columns=columns, n_classes=3)
    classifier.fit(features, targets, steps=100)

    predicted = classifier.predict(features)
    score = accuracy_score(targets, predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_string_data_formats(self):
    """Check that the pandas extractors raise ValueError on string data.

    The body only runs when HAS_PANDAS is truthy.
    """
    if not HAS_PANDAS:
        return

    import pandas as pd  # pylint: disable=g-import-not-at-top

    extractors = (
        learn.io.extract_pandas_data,
        learn.io.extract_pandas_labels,
    )
    for extract in extractors:
        # A fresh string-valued frame is built for each extractor.
        with self.assertRaises(ValueError):
            extract(pd.DataFrame({"Test": ["A", "B"]}))
def test_pandas_dataframe(self):
    """Fit a LinearClassifier on pandas DataFrames of the iris dataset.

    Predictions are materialised with `list(...)` before scoring. When
    HAS_PANDAS is falsy a notice is printed and nothing is asserted.
    """
    if not HAS_PANDAS:
        print("No pandas installed. pandas-related tests are skipped.")
        return

    import pandas as pd  # pylint: disable=g-import-not-at-top

    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.DataFrame(iris.target)

    columns = learn.infer_real_valued_columns_from_input(features)
    classifier = learn.LinearClassifier(feature_columns=columns, n_classes=3)
    classifier.fit(features, targets, steps=100)

    # The targets frame has a single column, addressed as column 0.
    predicted = list(classifier.predict(features))
    score = accuracy_score(targets[0], predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
    """Creates an input function that can be used with tf.learn estimators.

    Note that you must pass "factory functions" for both the data provider
    and featurizer to ensure that everything will be created in the same
    graph.

    Args:
      pipeline: An instance of `seq2seq.data.InputPipeline`.
      batch_size: Create batches of this size. A queue to hold a reasonable
        number of batches in memory is created.
      bucket_boundaries: int list, increasing non-negative numbers.
        If None (or empty), no bucketing is performed.
      allow_smaller_final_batch: Forwarded unchanged to the batching op.
      scope: Variable scope name for the input ops; "input_fn" is used
        when this is falsy.

    Returns:
      An input function that returns `(feature_batch, labels_batch)`
      tuples when called. `labels_batch` is None when the batch shares no
      key with `pipeline.label_keys`.
    """

    def input_fn():
        """Creates features and labels."""
        with tf.variable_scope(scope or "input_fn"):
            provider = pipeline.make_data_provider()
            tensors = pipeline.read_from_data_provider(provider)

            # Both batching strategies use the same queue capacity.
            queue_capacity = 5000 + 16 * batch_size

            if not bucket_boundaries:
                batch = tf.train.batch(
                    tensors=tensors,
                    enqueue_many=False,
                    batch_size=batch_size,
                    dynamic_pad=True,
                    capacity=queue_capacity,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name="batch_queue")
            else:
                # Group examples by source length; examples whose
                # "source_len" is below 1 are not kept.
                _, batch = tf.contrib.training.bucket_by_sequence_length(
                    input_length=tensors["source_len"],
                    bucket_boundaries=bucket_boundaries,
                    tensors=tensors,
                    batch_size=batch_size,
                    keep_input=tensors["source_len"] >= 1,
                    dynamic_pad=True,
                    capacity=queue_capacity,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name="bucket_queue")

            # Separate features and labels
            features_batch = {
                key: batch[key] for key in pipeline.feature_keys
            }
            shared_label_keys = set(batch.keys()).intersection(
                pipeline.label_keys)
            labels_batch = (
                {key: batch[key] for key in pipeline.label_keys}
                if shared_label_keys else None)

            return features_batch, labels_batch

    return input_fn
def get_estimator(args, output_dir, features, stats, target_vocab_size):
    """Build the tf.contrib.learn estimator selected by `args.model`.

    Args:
      args: Parsed options. Reads `model`, `hidden_layer_sizes`,
        `min_eval_frequency`, `learning_rate`, and, depending on the model,
        `epsilon` or `l1_regularization` / `l2_regularization`.
      output_dir: Base directory; the model is stored in its 'train'
        subdirectory.
      features: Forwarded to `build_feature_columns`.
      stats: Forwarded to `build_feature_columns`.
      target_vocab_size: Used as `n_classes` for the classification models.

    Returns:
      A DNNRegressor, LinearRegressor, DNNClassifier or LinearClassifier.

    Raises:
      ValueError: If hidden layers are missing for a DNN model, supplied
        for a linear model, or `args.model` is not a recognised value.
    """
    # Check layers used for dnn models.
    if is_dnn_model(args.model) and not args.hidden_layer_sizes:
        raise ValueError('--hidden-layer-size* must be used with DNN models')
    if is_linear_model(args.model) and args.hidden_layer_sizes:
        raise ValueError('--hidden-layer-size* cannot be used with linear models')

    # Build tf.learn features
    columns = build_feature_columns(features, stats, args.model)

    # Set how often to run checkpointing in terms of steps.
    run_config = tf.contrib.learn.RunConfig(
        save_checkpoints_steps=args.min_eval_frequency)
    model_dir = os.path.join(output_dir, 'train')

    def make_adam():
        # Optimizer used by both DNN variants.
        return tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon)

    def make_ftrl():
        # Optimizer used by both linear variants.
        return tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization)

    if args.model == 'dnn_regression':
        return tf.contrib.learn.DNNRegressor(
            feature_columns=columns,
            hidden_units=args.hidden_layer_sizes,
            config=run_config,
            model_dir=model_dir,
            optimizer=make_adam())

    if args.model == 'linear_regression':
        return tf.contrib.learn.LinearRegressor(
            feature_columns=columns,
            config=run_config,
            model_dir=model_dir,
            optimizer=make_ftrl())

    if args.model == 'dnn_classification':
        return tf.contrib.learn.DNNClassifier(
            feature_columns=columns,
            hidden_units=args.hidden_layer_sizes,
            n_classes=target_vocab_size,
            config=run_config,
            model_dir=model_dir,
            optimizer=make_adam())

    if args.model == 'linear_classification':
        return tf.contrib.learn.LinearClassifier(
            feature_columns=columns,
            n_classes=target_vocab_size,
            config=run_config,
            model_dir=model_dir,
            optimizer=make_ftrl())

    raise ValueError('bad --model-type value')
def read_examples(input_files, batch_size, shuffle, num_epochs=None):
    """Creates readers and queues for reading example protos.

    Args:
      input_files: Iterable of strings, each a comma-separated list of file
        patterns.
      batch_size: Number of lines per batch.
      shuffle: When truthy, file names are shuffled and batches are drawn
        through a shuffling queue.
      num_epochs: Epoch limit for the file name queue; 0 and None both mean
        "no limit".

    Returns:
      The result of `tf.train.shuffle_batch` or `tf.train.batch` over the
      `[key, line]` pair produced by the text line reader.
    """
    matched_files = []
    for entry in input_files:
        for pattern in entry.split(','):
            matched_files.extend(file_io.get_matching_files(pattern))

    num_threads = multiprocessing.cpu_count()

    # The minimum number of instances in a queue from which examples are
    # drawn randomly. The larger this number, the more randomness at the
    # expense of higher memory requirements.
    min_after_dequeue = 1000

    # When batching data, the queue's capacity will be larger than the
    # batch_size by some factor: the number of threads plus a small safety
    # margin.
    capacity_factor = num_threads + 3

    # Convert num_epochs == 0 -> num_epochs is None, if necessary
    num_epochs = num_epochs or None

    # Build a queue of the filenames to be read.
    filename_queue = tf.train.string_input_producer(
        matched_files, num_epochs, shuffle)

    example_id, encoded_example = tf.TextLineReader().read_up_to(
        filename_queue, batch_size)
    tensors = [example_id, encoded_example]

    if not shuffle:
        return tf.train.batch(
            tensors,
            batch_size,
            capacity=capacity_factor * batch_size,
            enqueue_many=True,
            num_threads=num_threads)

    shuffle_capacity = min_after_dequeue + capacity_factor * batch_size
    return tf.train.shuffle_batch(
        tensors,
        batch_size,
        shuffle_capacity,
        min_after_dequeue,
        enqueue_many=True,
        num_threads=num_threads)


# ==============================================================================
# Building the TF learn estimators
# ==============================================================================
def _build_input_fn(input_file_pattern, batch_size, mode):
    """Build input function.

    Args:
      input_file_pattern: The file pattern for examples
      batch_size: Batch size
      mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

    Returns:
      A zero-argument input function; see `_input_fn` for what it yields.
    """

    def _input_fn():
        """Supplies the input to the model.

        Returns:
          A tuple consisting of 1) a dictionary of tensors whose keys are
          the feature names, and 2) a tensor of target labels if the mode
          is not INFER (and None, otherwise).
        """
        logging.info("Reading files from %s", input_file_pattern)
        matched_files = sorted(tf.gfile.Glob(input_file_pattern))
        logging.info("Reading files from %s", matched_files)

        # The target column is only parsed when labels are needed.
        with_target = mode != tf.contrib.learn.ModeKeys.INFER
        parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            feature_columns=_get_feature_columns(with_target))

        def gzip_reader():
            # TFRecord reader configured for GZIP-compressed files.
            return tf.TFRecordReader(
                options=tf.python_io.TFRecordOptions(
                    compression_type=TFRecordCompressionType.GZIP))

        reader_fn = gzip_reader if FLAGS.use_gzip else tf.TFRecordReader

        # Input order is only randomized while training.
        shuffle_input = mode == tf.contrib.learn.ModeKeys.TRAIN
        features = tf.contrib.learn.io.read_batch_features(
            file_pattern=matched_files,
            batch_size=batch_size,
            queue_capacity=3*batch_size,
            randomize_input=shuffle_input,
            feature_queue_capacity=FLAGS.feature_queue_capacity,
            reader=reader_fn,
            features=parsing_spec)

        if not with_target:
            return features, None
        target = features.pop(FLAGS.target_field)
        return features, target

    return _input_fn
def _build_model_fn():
    """Build model function.

    Returns:
      A model function that can be passed to `Estimator` constructor.
    """

    def _model_fn(features, labels, mode):
        """Creates the prediction and its loss.

        Args:
          features: A dictionary of tensors keyed by the feature name.
          labels: A tensor representing the labels.
          mode: The execution mode, as defined in
            tf.contrib.learn.ModeKeys.

        Returns:
          In INFER and TRAIN modes, a `ModelFnOps`. In EVAL mode, a
          `(predictions, loss, None)` tuple.
        """
        # Generate one embedding per sparse feature column and concatenate
        # them.
        embeddings = tf.contrib.layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=_get_feature_columns(include_target_column=False))

        # Add one hidden layer.
        hidden = tf.contrib.layers.relu(embeddings, FLAGS.hidden_units)

        # Output layer and class probabilities.
        logits = tf.contrib.layers.linear(hidden, FLAGS.num_classes)
        probabilities = tf.contrib.layers.softmax(logits)

        if mode == tf.contrib.learn.ModeKeys.INFER:
            # Echo the PREDICTION_KEY feature next to the probabilities.
            inference_outputs = {
                tf.contrib.learn.PredictionKey.PROBABILITIES: probabilities,
                PREDICTION_KEY: features[PREDICTION_KEY],
            }
            alternatives = {
                DEFAULT_OUTPUT_ALTERNATIVE: (
                    tf.contrib.learn.ProblemType.UNSPECIFIED,
                    inference_outputs),
            }
            return model_fn.ModelFnOps(
                mode=mode,
                predictions=inference_outputs,
                output_alternatives=alternatives)

        # One-hot encode the labels, then sum the encoding over axis 1.
        # NOTE(review): presumably labels carry a per-example axis of class
        # ids at axis 1 -- confirm against the input pipeline.
        encoded_labels = tf.one_hot(labels, FLAGS.num_classes)
        encoded_labels = tf.reduce_sum(
            input_tensor=encoded_labels, reduction_indices=[1])
        loss = tf.losses.softmax_cross_entropy(encoded_labels, logits)

        if mode == tf.contrib.learn.ModeKeys.EVAL:
            return probabilities, loss, None

        momentum_optimizer = tf.train.MomentumOptimizer(
            FLAGS.learning_rate, FLAGS.momentum)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=FLAGS.learning_rate,
            optimizer=momentum_optimizer)
        return model_fn.ModelFnOps(
            mode=mode,
            predictions=probabilities,
            loss=loss,
            train_op=train_op)

    return _model_fn
def _def_experiment(train_file_pattern, eval_file_pattern, batch_size):
    """Creates the function used to configure the experiment runner.

    This function creates a function that is used by the learn_runner
    module to create an Experiment.

    Args:
      train_file_pattern: The directory the train data can be found in.
      eval_file_pattern: The directory the test data can be found in.
      batch_size: Batch size

    Returns:
      A function that creates an Experiment object for the runner.
    """

    def _experiment_fn(output_dir):
        """Experiment function used by learn_runner to run training/eval/etc.

        Args:
          output_dir: String path of directory to use for outputs.

        Returns:
          tf.learn `Experiment`.
        """
        estimator = tf.contrib.learn.Estimator(
            model_fn=_build_model_fn(), model_dir=output_dir)

        # Training and evaluation differ only in file pattern and mode.
        train_input_fn = _build_input_fn(
            input_file_pattern=train_file_pattern,
            batch_size=batch_size,
            mode=tf.contrib.learn.ModeKeys.TRAIN)
        eval_input_fn = _build_input_fn(
            input_file_pattern=eval_file_pattern,
            batch_size=batch_size,
            mode=tf.contrib.learn.ModeKeys.EVAL)

        metrics = _create_evaluation_metrics()

        # Export strategy built from `_predict_input_fn`, keeping 5 exports.
        export_strategy = saved_model_export_utils.make_export_strategy(
            _predict_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_input_fn=train_input_fn,
            train_steps=FLAGS.num_train_steps,
            eval_input_fn=eval_input_fn,
            eval_steps=FLAGS.num_eval_steps,
            eval_metrics=metrics,
            min_eval_frequency=100,
            export_strategies=[export_strategy])

    return _experiment_fn