Python sklearn.preprocessing module: StandardScaler() example source code

We extracted the following code examples from open-source Python projects to illustrate how to use sklearn.preprocessing.StandardScaler().
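As a baseline before the project snippets, here is a minimal, self-contained sketch of the usual StandardScaler workflow (toy numbers, not taken from any of the projects below): fit on the training data, then reuse the learned statistics on held-out data.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Two toy features on very different scales.
X_train = np.array([[1000.0, 0.1], [1200.0, 0.5], [800.0, 0.3]])
X_test = np.array([[900.0, 0.2]])

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # learn mean_ and scale_ from the training data
X_test_std = scaler.transform(X_test)        # reuse the same statistics on unseen data

print(scaler.mean_, scaler.scale_)
print(X_train_std.mean(axis=0), X_train_std.std(axis=0))  # roughly 0 and 1 per column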

Project: GlottGAN    Author: bajibabu
def load_data(data_dir, num_files=30):
    files_list = os.listdir(data_dir)
    data = None
    ac_data = None
    for fname in files_list[:num_files]:
        print(fname)
        f = os.path.join(data_dir, fname)
        with netcdf.netcdf_file(f, 'r') as fid:
            m = fid.variables['outputMeans'][:].copy()
            s = fid.variables['outputStdevs'][:].copy()
            feats = fid.variables['targetPatterns'][:].copy()
            ac_feats = fid.variables['inputs'][:].copy()
            scaler = preprocessing.StandardScaler()
            scaler.mean_ = m
            scaler.scale_ = s
            feats = scaler.inverse_transform(feats)
            assert feats.shape[0] == ac_feats.shape[0]
            # feats = np.concatenate((feats,ac_feats),axis=1)
        if data is None and ac_data is None:
            data = feats
            ac_data = ac_feats
        else:
            data = np.vstack((data, feats))
            ac_data = np.vstack((ac_data, ac_feats))
    return data, ac_data
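The snippet above plugs precomputed statistics into an unfitted StandardScaler and calls inverse_transform to recover features on their original scale. A standalone sketch of that trick with made-up statistics (in current scikit-learn releases inverse_transform only relies on mean_ and scale_, so this manual wiring generally works):

import numpy as np
from sklearn import preprocessing

m = np.array([5.0, -2.0])   # precomputed per-dimension means
s = np.array([2.0, 0.5])    # precomputed per-dimension standard deviations
normalised = np.array([[0.0, 1.0], [1.0, -1.0]])

scaler = preprocessing.StandardScaler()
scaler.mean_ = m
scaler.scale_ = s
restored = scaler.inverse_transform(normalised)  # equivalent to normalised * s + m
print(restored)  # [[ 5.  -1.5]
                 #  [ 7.  -2.5]]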
Project: SecuML    Author: ANSSI-FR
def computeNeighboursScores(self):
        all_instances = self.iteration.datasets.instances
        # Connectivity matrix
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('model', NearestNeighbors(n_neighbors=self.num_neighbours, n_jobs = -1))])
        pipeline.fit(all_instances.getFeatures())
        # Labels
        labels = np.array([generateLabel(x) for x in all_instances.getLabels()])
        # Compute neighbour scores
        scores = []
        all_neighbours = pipeline.named_steps['model'].kneighbors(return_distance = False)
        for i, label in enumerate(labels):
            if label != 0:
                continue
            else:
                neighbours = all_neighbours[i]
                score = sum(labels[neighbours] + 1) / (2.0 * self.num_neighbours)
                scores.append(score)
        return np.array(scores)
Project: MENGEL    Author: CodeSpaceHQ
def scale_numeric_data(pandas_data):
    # Scaling is important because if the variables are too different from
    # one another, it can throw off the model.
    # EX: If one variable has an average of 1000, and another has an average
    # of .5, then the model won't be as accurate.
    for col in pandas_data.columns:
        if pandas_data[col].dtype == np.float64 or pandas_data[col].dtype == np.int64:
            pandas_data[col] = preprocessing.scale(pandas_data[col])

    return pandas_data


# Creates a standard scaler based on the training data and applies it to both train
# and test data.
# Input:
# - Two Pandas DataFrames, same number of columns
# Output:
# - Two Pandas DataFrames, both of which have been scaled based on StandardScaler
# trained on training data.
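The function described by the comment above is not included in this excerpt; a minimal sketch of what such a helper presumably looks like (the name scale_train_test and the DataFrame round-trip are assumptions, not the project's code):

import pandas as pd
from sklearn import preprocessing

def scale_train_test(train_df, test_df):
    # Fit the scaler on the training data only, then apply it to both sets.
    scaler = preprocessing.StandardScaler().fit(train_df)
    train_scaled = pd.DataFrame(scaler.transform(train_df),
                                columns=train_df.columns, index=train_df.index)
    test_scaled = pd.DataFrame(scaler.transform(test_df),
                               columns=test_df.columns, index=test_df.index)
    return train_scaled, test_scaled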
Project: mlens    Author: flennerhag
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens
Project: kaggle_yt8m    Author: N01Z3
def tf2npz(tf_path, export_folder=FAST):
    vid_ids = []
    labels = []
    mean_rgb = []
    mean_audio = []
    tf_basename = os.path.basename(tf_path)
    npz_basename = tf_basename[:-len('.tfrecord')] + '.npz'
    isTrain = '/test' not in tf_path

    for example in tf.python_io.tf_record_iterator(tf_path):
        tf_example = tf.train.Example.FromString(example).features
        vid_ids.append(tf_example.feature['video_id'].bytes_list.value[0].decode(encoding='UTF-8'))
        if isTrain:
            labels.append(np.array(tf_example.feature['labels'].int64_list.value))
        mean_rgb.append(np.array(tf_example.feature['mean_rgb'].float_list.value).astype(np.float32))
        mean_audio.append(np.array(tf_example.feature['mean_audio'].float_list.value).astype(np.float32))

    save_path = export_folder + '/' + npz_basename
    np.savez(save_path,
             rgb=StandardScaler().fit_transform(np.array(mean_rgb)),
             audio=StandardScaler().fit_transform(np.array(mean_audio)),
             ids=np.array(vid_ids),
             labels=labels
             )
Project: OpenAPS    Author: medicinexlab
def preprocess_data(train_data_matrix, valid_data_matrix, test_data_matrix):
    """
    Function to preprocess the data with the standard scaler from sci-kit learn.
    It takes in the training, validation, and testing matrices and returns the
    standardized versions of them.

    Input:      train_data_matrix               The data matrix with the training set data
                valid_data_matrix               The data matrix with the validation set data
                test_data_matrix                The data matrix with the testing set data
    Output:     transform_train_data_matrix     The data matrix with the standardized training set data
                transform_valid_data_matrix     The data matrix with the standardized validation set data
                transform_test_data_matrix      The data matrix with the standardized testing set data
    Usage:      preprocess_data(train_data_matrix, valid_data_matrix, test_data_matrix)
    """

    reg_scaler = prep.StandardScaler().fit(train_data_matrix)
    transform_train_data_matrix = reg_scaler.transform(train_data_matrix)
    transform_valid_data_matrix = reg_scaler.transform(valid_data_matrix)
    transform_test_data_matrix = reg_scaler.transform(test_data_matrix)

    return transform_train_data_matrix, transform_valid_data_matrix, transform_test_data_matrix
Project: merlin    Author: CSTR-Edinburgh
def load_norm_stats(stats_file, dim, method="MVN"):
    #### load norm stats ####
    io_funcs = BinaryIOCollection()

    norm_matrix, frame_number = io_funcs.load_binary_file_frame(stats_file, dim)
    assert frame_number==2

    if method=="MVN":
        scaler = preprocessing.StandardScaler()
        scaler.mean_  = norm_matrix[0, :]
        scaler.scale_ = norm_matrix[1, :]
    elif method=="MINMAX":
        scaler = preprocessing.MinMaxScaler(feature_range=(0.01, 0.99))
        scaler.min_   = norm_matrix[0, :]
        scaler.scale_ = norm_matrix[1, :]

    return scaler
Project: pyglmnet    Author: glm-tools
def test_group_lasso():
    """Group Lasso test."""
    n_samples, n_features = 100, 90

    # assign group ids
    groups = np.zeros(90)
    groups[0:29] = 1
    groups[30:59] = 2
    groups[60:] = 3

    # sample random coefficients
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    # create an instance of the GLM class
    glm_group = GLM(distr='softplus', alpha=1.)

    # simulate training data
    Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
    yr = simulate_glm(glm_group.distr, beta0, beta, Xr)

    # scale and fit
    scaler = StandardScaler().fit(Xr)
    glm_group.fit(scaler.transform(Xr), yr)
Project: GlottGAN    Author: bajibabu
def load_data(data_dir, num_files=30):
    files_list = os.listdir(data_dir)
    data = None
    for fname in files_list[:num_files]:
        print(fname)
        f = os.path.join(data_dir, fname)
        with netcdf.netcdf_file(f, 'r') as fid:
            m = fid.variables['outputMeans'][:].copy()
            s = fid.variables['outputStdevs'][:].copy()
            feats = fid.variables['targetPatterns'][:].copy()
            scaler = preprocessing.StandardScaler()
            scaler.mean_ = m
            scaler.scale_ = s
            feats = scaler.inverse_transform(feats)
        if data is None:
            data = feats
        else:
            data = np.vstack((data, feats))
    return data
Project: GlottGAN    Author: bajibabu
def load_data(data_dir, num_files=30):
    files_list = os.listdir(data_dir)
    dataset = []
    ac_dataset = []
    for fname in files_list[:num_files]:
        #print(fname)
        f = os.path.join(data_dir, fname)
        with netcdf.netcdf_file(f, 'r') as fid:
            m = fid.variables['outputMeans'][:].copy()
            s = fid.variables['outputStdevs'][:].copy()
            feats = fid.variables['targetPatterns'][:].copy()
            ac_feats = fid.variables['inputs'][:].copy()
            scaler = preprocessing.StandardScaler()
            scaler.mean_ = m
            scaler.scale_ = s
            feats = scaler.inverse_transform(feats)
            assert feats.shape[0] == ac_feats.shape[0]
            dataset.extend(feats)
            ac_dataset.extend(ac_feats)
    dataset = np.asarray(dataset)
    ac_dataset = np.asarray(ac_dataset)
    #print(dataset.shape, ac_dataset.shape)
    return dataset, ac_dataset
Project: sl-quant    Author: danielzak
def init_state(indata, test=False):
    close = indata['close'].values
    diff = np.diff(close)
    diff = np.insert(diff, 0, 0)
    sma15 = SMA(indata, timeperiod=15)
    sma60 = SMA(indata, timeperiod=60)
    rsi = RSI(indata, timeperiod=14)
    atr = ATR(indata, timeperiod=14)

    #--- Preprocess data
    xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr))

    xdata = np.nan_to_num(xdata)
    if test == False:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    elif test == True:
        scaler = joblib.load('data/scaler.pkl')
        xdata = np.expand_dims(scaler.transform(xdata), axis=1)  # apply the saved scaler without refitting on test data
    state = xdata[0:1, 0:1, :]

    return state, xdata, close

#Take Action
Project: sl-quant    Author: danielzak
def init_state(data):

    close = data
    diff = np.diff(data)
    diff = np.insert(diff, 0, 0)

    #--- Preprocess data
    xdata = np.column_stack((close, diff))
    xdata = np.nan_to_num(xdata)
    scaler = preprocessing.StandardScaler()
    xdata = scaler.fit_transform(xdata)

    state = xdata[0:1, :]
    return state, xdata

#Take Action
Project: time_series_modeling    Author: rheineke
def sample_pipelines(pca_kernels=None, svr_kernels=None):
    """
    Pipelines that can't be fit in a reasonable amount of time on the whole
    dataset
    """
    # Model instances
    model_steps = []
    if pca_kernels is None:
        pca_kernels = ['poly', 'rbf', 'sigmoid', 'cosine']
    for pca_kernel in pca_kernels:
        model_steps.append([
            KernelPCA(n_components=2, kernel=pca_kernel),
            LinearRegression(),
        ])
    if svr_kernels is None:
        svr_kernels = ['poly', 'rbf', 'sigmoid']
    for svr_kernel in svr_kernels:
        model_steps.append(SVR(kernel=svr_kernel, verbose=True, cache_size=1000))

    # Pipelines
    pipelines = []
    for m in model_steps:
        # Steps
        common_steps = [
            StandardScaler(),
        ]
        model_steps = m if isinstance(m, list) else [m]
        steps = common_steps + model_steps
        pipelines.append(make_pipeline(*steps))
    return pipelines
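As the docstring notes, these kernel pipelines are only practical on a subsample of the data. A hedged usage sketch with synthetic data and an arbitrary subsample size, not the project's actual driver code:

import numpy as np
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.normal(size=(5000, 8))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=5000)

# Fit one of the heavy pipelines on a small random subsample only.
idx = rng.choice(len(X), size=500, replace=False)
pipe = make_pipeline(StandardScaler(), KernelPCA(n_components=2, kernel='rbf'), LinearRegression())
pipe.fit(X[idx], y[idx])
print(pipe.score(X, y))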
Project: PySCUBA    Author: GGiecold
def PCA_analysis(data, mode, cell_stages = None):
    """Principal Component Analysis.
    """

    assert mode in {'pca', 'pca2'}

    mean_shifter = StandardScaler(with_std = False)

    if mode == 'pca':
        pca = PCA(min(data.shape))
        projected_data = pca.fit_transform(mean_shifter.fit_transform(data))
        components = pca.components_
    else:
        assert isinstance(cell_stages, np.ndarray)

        idx = np.where(cell_stages == np.max(cell_stages))[0]

        pca = PCA(min(idx.size, data.shape[1]))
        pca.fit(mean_shifter.fit_transform(data[idx]))
        components = pca.components_
        projected_data = np.dot(data, components.T)

    return components, projected_data
Project: jingjuSingingPhraseMatching    Author: ronggong
def mfccFeature_audio(filename_wav,index_keep,feature_type='mfcc'):
    audio               = ess.MonoLoader(downmix = 'left', filename = filename_wav, sampleRate = fs)()
    if feature_type == 'mfcc':
        feature             = getFeature(audio)
    elif feature_type == 'mfccBands1D':
        feature             = getMFCCBands1D(audio)
    elif feature_type == 'mfccBands2D':
        feature             = getMFCCBands2D(audio,nbf=True)

    if feature_type == 'mfccBands1D' or feature_type == 'mfccBands2D':
        feature             = np.log(100000 * feature + 1)
        scaler = pickle.load(open(kerasScaler_path,'rb'))
        feature = scaler.transform(feature)

    # feature             = preprocessing.StandardScaler().fit_transform(feature)
    # index_keep          = pitchProcessing_audio(filename_wav)
    feature_out         = feature[index_keep[0],:]

    for index in index_keep[1:]:
        feature_out = np.vstack((feature_out,feature[index,:]))

    if feature_type == 'mfccBands2D':
        feature_out = featureReshape(feature_out)

    return feature_out
Project: jingjuSingingPhraseMatching    Author: ronggong
def trainValidationSplit(dic_pho_feature_train,validation_size=0.2):
    '''
    split the feature in dic_pho_feature_train into train and validation set
    :param dic_pho_feature_train: input dictionary, key: phoneme, value: feature vectors
    :return:
    '''
    feature_all = []
    label_all = []
    for key in dic_pho_feature_train:
        feature = dic_pho_feature_train[key]
        label = [dic_pho_label[key]] * len(feature)

        if len(feature):
            if not len(feature_all):
                feature_all = feature
            else:
                feature_all = np.vstack((feature_all, feature))
            label_all += label
    label_all = np.array(label_all,dtype='int64')

    feature_all = preprocessing.StandardScaler().fit_transform(feature_all)
    feature_train, feature_validation, label_train, label_validation = \
        train_test_split(feature_all, label_all, test_size=validation_size, stratify=label_all)

    return feature_train, feature_validation, label_train, label_validation
Project: coremltools    Author: apple
def test_boston(self):
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = StandardScaler().fit(scikit_data.data)

        spec = converter.convert(scikit_model, scikit_data.feature_names, 'out').get_spec()

        input_data = [dict(zip(scikit_data.feature_names, row)) 
                for row in scikit_data.data]

        output_data = [{"out" : row} for row in scikit_model.transform(scikit_data.data)]

        metrics = evaluate_transformer(spec, input_data, output_data)

        assert metrics["num_errors"] == 0
Project: coremltools    Author: apple
def test_boston_OHE_plus_normalizer(self): 

        data = load_boston()

        pl = Pipeline([
            ("OHE", OneHotEncoder(categorical_features = [8], sparse=False)), 
            ("Scaler",StandardScaler())])

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, 'out')

        input_data = [dict(zip(data.feature_names, row)) for row in data.data]
        output_data = [{"out" : row} for row in pl.transform(data.data)]

        result = evaluate_transformer(spec, input_data, output_data)

        assert result["num_errors"] == 0
Project: Gaussian_process    Author: happyjin
def dataset_generator():
    """
    generate dataset for binary classification
    :return:
    """
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)

    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable
                ]

    X, y = datasets[0]
    y[y == 0] = -1
    X = StandardScaler().fit_transform(X)
    return X, y
Project: PyMLT    Author: didw
def make_x_y(self, data, code):
        data_x = []
        data_y = []
        data.loc[:, 'month'] = data.loc[:, '??']%10000/100
        data = data.drop(['??', '????'], axis=1)

        # normalization
        data = np.array(data)
        if len(data) <= 0 :
            return np.array([]), np.array([])

        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        elif code not in self.scaler:
            return np.array([]), np.array([])
        else:
            data = self.scaler[code].transform(data)

        for i in range(self.frame_len, len(data)-self.predict_dist+1):
            data_x.extend(np.array(data[i-self.frame_len:i, :]))
            data_y.append(data[i+self.predict_dist-1][0])
        np_x = np.array(data_x).reshape(-1, 23*30)
        np_y = np.array(data_y)
        return np_x, np_y
Project: PyMLT    Author: didw
def make_x_y(self, data, code):
        data_x = []
        data_y = []
        data.loc[:, 'month'] = data.loc[:, '??']%10000/100
        data = data.drop(['??', '????'], axis=1)

        # normalization
        data = np.array(data)
        if len(data) <= 0 :
            return np.array([]), np.array([])

        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        elif code not in self.scaler:
            return np.array([]), np.array([])
        else:
            data = self.scaler[code].transform(data)

        for i in range(self.frame_len, len(data)-self.predict_dist+1):
            data_x.extend(np.array(data[i-self.frame_len:i, :]))
            data_y.append(data[i+self.predict_dist-1][0])
        np_x = np.array(data_x).reshape(-1, 23*self.frame_len)
        np_y = np.array(data_y)
        return np_x, np_y
Project: lab5    Author: zlotus
def fit_model(self, logging_uuid, model=None, epochs=1000, batch_size=10):
        if model is not None:
            self.model = model
        X, y, _ = self.get_formulation_training_data()
        scaler = StandardScaler().fit(X)
        lcb = LambdaCallback(
            on_epoch_end=
            lambda epoch, logs:
            r.set(logging_uuid, json.dumps({'model_state': 'training',
                                            'epoch': epoch,
                                            'epochs': epochs,
                                            'loss': logs['loss']})),
            on_train_end=
            lambda logs:
            r.set(logging_uuid, json.dumps({'model_state': 'training',
                                            'epoch': epochs,
                                            'epochs': epochs})),
        )
        self.fit_history = self.model.fit(scaler.transform(X), y,
                                          epochs=epochs,
                                          batch_size=batch_size,
                                          verbose=0,
                                          callbacks=[lcb])
        return self.model, self.fit_history
Project: lab5    Author: zlotus
def save_grid_to_db(self, model=None):
        if model is not None:
            self.model = model
        f_instance = Formulation.query.get(self.f_id)
        f_instance.formulation_data_grid.delete()
        # prepare data lines to plot
        X, y, data_traces = self.get_formulation_training_data()
        # train model to fit data lines
        scaler = StandardScaler().fit(X)
        # prepare mesh grid to plot
        max_t, max_f = np.amax(X, axis=0)
        min_t, min_f = np.amin(X, axis=0)
        xv, yv = np.meshgrid(np.arange(floor(min_t), ceil(max_t)),
                             np.arange(floor(min_f), ceil(max_f)),
                             indexing='ij')
        xv = xv.reshape((xv.shape[0], xv.shape[1], -1))
        yv = yv.reshape((yv.shape[0], yv.shape[1], -1))
        grid_xys = np.concatenate((xv, yv), axis=2).reshape((-1, 2))
        # predict z for grid
        grid_zs = self.model.predict(scaler.transform(grid_xys)).reshape((-1))
        for x, y, z in zip(grid_xys[:, 0], grid_xys[:, 1], grid_zs):
            f_instance.formulation_data_grid.append(FormulationDataGrid(x_value=x, y_value=y, z_value=z))
        db.session.commit()
Project: color-features    Author: skearnes
def scale_features(features, train):
    """Scale features, using test set to learn parameters.

    Returns:
        Scaled copy of features.
    """
    if FLAGS.scaling is None:
        return features
    logging.info('Scaling features with %s', FLAGS.scaling)
    if FLAGS.scaling == 'max_abs':
        scaler = preprocessing.MaxAbsScaler()
    elif FLAGS.scaling == 'standard':
        scaler = preprocessing.StandardScaler()
    else:
        raise ValueError('Unrecognized scaling %s' % FLAGS.scaling)
    scaler.fit(features[train])
    return scaler.transform(features)
Project: fri    Author: lpfann
def test_multiprocessing():
    generator = check_random_state(0)
    data = genData(n_samples=200, n_features=4, n_redundant=2,strRel=2,
                    n_repeated=0, class_sep=1, flip_y=0, random_state=generator)

    X_orig, y = data
    X_orig = StandardScaler().fit(X_orig).transform(X_orig)

    X = np.c_[X_orig, generator.normal(size=(len(X_orig), 6))]
    y = list(y)   # regression test: list should be supported

    # Test using the score function
    fri = EnsembleFRI(FRIClassification(random_state=generator),n_bootstraps=5,n_jobs=2, random_state=generator)
    fri.fit(X, y)
    # non-regression test for missing worst feature:
    assert len(fri.allrel_prediction_) == X.shape[1]
    assert len(fri.interval_) == X.shape[1]

    # All strongly relevant features have a lower bound > 0
    assert np.all(fri.interval_[0:2,0]>0)
    # All weakly relevant features should have a lower bound 0
    assert np.any(fri.interval_[2:4,0]>0) == False
Project: yellowbrick    Author: DistrictDataLabs
def __init__(self, X=None, y=None, ax=None, scale=True, color=None, proj_dim=2,
                 colormap=palettes.DEFAULT_SEQUENCE, **kwargs):
        super(PCADecomposition, self).__init__(ax=ax, **kwargs)
        # Data Parameters
        if proj_dim not in (2, 3):
            raise YellowbrickValueError("proj_dim object is not 2 or 3.")

        self.color = color
        self.pca_features_ = None
        self.scale = scale
        self.proj_dim = proj_dim
        self.pca_transformer = Pipeline([('scale', StandardScaler(with_std=self.scale)),
                                         ('pca', PCA(self.proj_dim, ))
                                         ])
        # Visual Parameters
        self.colormap = colormap
Project: python-machine-learning-book    Author: jeremyn
def get_standardized_wine_data():
    df = pd.read_csv(os.path.join('datasets', 'wine.data'), header=None)
    df.columns = [
        'Class label', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
        'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
        'Proanthocyanins', 'Color intensity', 'Hue',
        'OD280/OD315 of diluted wines', 'Proline',
    ]
    X = df.iloc[:, 1:].values
    y = df.iloc[:, 0].values
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.3,
        random_state=0,
    )
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)

    return X_train_std, X_test_std, y_train, y_test
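A short, hedged example of how a standardized split like the one above is typically consumed; it uses scikit-learn's bundled copy of the wine data and an illustrative classifier rather than the book's own code:

from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)  # transform only; never refit on the test set

clf = LogisticRegression(max_iter=1000).fit(X_train_std, y_train)
print('test accuracy: %.3f' % clf.score(X_test_std, y_test))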
Project: leap-scd    Author: smittal6
def load_data_train(trainfile):
        print "Getting the training data"
        a=htk.open(trainfile)
        train_data=a.getall()
        print "Done with Loading the training data: ",train_data.shape
        data=filter_data_train(train_data)
        # x_train=cnn_reshaper(data[:,:-2]) #Set to different column based on different model
        x_train=data[:,:-2] #Set to different column based on different model
        scaler=StandardScaler().fit(x_train)
        # x_train=scaler.transform(x_train)
        Y_train=data[:,-2]
        print(Y_train.shape)
        # print np.where(Y_train==2)
        Y_train=Y_train.reshape(Y_train.shape[0],1)
        y_train=np_utils.to_categorical(Y_train,2)
        print(y_train[0:5,:])
        gender_train=data[:,-1]
        del data
        return x_train,y_train,gender_train,scaler
Project: leap-scd    Author: smittal6
def load_data_train(trainfile):
        print "Getting the training data"
        a=htk.open(trainfile)
        train_data=a.getall()
        print "Done with Loading the training data: ",train_data.shape
        data=filter_data_train(train_data)
        # x_train=cnn_reshaper(data[:,:-2]) #Set to different column based on different model
        x_train=data[:,:-2] #Set to different column based on different model
        scaler=StandardScaler().fit(x_train)
        # x_train=scaler.transform(x_train)
        Y_train=data[:,-2]
        print(Y_train.shape)
        # print np.where(Y_train==2)
        Y_train=Y_train.reshape(Y_train.shape[0],1)
        y_train=np_utils.to_categorical(Y_train,2)
        print(y_train[0:5,:])
        gender_train=data[:,-1]
        del data
        #x_train has complete data, that is gammatone and also the pitch variance values.
        return x_train,y_train,gender_train,scaler
Project: leap-scd    Author: smittal6
def load_data_train(trainfile):
        print "Getting the training data"
        a=htk.open(trainfile)
        train_data=a.getall()
        print "Done with Loading the training data: ",train_data.shape
        data=filter_data_train(train_data)
        x_train=data[:,:-2]
        scaler=StandardScaler().fit(x_train)
        # x_train=scaler.transform(x_train)
        x_train=cnn_reshaper(data[:,:-2]) #Set to different column based on different model
        Y_train=data[:,-2]
        print(Y_train.shape)
        # print np.where(Y_train==2)
        Y_train=Y_train.reshape(Y_train.shape[0],1)
        y_train=np_utils.to_categorical(Y_train,2)
        gender_train=data[:,-1]
        del data
        return x_train,y_train,gender_train,scaler
Project: muffnn    Author: civisanalytics
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.

    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)
Project: algo-trading-pipeline    Author: NeuralKnot
def create_model(self, training_articles):
        model = OneVsRestClassifier(svm.SVC(probability=True))

        features = []
        labels = []
        i = 0
        for article in training_articles:
            print("Generating features for article " + str(i) + "...")
            google_cloud_response = self.analyze_text_google_cloud(article["article"])
            relevant_entities = self.get_relevant_entities(google_cloud_response["entities"], article["market"]["entities"], article["market"]["wikipedia_urls"])

            # Only count this article if a relevant entity is present
            if relevant_entities:
                article_features = self.article_features(relevant_entities, article["market"], google_cloud_response, article["article"])
                features.append(article_features)
                labels.append(article["label"])
            else:
                print("Skipping article " + str(i) + "...")

            i = i + 1

        print("Performing feature scaling...")
        scaler = preprocessing.StandardScaler().fit(features)
        features_scaled = scaler.transform(features)

        print("Fitting model...")
        model.fit(features_scaled, labels)

        print("Saving model...")
        joblib.dump(scaler, "data_analysis/caler.pkl")
        joblib.dump(model, "data_analysis/model.pkl")

        print("Done!")

    # For use in prod
Project: NeoAnalysis    Author: neoanalysis
def __load_chn_data(self,selectChan,file_name):
        spk_startswith = "spike_{0}".format(selectChan)
        with hp.File(file_name,"r") as f:
            times = list()
            waveforms = list()
            units = list()
            for chn_unit in f["spikes"].keys():
                if chn_unit.startswith(spk_startswith):
                    tep_time = f["spikes"][chn_unit]["times"].value
                    waveform = f["spikes"][chn_unit]["waveforms"].value
                    unit = int(chn_unit.split("_")[-1])
                    unit = np.ones(tep_time.shape,dtype=np.int32)*unit
                    times.append(tep_time)
                    waveforms.append(waveform)
                    units.append(unit)
            if times:
                times = np.hstack(times)
                units = np.hstack(units)
                waveforms = np.vstack(waveforms)
                sort_index = np.argsort(times)
                units = units[sort_index]
                waveforms = waveforms[sort_index]
                times = times[sort_index]
                # calculate waveform_range 
                waveforms_max = np.apply_along_axis(max,1,waveforms)
                waveforms_min = np.apply_along_axis(min,1,waveforms)
                waveforms_range = np.vstack([waveforms_min,waveforms_max]).T
                # calculate PCA of waveforms
                scaler = StandardScaler()
                scaler.fit(waveforms)
                waveforms_scaled = scaler.transform(waveforms)
                pca = PCA(n_components=self.pca_used_num)
                pca.fit(waveforms_scaled)
                wavePCAs = pca.transform(waveforms_scaled)
                return times,units,waveforms_range,wavePCAs
            else:
                return None,None,None,None
Project: Kaggle    Author: lawlite19
def pre_processData(train_data,file_path):
    train_data.loc[(train_data.Age.isnull()), 'Age' ] = np.mean(train_data.Age)  # fill missing Age values with the mean age
    train_data.loc[(train_data.Cabin.notnull(),'Cabin')] = 'yes' # mark Cabin as 'yes' when present, 'no' when missing
    train_data.loc[(train_data.Cabin.isnull(),'Cabin')] = 'no'
    '''0/1 encode the categorical features'''
    dummies_cabin = pd.get_dummies(train_data['Cabin'],prefix='Cabin')  # get_dummies expands a categorical column into 0/1 indicator columns named with the given prefix
    dummies_Embarked = pd.get_dummies(train_data['Embarked'], prefix='Embarked')
    dummies_Sex = pd.get_dummies(train_data['Sex'], prefix='Sex')
    dummies_Pclass = pd.get_dummies(train_data['Pclass'],prefix='Pclass')
    train_data = pd.concat([train_data,dummies_cabin,dummies_Embarked,dummies_Pclass,dummies_Sex], axis=1)  # concatenate the dummy columns along axis=1
    train_data.drop(['Pclass','Name','Sex','Embarked','Cabin','Ticket'],axis=1,inplace=True)   # drop the original categorical columns
    header_string = ','.join(train_data.columns.tolist())  # build the CSV header from the column names
    np.savetxt(file_path+r'/pre_processData1.csv', train_data, delimiter=',',header=header_string)  # save the intermediate preprocessed data
    '''standardize the numerical features (Age and Fare)'''
    scaler = StandardScaler()
    age_scaler = scaler.fit(train_data[['Age']])  # StandardScaler expects 2-D input
    train_data['Age'] = age_scaler.transform(train_data[['Age']]).ravel()
    if np.sum(train_data.Fare.isnull()):  # fill missing Fare values with the mean fare
        train_data.loc[(train_data.Fare.isnull(),'Fare')]=np.mean(train_data.Fare)
    fare_scaler = scaler.fit(train_data[['Fare']])
    train_data['Fare'] = fare_scaler.transform(train_data[['Fare']]).ravel()
    header_string = ','.join(train_data.columns.tolist())
    np.savetxt(file_path+r'/pre_processData_scaled.csv', train_data, delimiter=',',header=header_string)  # save the scaled data
    return train_data






## feature engineering
Project: PortfolioTimeSeriesAnalysis    Author: MizioAnd
def feature_scaling(self, df):
        df = df.copy()
        # Standardization (centering and scaling) of dataset that removes mean and scales to unit variance
        standard_scaler = StandardScaler()
        numerical_feature_names_of_non_modified_df = TwoSigmaFinModTools._numerical_feature_names
        if any(tuple(df.columns == 'y')):
            if not TwoSigmaFinModTools._is_one_hot_encoder:
                numerical_feature_names_of_non_modified_df = np.concatenate(
                    [TwoSigmaFinModTools._feature_names_num.values, numerical_feature_names_of_non_modified_df.values])
            # Include scaling of y
            y = df['y'].values
            relevant_features = df[numerical_feature_names_of_non_modified_df].columns[
                (df[numerical_feature_names_of_non_modified_df].columns != 'y')
                & (df[numerical_feature_names_of_non_modified_df].columns != 'id')]
            mask = ~df[relevant_features].isnull()
            res = standard_scaler.fit_transform(X=df[relevant_features][mask].values, y=y)
            if (~mask).sum().sum() > 0:
                df = self.standardize_relevant_features(df, relevant_features, res)
            else:
                df.loc[:, tuple(relevant_features)] = res
        else:
            if not TwoSigmaFinModTools._is_one_hot_encoder:
                numerical_feature_names_of_non_modified_df = np.concatenate(
                    [TwoSigmaFinModTools._feature_names_num.values, numerical_feature_names_of_non_modified_df.values])
            relevant_features = df[numerical_feature_names_of_non_modified_df].columns[
                (df[numerical_feature_names_of_non_modified_df].columns != 'id')]
            mask = ~df[relevant_features].isnull()
            res = standard_scaler.fit_transform(df[relevant_features][mask].values)
            if mask.sum().sum() > 0:
                df = self.standardize_relevant_features(df, relevant_features, res)
            else:
                df.loc[:, tuple(relevant_features)] = res
        return df
Project: pokedex-as-it-should-be    Author: leotok
def make_standard(X_train, X_test):
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    pickle.dump(scaler, open("scaler_model.sav", 'wb'))
    return X_train, X_test
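Because the fitted scaler is pickled above, the inference-time side would look roughly like this (the file name is reused from the snippet; new_samples is a placeholder whose width must match the training features):

import pickle
import numpy as np

# Load the scaler persisted by make_standard() and apply it to unseen samples.
with open("scaler_model.sav", 'rb') as f:
    scaler = pickle.load(f)

new_samples = np.array([[0.3, 1.2, 5.0]])   # placeholder feature vector
new_samples_std = scaler.transform(new_samples)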
Project: iFruitFly    Author: AdnanMuhib
def v_demo(dir, prefix, pre_prefix, file_name, _dir):
    _val = []
    _coords = []
    file_dir_fix = dir + "\\output_INFLO.csv"
    #f = "C:\Users\Abdullah Akmal\Documents\ifruitfly_temp\output_files\output_INFLO.csv"
    with open(file_dir_fix, 'rU') as inp:
        rd = csv.reader(inp)
        for row in rd:
            _val.append([row[1], row[2], row[0]])

    #print(_center)
    _val = np.asarray(_val)
    _val_original = _val
    _val_original = list(map(myFloat, _val_original))  # materialise the map so this also works under Python 3
    _val_original = list(map(myInt, _val_original))
    #_val_original = map(myTemp, _val_original)
    _val_original = np.asarray(_val_original)
    _val = preprocessing.StandardScaler().fit_transform(_val)
    #_center = preprocessing.MinMaxScaler()
    #_center.fit_transform(_val)
    #_arr = StandardScaler().inverse_transform(_center)
    #print(_arr)
    #print(_center)
    new_file = prefix + file_name + ".png"
    dbFun(_val, _val_original, new_file)
    #_len = len(_center)
    return
Project: iFruitFly    Author: AdnanMuhib
def v_demo(dir, prefix, pre_prefix, file_name, _dir):
    _val = []
    _coords = []
    file_dir_fix = dir + "\\output_INFLO.csv"
    #f = "C:\Users\Abdullah
    #Akmal\Documents\ifruitfly_temp\output_files\output_INFLO.csv"
    with open(file_dir_fix, 'rU') as inp:
        rd = csv.reader(inp)
        for row in rd:
            _val.append([row[1], row[2], row[0]])

    #print(_center)
    _val = np.asarray(_val)
    _val_original = _val
    _val_original = list(map(myFloat, _val_original))  # materialise the map so this also works under Python 3
    _val_original = list(map(myInt, _val_original))
    #_val_original = map(myTemp, _val_original)
    _val_original = np.asarray(_val_original)
    _val = preprocessing.StandardScaler().fit_transform(_val)
    #_center = preprocessing.MinMaxScaler()
    #_center.fit_transform(_val)
    #_arr = StandardScaler().inverse_transform(_center)
    #print(_arr)
    #print(_center)
    new_file = prefix + file_name + ".png"
    dbFun(_val, _val_original, new_file)
    #_len = len(_center)
    return

##############################################################################################
# Getting the clusters and printing in the most trivial way as asked by Dr Sheikh Faisal
Project: sef    Author: passalis
def supervised_reduction(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    train_data, train_labels, test_data, test_labels = dataset_loader(dataset, seed=1)


    scaler = StandardScaler()
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)

    if dataset == 'yale':
        regularizer_weight = 0.0001
    else:
        regularizer_weight = 1

    n_classes = len(np.unique(train_labels))

    if method == 'lda':
        proj = LinearDiscriminantAnalysis(n_components=n_classes - 1)
        proj.fit(train_data, train_labels)
    elif method == 's-lda':
        proj = LinearSEF(train_data.shape[1], output_dimensionality=(n_classes - 1))
        proj.cuda()
        loss = proj.fit(data=train_data, target_labels=train_labels, epochs=100,
                        target='supervised', batch_size=256, regularizer_weight=regularizer_weight, learning_rate=0.001,
                        verbose=False)

    elif method == 's-lda-2x':
        # SEF output dimensions are not limited
        proj = LinearSEF(train_data.shape[1], output_dimensionality=2 * (n_classes - 1))
        proj.cuda()
        loss = proj.fit(data=train_data, target_labels=train_labels, epochs=100,
                        target='supervised', batch_size=256, regularizer_weight=regularizer_weight, learning_rate=0.001,
                        verbose=False)

    acc = evaluate_svm(proj.transform(train_data), train_labels,
                       proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
Project: sef    Author: passalis
def outofsample_extensions(method=None, dataset=None):
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    train_data, train_labels, test_data, test_labels = dataset_loader(dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    if method == 'linear-regression':
        from sklearn.preprocessing import StandardScaler
        std = StandardScaler()
        train_data = std.fit_transform(train_data)
        test_data = std.transform(test_data)

        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    elif method == 'c-ISOMAP-10d' or method == 'c-ISOMAP-20d':
        # Use the SEF to provide out-of-sample extensions
        if method == 'c-ISOMAP-10d':
            proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
            proj.cuda()
        else:
            proj = LinearSEF(train_data.shape[1], output_dimensionality=20)
            proj.cuda()
        loss = proj.fit(data=train_data, target_data=train_data_isomap, target='copy',
                        epochs=50, batch_size=1024, verbose=False, learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
Project: sef    Author: passalis
def __init__(self, input_dimensionality, output_dimensionality, scaler='default'):
        """
        SEF_Base constuctor
        :param input_dimensionality: dimensionality of the input space
        :param output_dimensionality: dimensionality of the target space
        :param scaler: the scaler used to scale the data
        """

        self.input_dimensionality = input_dimensionality
        self.output_dimensionality = output_dimensionality

        if scaler == 'default':
            self.scaler = StandardScaler()
        elif scaler is not None:
            self.scaler = scaler()
        else:
            self.scaler = None

        # Scaling factor for computing the similarity matrix of the projected data
        self.sigma_projection = np.float32(0.1)
        self.use_gpu = False

        # The parameters of the model that we want to learn
        self.trainable_params = []

        # Other non-trainable parametsr
        self.non_trainable_params = []
Project: AutoFolio    Author: mlindauer
def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace 
        '''
        switch = CategoricalHyperparameter(
            "StandardScaler", choices=[True, False], default=True)
        cs.add_hyperparameter(switch)
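A hedged usage sketch for the hyperparameter registration above, assuming the ConfigSpace package (newer releases spell the keyword default_value rather than default, as used in the snippet):

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(CategoricalHyperparameter(
    "StandardScaler", choices=[True, False], default_value=True))
print(cs.sample_configuration())  # e.g. a configuration with StandardScaler set to True or False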