Python sklearn.preprocessing 模块,minmax_scale() 实例源码

我们从 Python 开源项目中提取了以下 15 个代码示例,用于说明如何使用 sklearn.preprocessing.minmax_scale()。

项目:ottertune    作者:cmu-db    | 项目源码 | 文件源码
def combine_rmse_gpvar(grid_scores, w_rmse=0.8, w_gpvar=0.2):
    """Blend two per-candidate scores into a single weighted score.

    Args:
        grid_scores: Sequence of objects exposing ``mean_scores``; entry 0 is
            treated as the RMSE and entry 1 as the GP variance (going by the
            names used in this function).
        w_rmse: Weight applied to the min-max scaled first column.
        w_gpvar: Weight applied to the min-max scaled second column.

    Returns:
        A ``CombinedScore`` built from, in order: the weighted scores, the
        scaled two-column score matrix, the argsort of the weighted scores,
        the argsort of the raw first column and the argsort of the raw second
        column.
    """
    from sklearn.preprocessing import minmax_scale

    # Column 0 <- mean_scores[0], column 1 <- mean_scores[1].
    score_matrix = np.empty((len(grid_scores), 2))
    for row, entry in enumerate(grid_scores):
        score_matrix[row, 0] = entry.mean_scores[0]
        score_matrix[row, 1] = entry.mean_scores[1]

    # The per-column rankings are taken from the raw values, before scaling.
    rmse_order = np.argsort(score_matrix[:, 0])
    gpvar_order = np.argsort(score_matrix[:, 1])

    # Put both columns on a common range so the two weights are comparable.
    score_matrix = minmax_scale(score_matrix)
    weighted = w_rmse * score_matrix[:, 0] + w_gpvar * score_matrix[:, 1]

    return CombinedScore(weighted,
                         score_matrix,
                         np.argsort(weighted),
                         rmse_order,
                         gpvar_order)
项目:skp_edu_docker    作者:TensorMSA    | 项目源码 | 文件源码
def make_drop_duplicate(self, _df_csv_read_ori, _drop_duplicate , _label):
        """Optionally drop rows whose feature values are duplicated.

        Args:
            _df_csv_read_ori: pandas DataFrame to de-duplicate.
            _drop_duplicate: Switch. ``None``, ``'null'`` or ``False`` leaves
                the frame untouched; any other value enables de-duplication.
            _label: Name of the label column. It is excluded from the set of
                columns compared when looking for duplicates.

        Returns:
            The input DataFrame itself when de-duplication is disabled,
            otherwise a new DataFrame that keeps the first row of every group
            of rows with identical feature values.

        Side effects:
            When de-duplication runs, the result is also written as CSV to
            ``self.data_src_path + "/backup/"`` under a UTC-timestamped name
            ending in ``_dup.csvbk``.
        """
        if _drop_duplicate in (None, 'null', False):
            logging.info("No Duplicate")
            return _df_csv_read_ori

        # Compare on every column except the label.
        feature_columns = _df_csv_read_ori.columns.tolist()
        feature_columns.remove(_label)

        deduplicated = _df_csv_read_ori.drop_duplicates(feature_columns, keep="first")
        removed_rows = len(_df_csv_read_ori.index) - len(deduplicated.index)
        logging.info("duplicated row delete {0}".format(removed_rows))

        backup_name = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + "_dup.csvbk"
        deduplicated.to_csv(self.data_src_path + "/backup/" + backup_name)
        return deduplicated
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cell_fd_extention(fname_org='sheet.gz/cell_db.cvs.gz', camera_bit_resolution=14):
    """Load the cell table and append Fresnel-diffraction image columns.

    Args:
        fname_org: Path handed to ``pd.read_csv``.
        camera_bit_resolution: Exponent ``b``; the min-max scaled magnitude
            is multiplied by ``2**b`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two added columns:
        ``'freznel image'`` (the flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its scaled magnitude as integers).
    """
    source_df = pd.read_csv(fname_org)
    # presumably (number of images, image height, image width) — confirm
    # against cell_fd_info.
    n_images, size_x, size_y = cell_fd_info(source_df)

    extended_df = source_df.copy()

    # Fresnel diffraction, stored flattened.
    diffracted = cell_fd_conv(source_df)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per leading index so the magnitudes can be min-max scaled and
    # stretched by 2**camera_bit_resolution.
    # NOTE(review): minmax_scale scales each column independently by default,
    # i.e. per position across rows rather than per row — confirm intended.
    magnitude = np.abs(diffracted.reshape(n_images, -1))
    stretched = preprocessing.minmax_scale(magnitude) * (2 ** camera_bit_resolution)

    # Going through the 3-D shape also checks that the sizes reported by
    # cell_fd_info match the amount of data before it is flattened again.
    as_integers = stretched.reshape(n_images, size_x, size_y).astype(int)
    extended_df['mag freznel image'] = as_integers.reshape(-1)

    return extended_df
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cell_fd_extention(fname_org='sheet.gz/cell_db.cvs.gz', camera_bit_resolution=14):
    """Load the cell table and append Fresnel-diffraction image columns.

    Args:
        fname_org: Path handed to ``pd.read_csv``.
        camera_bit_resolution: Exponent ``b``; the min-max scaled magnitude
            is multiplied by ``2**b`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two added columns:
        ``'freznel image'`` (the flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its scaled magnitude as integers).
    """
    source_df = pd.read_csv(fname_org)
    # presumably (number of images, image height, image width) — confirm
    # against cell_fd_info.
    n_images, size_x, size_y = cell_fd_info(source_df)

    extended_df = source_df.copy()

    # Fresnel diffraction, stored flattened.
    diffracted = cell_fd_conv(source_df)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per leading index so the magnitudes can be min-max scaled and
    # stretched by 2**camera_bit_resolution.
    # NOTE(review): minmax_scale scales each column independently by default,
    # i.e. per position across rows rather than per row — confirm intended.
    magnitude = np.abs(diffracted.reshape(n_images, -1))
    stretched = preprocessing.minmax_scale(magnitude) * (2 ** camera_bit_resolution)

    # Going through the 3-D shape also checks that the sizes reported by
    # cell_fd_info match the amount of data before it is flattened again.
    as_integers = stretched.reshape(n_images, size_x, size_y).astype(int)
    extended_df['mag freznel image'] = as_integers.reshape(-1)

    return extended_df
项目:fx    作者:TaRyu    | 项目源码 | 文件源码
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from the scaled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and reshaped into rows of 24 values; only the first value
    of each row is used. Sample ``i`` holds the first values of rows
    ``i, i+24, i+48, i+72, i+96`` and its target is the first value of row
    ``i+120``.

    Args:
        file_in: Path of the pickle to read.
        file_out: Indexable pair of destinations; ``file_out[0]`` receives
            the samples and ``file_out[1]`` the targets (via ``np.save``).
    """
    row_stride = 24
    n_lags = 5
    span = n_lags * row_stride

    close = pd.read_pickle(file_in)['close']
    rows = preprocessing.minmax_scale(close).reshape(-1, 24)

    samples = np.array([[rows[start + lag * row_stride][0] for lag in range(n_lags)]
                        for start in range(len(rows) - span + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([rows[start + span][0]
                        for start in range(len(rows) - span)])

    # The final sample has no following target, so it is left out.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
项目:fx    作者:TaRyu    | 项目源码 | 文件源码
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from the scaled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and reshaped into rows of 24 values; only the first value
    of each row is used. Sample ``i`` holds the first values of the five
    consecutive rows ``i .. i+4`` and its target is the first value of row
    ``i+5``.

    Args:
        file_in: Path of the pickle to read.
        file_out: Indexable pair of destinations; ``file_out[0]`` receives
            the samples and ``file_out[1]`` the targets (via ``np.save``).
    """
    n_lags = 5

    close = pd.read_pickle(file_in)['close']
    rows = preprocessing.minmax_scale(close).reshape(-1, 24)

    samples = np.array([[rows[start + lag][0] for lag in range(n_lags)]
                        for start in range(len(rows) - n_lags + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([rows[start + n_lags][0]
                        for start in range(len(rows) - n_lags)])

    # The final sample has no following target, so it is left out.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
项目:fx    作者:TaRyu    | 项目源码 | 文件源码
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from the scaled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and used as a flat sequence. Sample ``i`` holds the six
    values at positions ``i, i+576, ..., i+5*576`` (576 = 24 * 24) and its
    target is the value at position ``i + 6*576``.

    Args:
        file_in: Path of the pickle to read.
        file_out: Indexable pair of destinations; ``file_out[0]`` receives
            the samples and ``file_out[1]`` the targets (via ``np.save``).
    """
    stride = 24 * 24
    n_lags = 6
    span = n_lags * stride

    close = pd.read_pickle(file_in)['close']
    series = preprocessing.minmax_scale(close)

    samples = np.array([[series[start + lag * stride] for lag in range(n_lags)]
                        for start in range(len(series) - span + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([series[start + span]
                        for start in range(len(series) - span)])

    # The final sample has no following target, so it is left out.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
项目:xplore    作者:fahd09    | 项目源码 | 文件源码
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
        '''
        Scales a given set of numerical columns. This only works for columns
        with numerical values.

        Parameters
        ----------
        col : a string of a column name, or a list of many columns names or
                None (default). If col is None, all numerical columns will
                be used.
        scaling  : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
                    'robust_scale'}
            User-defined scaling functions can also be used through self.transform_feature
        scaling_parms : dictionary
            any additional parameters to be used for sklearn's scaling functions.
            When omitted, a per-function default set is used. For 'zscore' the
            value is forwarded unchanged.

        Raises
        ------
        TypeError
            If ``scaling`` is not one of the supported names.

        '''
        self._validate_params(params_list   = {'col':col,'scaling':scaling},
                              expected_types= {'col':[str,list,type(None)], 'scaling':[str,type(None)]})

        if scaling is None:
            scaling = 'minmax_scale'

        # Defaults used only when the caller gives no scaling_parms.
        default_parms = {
            'minmax_scale': {'feature_range': (0, 1), 'axis': 0},
            'scale': {'with_mean': True, 'with_std': True, 'axis': 0},
            'maxabs_scale': {'axis': 0},
            'robust_scale': {'with_centering': True, 'with_scaling': True, 'axis': 0},
        }

        # Validate the name independently of scaling_parms. Previously each
        # branch also required scaling_parms to be None, so passing custom
        # parameters for a supported scaler raised 'UNSUPPORTED scaling TYPE'.
        if scaling == 'zscore':
            scaling = 'lambda x: (x - x.mean()) / x.std()'
        elif scaling in default_parms:
            if scaling_parms is None:
                scaling_parms = default_parms[scaling]
        else:
            raise TypeError('UNSUPPORTED scaling TYPE')

        # 'addtional_params' (sic) is the keyword transform_feature is called with.
        self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
项目:histonets-cv    作者:sul-cidr    | 项目源码 | 文件源码
def convert(image):
    """Rescale an image matrix to the 0-255 range and return it as bytes.

    Meant for handing a scikit-image binary image matrix to OpenCV: the
    values are min-max scaled to ``(0, 255)`` and cast to ``np.ubyte``.
    Warnings raised while doing so are captured rather than shown.
    """
    with warnings.catch_warnings(record=True):
        # Ignore the dtype-conversion warning category during scaling.
        warnings.filterwarnings('ignore', category=DataConversionWarning)
        stretched = minmax_scale(image, feature_range=(0, 255))
        return stretched.astype(np.ubyte)
项目:skp_edu_docker    作者:TensorMSA    | 项目源码 | 文件源码
def make_preprocessing_pandas(self, _df_csv_read_ori, _preprocessing_type , _label):
        """Apply scikit-learn scalers to the numeric, non-label columns.

        Args:
            _df_csv_read_ori: pandas DataFrame; it is modified in place.
            _preprocessing_type: ``None`` or ``'null'`` to skip preprocessing,
                otherwise a single name or a collection of names out of
                ``'scale'``, ``'minmax_scale'``, ``'robust_scale'``,
                ``'normalize'``, ``'maxabs_scale'``. Several names are applied
                one after another in that fixed order.
            _label: Label column name, or a collection of label column names.
                Label columns are never transformed.

        Returns:
            The same DataFrame object that was passed in.
        """
        if _preprocessing_type is None or _preprocessing_type == 'null':
            logging.info("No Preprocessing")
            return _df_csv_read_ori

        logging.info("Preprocessing type : {0}".format(_preprocessing_type))

        # A bare string is one name, not a haystack: with substring matching
        # 'scale' would also fire for 'minmax_scale', 'maxabs_scale', etc.
        if isinstance(_preprocessing_type, str):
            requested = [_preprocessing_type]
        else:
            requested = _preprocessing_type
        # Same for the label: a column called 'a' must not be skipped just
        # because the label is called 'label'.
        label_columns = [_label] if isinstance(_label, str) else _label

        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        # Fixed application order when several types are requested.
        scaler_names = ('scale', 'minmax_scale', 'robust_scale', 'normalize', 'maxabs_scale')
        selected = [name for name in scaler_names if name in requested]

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for column, dtype in _df_csv_read_ori.dtypes.items():
            if dtype not in numerics or column in label_columns:
                continue
            for name in selected:
                # NOTE(review): 'normalize' receives a single column like the
                # other scalers — confirm the sklearn version in use accepts
                # 1-D input there.
                scaler = getattr(preprocessing, name)
                # Missing values are replaced by 0.0 before every scaler.
                _df_csv_read_ori[column] = scaler(_df_csv_read_ori[column].fillna(0.0))
        return _df_csv_read_ori
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def extention(self, camera_bit_resolution=14):
        """Load the cell table and append Fresnel-diffraction image columns.

        Reads the CSV at ``self.fname_org`` and runs ``cell_fd_conv`` on it
        with ``self.h2d``.

        Args:
            camera_bit_resolution: Exponent ``b``; the min-max scaled
                magnitude is multiplied by ``2**b`` before being cast to
                ``int``.

        Returns:
            A copy of the loaded DataFrame with two added columns:
            ``'freznel image'`` (the flattened output of ``cell_fd_conv``)
            and ``'mag freznel image'`` (its scaled magnitude as integers).
        """
        source_path, kernel = self.fname_org, self.h2d

        source_df = pd.read_csv(source_path)
        # presumably (number of images, image height, image width) — confirm
        # against cell_fd_info.
        n_images, size_x, size_y = cell_fd_info(source_df)

        extended_df = source_df.copy()

        # Fresnel diffraction, stored flattened.
        diffracted = cell_fd_conv(source_df, kernel)
        extended_df['freznel image'] = diffracted.reshape(-1)

        # One row per leading index so the magnitudes can be min-max scaled
        # and stretched by 2**camera_bit_resolution.
        # NOTE(review): minmax_scale scales each column independently by
        # default, i.e. per position across rows — confirm intended.
        magnitude = np.abs(diffracted.reshape(n_images, -1))
        stretched = preprocessing.minmax_scale(magnitude) * (2 ** camera_bit_resolution)

        # Going through the 3-D shape also checks that the sizes reported by
        # cell_fd_info match the amount of data before flattening again.
        as_integers = stretched.reshape(n_images, size_x, size_y).astype(int)
        extended_df['mag freznel image'] = as_integers.reshape(-1)

        return extended_df


#Deep Learning
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def extention(self, camera_bit_resolution=14):
        """Load the cell table and append Fresnel-diffraction image columns.

        Reads the CSV at ``self.fname_org`` and runs ``cell_fd_conv`` on it
        with ``self.h2d``.

        Args:
            camera_bit_resolution: Exponent ``b``; the min-max scaled
                magnitude is multiplied by ``2**b`` before being cast to
                ``int``.

        Returns:
            A copy of the loaded DataFrame with two added columns:
            ``'freznel image'`` (the flattened output of ``cell_fd_conv``)
            and ``'mag freznel image'`` (its scaled magnitude as integers).
        """
        source_path, kernel = self.fname_org, self.h2d

        source_df = pd.read_csv(source_path)
        # presumably (number of images, image height, image width) — confirm
        # against cell_fd_info.
        n_images, size_x, size_y = cell_fd_info(source_df)

        extended_df = source_df.copy()

        # Fresnel diffraction, stored flattened.
        diffracted = cell_fd_conv(source_df, kernel)
        extended_df['freznel image'] = diffracted.reshape(-1)

        # One row per leading index so the magnitudes can be min-max scaled
        # and stretched by 2**camera_bit_resolution.
        # NOTE(review): minmax_scale scales each column independently by
        # default, i.e. per position across rows — confirm intended.
        magnitude = np.abs(diffracted.reshape(n_images, -1))
        stretched = preprocessing.minmax_scale(magnitude) * (2 ** camera_bit_resolution)

        # Going through the 3-D shape also checks that the sizes reported by
        # cell_fd_info match the amount of data before flattening again.
        as_integers = stretched.reshape(n_images, size_x, size_y).astype(int)
        extended_df['mag freznel image'] = as_integers.reshape(-1)

        return extended_df


#Deep Learning
项目:CryptoCurrencyTrader    作者:llens    | 项目源码 | 文件源码
def normalise_and_centre_score(strategy_score, up_threshold, low_threshold):
    """Normalise a score, then saturate and dead-band it around its centre.

    The score is min-max scaled and shifted down by 0.5. Values above
    ``up_threshold`` or below ``-up_threshold`` are set to those bounds,
    values whose magnitude is below ``low_threshold`` are set to 0, and the
    0.5 shift is undone before returning.

    Args:
        strategy_score: Scores passed to ``minmax_scale``.
        up_threshold: Saturation bound applied symmetrically around the centre.
        low_threshold: Magnitudes below this (after saturation) are zeroed.

    Returns:
        The processed score array.
    """
    centred = minmax_scale(strategy_score) - 0.5

    # Saturate at +/- up_threshold.
    centred[centred > up_threshold] = up_threshold
    centred[centred < -up_threshold] = -up_threshold

    # Dead band: small deviations from the centre are flattened to zero.
    centred[abs(centred) < low_threshold] = 0

    return centred + 0.5
项目:fx    作者:TaRyu    | 项目源码 | 文件源码
def features_processing(file_in, file_out):
    """Min-max scale a saved feature array along axis 1 and save it as float32.

    Args:
        file_in: Source passed to ``np.load``.
        file_out: Destination passed to ``np.save``.
    """
    raw = np.load(file_in)
    scaled = preprocessing.minmax_scale(raw, axis=1).astype('float32')
    np.save(file_out, scaled)
项目:fx    作者:TaRyu    | 项目源码 | 文件源码
def features_processing(file_in, file_out):
    """Min-max scale a saved feature array along axis 1 and save it as float32.

    Args:
        file_in: Source passed to ``np.load``.
        file_out: Destination passed to ``np.save``.
    """
    raw = np.load(file_in)
    scaled = preprocessing.minmax_scale(raw, axis=1).astype('float32')
    np.save(file_out, scaled)