The following code examples, extracted from open-source Python projects, illustrate how to use sklearn.preprocessing.minmax_scale().
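Before the extracted examples, here is a minimal usage sketch (arrays made up for illustration): minmax_scale maps each feature to a target range, working column-wise on 2-D input by default and treating 1-D input as a single feature.

import numpy as np
from sklearn.preprocessing import minmax_scale

x = np.array([[1.0, -1.0],
              [2.0,  0.0],
              [3.0,  1.0]])
print(minmax_scale(x))                         # each column mapped to [0, 1]
print(minmax_scale(x, feature_range=(0, 10)))  # custom output range
print(minmax_scale(x[:, 0]))                   # 1-D input is treated as a single feature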
def combine_rmse_gpvar(grid_scores, w_rmse=0.8, w_gpvar=0.2):
    from sklearn.preprocessing import minmax_scale
    # Scale rmses, gpvars to (0,1)
    scaled_scores = np.empty((len(grid_scores), 2))
    for i, scores in enumerate(grid_scores):
        scaled_scores[i, 0] = scores.mean_scores[0]
        scaled_scores[i, 1] = scores.mean_scores[1]
    rmse_sort_indices = np.argsort(scaled_scores[:, 0])
    gpvar_sort_indices = np.argsort(scaled_scores[:, 1])
    scaled_scores = minmax_scale(scaled_scores)
    combined_scores = w_rmse * scaled_scores[:, 0] + w_gpvar * scaled_scores[:, 1]
    comb_sort_indices = np.argsort(combined_scores)
    return CombinedScore(combined_scores, scaled_scores, comb_sort_indices,
                         rmse_sort_indices, gpvar_sort_indices)
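The core of the function above is: collect the two raw metrics into an (n, 2) array, let minmax_scale map each column to [0, 1] independently, then rank by a weighted sum. A self-contained sketch with made-up scores:

import numpy as np
from sklearn.preprocessing import minmax_scale

raw = np.array([[1.2, 0.30],   # column 0: RMSE, column 1: GP variance (hypothetical values)
                [0.8, 0.50],
                [1.0, 0.10]])
scaled = minmax_scale(raw)                          # each column independently mapped to [0, 1]
combined = 0.8 * scaled[:, 0] + 0.2 * scaled[:, 1]
print(np.argsort(combined))                         # candidate ranking by combined score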
def make_drop_duplicate(self, _df_csv_read_ori, _drop_duplicate, _label):
    """ Drop rows whose feature values are duplicated, ignoring the label column.
    Args:
        params:
            * _drop_duplicate : whether to drop duplicated rows
            * _df_csv_read_ori : pandas dataframe
            * _label : label column name, excluded from the duplicate check
    Returns:
        Preprocessed Dataframe
    """
    if _drop_duplicate is None or _drop_duplicate == 'null' or _drop_duplicate == False:
        logging.info("No Duplicate")
        result_df = _df_csv_read_ori
    else:
        cell_features = _df_csv_read_ori.columns.tolist()
        cell_features.remove(_label)
        result_df = _df_csv_read_ori.drop_duplicates(cell_features, keep="first")
        logging.info("duplicated row delete {0}".format(len(_df_csv_read_ori.index) - len(result_df.index)))
        temp_duplicate_filename = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + "_dup.csvbk"
        result_df.to_csv(self.data_src_path + "/backup/" + temp_duplicate_filename)
    return result_df
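The duplicate check above keys on every column except the label; a toy illustration of pandas' drop_duplicates with an explicit column subset (frame values made up):

import pandas as pd

df = pd.DataFrame({"f1": [1, 1, 2], "f2": [3, 3, 4], "label": [0, 1, 0]})
features = [c for c in df.columns if c != "label"]
print(df.drop_duplicates(features, keep="first"))  # row 1 duplicates row 0's features and is dropped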
def cell_fd_extention(fname_org='sheet.gz/cell_db.cvs.gz', camera_bit_resolution=14):
    cell_df = pd.read_csv(fname_org)
    Limg, Lx, Ly = cell_fd_info(cell_df)

    cell_df_ext = cell_df.copy()

    # Fresnel diffraction
    cell_img_fd_a = cell_fd_conv(cell_df)
    cell_df_ext['freznel image'] = cell_img_fd_a.reshape(-1)

    # max_v, min_v = np.max(cell_df["image"]), np.min(cell_df["image"])
    cell_img_fd_a_2d = cell_img_fd_a.reshape(Limg, -1)
    cell_img_fd_a_2d_scale = preprocessing.minmax_scale(
        np.abs(cell_img_fd_a_2d)) * (2**camera_bit_resolution)
    cell_img_fd_a_2d_scale_200x144x144 = cell_img_fd_a_2d_scale.reshape(
        Limg, Lx, Ly).astype(int)
    cell_df_ext['mag freznel image'] = cell_img_fd_a_2d_scale_200x144x144.reshape(-1)

    return cell_df_ext
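The scaling step here maps each pixel column to [0, 1] (minmax_scale's default axis=0) and then stretches it to the camera's integer dynamic range; a sketch with a random stand-in for the diffraction magnitudes:

import numpy as np
from sklearn import preprocessing

camera_bit_resolution = 14
mag = np.abs(np.random.randn(10, 144 * 144))   # hypothetical magnitude images, one per row
counts = (preprocessing.minmax_scale(mag) * (2 ** camera_bit_resolution)).astype(int)
# default axis=0 scales each pixel position across images; pass axis=1 to scale each image alone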
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    # data = pd.read_csv(file_in, dtype='str')
    # data['DateTime'] = pd.to_datetime(
    #     data['<DTYYYYMMDD>'].map(str) + data['<TIME>'].map(str),
    #     format='%Y%m%d%H%M%S')
    # data = data.set_index('DateTime')
    # data = pd.Series(data['<CLOSE>']).map(float)
    # data = data.resample('M').fillna(method='pad')
    # data = preprocessing.minmax_scale(data)
    # data_t = data[6:]
    # data_f = data.reshape(-1, 6)
    # data_f = np.array([data[i:i + 6] for i in range(data.shape[0] - 6 + 1)])
    # np.save(file_out[0], data_f[:len(data_f) - 1])
    # np.save(file_out[1], data_t)
    data = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    data = data.reshape(-1, 24)
    data_m = np.array([[data[i + x * 24][0] for x in range(5)]
                       for i in range(len(data) - 5 * 24 + 1)])
    data_m = data_m.reshape(-1, 5)
    data_s = np.array([data[i + 5 * 24][0] for i in range(len(data) - 5 * 24)])
    np.save(file_out[0], data_m[:len(data_m) - 1])
    np.save(file_out[1], data_s)
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    # data = pd.read_csv(file_in, dtype='str')
    # data['DateTime'] = pd.to_datetime(
    #     data['<DTYYYYMMDD>'].map(str) + data['<TIME>'].map(str),
    #     format='%Y%m%d%H%M%S')
    # data = data.set_index('DateTime')
    # data = pd.Series(data['<CLOSE>']).map(float)
    # data = data.resample('M').fillna(method='pad')
    # data = preprocessing.minmax_scale(data)
    # data_t = data[6:]
    # data_f = data.reshape(-1, 6)
    # data_f = np.array([data[i:i + 6] for i in range(data.shape[0] - 6 + 1)])
    # np.save(file_out[0], data_f[:len(data_f) - 1])
    # np.save(file_out[1], data_t)
    data = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    data = data.reshape(-1, 24)
    data_m = np.array([[data[i + x][0] for x in range(5)]
                       for i in range(len(data) - 5 + 1)])
    data_m = data_m.reshape(-1, 5)
    data_s = np.array([data[i + 5][0] for i in range(len(data) - 5)])
    np.save(file_out[0], data_m[:len(data_m) - 1])
    np.save(file_out[1], data_s)
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    # data = pd.read_csv(file_in, dtype='str')
    # data['DateTime'] = pd.to_datetime(
    #     data['<DTYYYYMMDD>'].map(str) + data['<TIME>'].map(str),
    #     format='%Y%m%d%H%M%S')
    # data = data.set_index('DateTime')
    # data = pd.Series(data['<CLOSE>']).map(float)
    # data = data.resample('M').fillna(method='pad')
    # data = preprocessing.minmax_scale(data)
    # data_t = data[6:]
    # data_f = data.reshape(-1, 6)
    # data_f = np.array([data[i:i + 6] for i in range(data.shape[0] - 6 + 1)])
    # np.save(file_out[0], data_f[:len(data_f) - 1])
    # np.save(file_out[1], data_t)
    data = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    data_m = np.array([[data[i + x * 24 * 24] for x in range(6)]
                       for i in range(len(data) - 6 * 24 * 24 + 1)])
    data_m = data_m.reshape(-1, 6)
    data_s = np.array([data[i + 6 * 24 * 24] for i in range(len(data) - 6 * 24 * 24)])
    np.save(file_out[0], data_m[:len(data_m) - 1])
    np.save(file_out[1], data_s)
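All three process variants above follow the same recipe: minmax_scale the close series, then slide a window over it to build model inputs and the next-step target. The windowing reduces to this sketch on a plain 1-D array (the +1/trim dance in the originals is folded into the range):

import numpy as np

data = np.arange(10, dtype=float)      # stand-in for the scaled close series
window = 6
data_m = np.array([data[i:i + window] for i in range(len(data) - window)])  # model inputs
data_s = data[window:]                 # next-value targets, aligned row-for-row with data_m
assert len(data_m) == len(data_s)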
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
    '''
    Scales a given set of numerical columns. This works only for columns
    with numerical values.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
        None (default). If col is None, all numerical columns will be used.
    scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
        'robust_scale'}
        User-defined scaling functions can also be used through
        self.transform_feature.
    scaling_parms : dictionary
        Any additional parameters to be used for sklearn's scaling functions.
    '''
    self._validate_params(
        params_list={'col': col, 'scaling': scaling},
        expected_types={'col': [str, list, type(None)], 'scaling': [str, type(None)]})

    if scaling is None:
        scaling = 'minmax_scale'

    # Default parameters are filled in only when the caller supplied none,
    # so user-provided scaling_parms pass through untouched.
    if scaling == 'zscore':
        scaling = 'lambda x: (x - x.mean()) / x.std()'
    elif scaling == 'minmax_scale':
        if scaling_parms is None:
            scaling_parms = {'feature_range': (0, 1), 'axis': 0}
    elif scaling == 'scale':
        if scaling_parms is None:
            scaling_parms = {'with_mean': True, 'with_std': True, 'axis': 0}
    elif scaling == 'maxabs_scale':
        if scaling_parms is None:
            scaling_parms = {'axis': 0}
    elif scaling == 'robust_scale':
        if scaling_parms is None:
            # 'quantile_range': (25.0, 75.0),
            scaling_parms = {'with_centering': True, 'with_scaling': True, 'axis': 0}
    else:
        raise TypeError('UNSUPPORTED scaling TYPE')

    self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
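With the defaults above, transform_feature ends up making plain sklearn calls; a sketch of the equivalent direct invocations (array x made up):

import numpy as np
from sklearn import preprocessing

x = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 35.0]])
preprocessing.minmax_scale(x, feature_range=(0, 1), axis=0)
preprocessing.scale(x, with_mean=True, with_std=True, axis=0)
preprocessing.maxabs_scale(x, axis=0)
preprocessing.robust_scale(x, with_centering=True, with_scaling=True, axis=0)
(x - x.mean(axis=0)) / x.std(axis=0)   # the 'zscore' lambda, per column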
def convert(image):
    """Convert a scikit-image binary image matrix to OpenCV"""
    with warnings.catch_warnings(record=True):
        warnings.filterwarnings('ignore', category=DataConversionWarning)
        return minmax_scale(image, (0, 255)).astype(np.ubyte)
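feature_range is the second positional parameter of minmax_scale, so (0, 255) stretches the image straight into OpenCV's byte range; a tiny sketch:

import numpy as np
from sklearn.preprocessing import minmax_scale

binary = np.array([[0.0, 1.0],
                   [1.0, 0.0]])                         # scikit-image style binary matrix
print(minmax_scale(binary, (0, 255)).astype(np.ubyte))  # [[0 255] [255 0]]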
def make_preprocessing_pandas(self, _df_csv_read_ori, _preprocessing_type, _label):
    """ Apply the selected sklearn preprocessing to the numeric columns of a
        pandas DataFrame, leaving the label column untouched.
    Args:
        params:
            * _preprocessing_type: ['scale', 'minmax_scale', 'robust_scale', 'normalize', 'maxabs_scale']
            * _df_csv_read_ori : pandas dataframe
            * _label : label column name
    Returns:
        Preprocessed DataFrame
    """
    if _preprocessing_type is None or _preprocessing_type == 'null':
        logging.info("No Preprocessing")
        result_df = _df_csv_read_ori
    else:
        logging.info("Preprocessing type : {0}".format(_preprocessing_type))
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        for i, v in _df_csv_read_ori.dtypes.items():
            if v in numerics:
                if i not in _label:
                    # preprocessing_types = ['scale', 'minmax_scale', 'robust_scale', 'normalize', 'maxabs_scale']
                    if 'scale' in _preprocessing_type:
                        _df_csv_read_ori[i] = preprocessing.scale(_df_csv_read_ori[i].fillna(0.0))
                    if 'minmax_scale' in _preprocessing_type:
                        _df_csv_read_ori[i] = preprocessing.minmax_scale(_df_csv_read_ori[i].fillna(0.0))
                    if 'robust_scale' in _preprocessing_type:
                        _df_csv_read_ori[i] = preprocessing.robust_scale(_df_csv_read_ori[i].fillna(0.0))
                    if 'normalize' in _preprocessing_type:
                        _df_csv_read_ori[i] = preprocessing.normalize(_df_csv_read_ori[i].fillna(0.0))
                    if 'maxabs_scale' in _preprocessing_type:
                        _df_csv_read_ori[i] = preprocessing.maxabs_scale(_df_csv_read_ori[i].fillna(0.0))
        result_df = _df_csv_read_ori
    return result_df
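The per-column loop works because scale, minmax_scale, robust_scale, and maxabs_scale all accept 1-D input; normalize is the odd one out in current scikit-learn versions, which require a 2-D array for it. A toy sketch of the column-wise pattern:

import pandas as pd
from sklearn import preprocessing

df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 40.0], "label": [0, 1, 0]})
for col in df.columns.drop("label"):
    df[col] = preprocessing.minmax_scale(df[col].fillna(0.0))  # 1-D in, 1-D out
# preprocessing.normalize(df["a"]) raises in current scikit-learn; pass 2-D input, e.g. df[["a"]]
print(df)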
def extention(self, camera_bit_resolution=14):
    fname_org = self.fname_org
    h2d = self.h2d

    cell_df = pd.read_csv(fname_org)
    Limg, Lx, Ly = cell_fd_info(cell_df)

    cell_df_ext = cell_df.copy()

    # Fresnel diffraction
    cell_img_fd_a = cell_fd_conv(cell_df, h2d)
    cell_df_ext['freznel image'] = cell_img_fd_a.reshape(-1)

    # max_v, min_v = np.max(cell_df["image"]), np.min(cell_df["image"])
    cell_img_fd_a_2d = cell_img_fd_a.reshape(Limg, -1)
    cell_img_fd_a_2d_scale = preprocessing.minmax_scale(
        np.abs(cell_img_fd_a_2d)) * (2**camera_bit_resolution)
    cell_img_fd_a_2d_scale_200x144x144 = cell_img_fd_a_2d_scale.reshape(
        Limg, Lx, Ly).astype(int)
    cell_df_ext['mag freznel image'] = cell_img_fd_a_2d_scale_200x144x144.reshape(-1)

    return cell_df_ext

# Deep Learning
def normalise_and_centre_score(strategy_score, up_threshold, low_threshold):
    """Normalise and centre the score when fitting thresholds."""
    temp_score = minmax_scale(strategy_score)
    temp_score = temp_score - 0.5
    temp_score[temp_score > up_threshold] = up_threshold
    temp_score[temp_score < -up_threshold] = -up_threshold
    temp_score[abs(temp_score) < low_threshold] = 0
    temp_score = temp_score + 0.5
    return temp_score
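A quick usage sketch of the function above, with made-up thresholds (assumes numpy and minmax_scale are in scope):

import numpy as np

strategy_score = np.array([0.1, 0.9, 0.4, 0.6, 0.5])
print(normalise_and_centre_score(strategy_score, up_threshold=0.3, low_threshold=0.05))
# -> [0.2 0.8 0.375 0.625 0.5]: clipped to [0.2, 0.8] and re-centred around 0.5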
def features_processing(file_in, file_out):
    features = np.load(file_in)
    new_features = preprocessing.minmax_scale(features, axis=1)
    np.save(file_out, new_features.astype('float32'))
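axis=1 scales each row (each sample's feature vector) independently, instead of the default per-column behaviour; a minimal comparison:

import numpy as np
from sklearn import preprocessing

features = np.array([[1.0, 2.0, 4.0],
                     [10.0, 20.0, 40.0]])
print(preprocessing.minmax_scale(features, axis=0))  # per column: compares the two samples
print(preprocessing.minmax_scale(features, axis=1))  # per row: both rows become [0, 1/3, 1]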