The following 7 code examples, extracted from open-source Python projects, illustrate how to use sklearn.preprocessing.maxabs_scale().
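Before the project examples, here is a minimal self-contained sketch of what the function does: maxabs_scale divides each column by its maximum absolute value, so the result lies in [-1, 1] and zero entries stay zero (which is why it is also safe for sparse data).

import numpy as np
from sklearn.preprocessing import maxabs_scale

X = np.array([[1.0, -2.0],
              [2.0,  4.0]])

# Each column is divided by its max absolute value (2.0 and 4.0 here),
# so values end up in [-1, 1] and zeros are preserved.
print(maxabs_scale(X))   # [[ 0.5 -0.5]
                         #  [ 1.   1. ]]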
from collections import OrderedDict
from operator import itemgetter
from sklearn.preprocessing import maxabs_scale

def query(self, query, k=1, sort=True):
    models = self.retrieval_models
    # scale the weights so the largest absolute value is 1
    # (not squashed into [0, 1] as min-max scaling would do)
    weights = maxabs_scale(self.weights) if self.weights is not None else None
    agg_fn = self.aggregation_fn
    # we only need to sort in the final run
    combined = [m.query(query, k=k, sort=False) for m in models]
    if weights is not None:
        combined = [{key: score * w for key, score in r.items()}
                    for r, w in zip(combined, weights)]
    combined = aggregate_dicts(combined, agg_fn=agg_fn, sort=True)
    if sort:
        # only cut off at k if this is the final (sorted) output
        combined = OrderedDict(sorted(combined.items(), key=itemgetter(1), reverse=True)[:k])
    return combined
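A minimal sketch of the weighting step above on toy data. aggregate_dicts is not shown in the source, so the sum-per-key aggregation here is an assumption:

from sklearn.preprocessing import maxabs_scale

# two hypothetical retrieval runs and their raw model weights
runs = [{'doc1': 0.9, 'doc2': 0.3}, {'doc1': 0.2, 'doc2': 0.8}]
weights = maxabs_scale([3.0, 1.0])   # -> [1.0, 0.333...]
weighted = [{key: score * w for key, score in r.items()}
            for r, w in zip(runs, weights)]
combined = {}
for r in weighted:
    for key, score in r.items():
        combined[key] = combined.get(key, 0.0) + score
print(combined)   # doc1 ≈ 0.967, doc2 ≈ 0.567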
import logging
from time import gmtime, strftime

def make_drop_duplicate(self, _df_csv_read_ori, _drop_duplicate, _label):
    """ Drop rows that are duplicated across every column except the label column.
    Args:
        params:
            * _drop_duplicate : whether duplicate rows should be removed
            * _df_csv_read_ori : pandas dataframe
            * _label : label column name
    Returns:
        Preprocessed Dataframe
    """
    if _drop_duplicate in (None, 'null', False):
        logging.info("No Duplicate")
        result_df = _df_csv_read_ori
    else:
        # compare on all feature columns, i.e. everything but the label
        cell_features = _df_csv_read_ori.columns.tolist()
        cell_features.remove(_label)
        result_df = _df_csv_read_ori.drop_duplicates(cell_features, keep="first")
        logging.info("duplicated row delete {0}".format(len(_df_csv_read_ori.index) - len(result_df.index)))
        # back up the de-duplicated frame under a timestamped file name
        temp_duplicate_filename = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + "_dup.csvbk"
        result_df.to_csv(self.data_src_path + "/backup/" + temp_duplicate_filename)
    return result_df
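A small illustration of the core drop_duplicates call, on a hypothetical frame where two rows share identical features but different labels; only the feature columns are compared:

import pandas as pd

df = pd.DataFrame({'f1': [1, 1, 2], 'f2': [5, 5, 6], 'label': [0, 1, 0]})
features = [c for c in df.columns if c != 'label']
# rows 0 and 1 match on f1/f2, so only the first of them survives
print(df.drop_duplicates(features, keep="first"))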
from sklearn.preprocessing import maxabs_scale

def sk_abs_scale(X):
    # thin wrapper around sklearn's max-abs scaling
    return maxabs_scale(X)
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
    '''
    Scales a given set of numerical columns. This only works for columns
    with numerical values.

    Parameters
    ----------
    col : a string of a column name, or a list of many column names, or
        None (default). If col is None, all numerical columns will be used.
    scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale', 'robust_scale'}
        User-defined scaling functions can also be used through self.transform_feature.
    scaling_parms : dictionary
        Any additional parameters to be passed to sklearn's scaling functions.
    '''
    self._validate_params(params_list={'col': col, 'scaling': scaling},
                          expected_types={'col': [str, list, type(None)],
                                          'scaling': [str, type(None)]})

    if scaling is None:
        scaling = 'minmax_scale'

    # Default parameters are only filled in when the caller supplied none;
    # an unknown scaling name always raises, custom parms never do.
    if scaling == 'zscore':
        scaling = 'lambda x: (x - x.mean()) / x.std()'
    elif scaling == 'minmax_scale':
        if scaling_parms is None:
            scaling_parms = {'feature_range': (0, 1), 'axis': 0}
    elif scaling == 'scale':
        if scaling_parms is None:
            scaling_parms = {'with_mean': True, 'with_std': True, 'axis': 0}
    elif scaling == 'maxabs_scale':
        if scaling_parms is None:
            scaling_parms = {'axis': 0}
    elif scaling == 'robust_scale':
        if scaling_parms is None:
            # 'quantile_range': (25.0, 75.0) is left at sklearn's default
            scaling_parms = {'with_centering': True, 'with_scaling': True, 'axis': 0}
    else:
        raise TypeError('UNSUPPORTED scaling TYPE')

    self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
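For reference, the defaults above correspond to direct scikit-learn calls like the following sketch (transform_feature itself is not shown in the source):

import numpy as np
from sklearn.preprocessing import minmax_scale, scale, maxabs_scale, robust_scale

x = np.array([[1.0], [5.0], [10.0]])
print(minmax_scale(x, feature_range=(0, 1), axis=0))
print(scale(x, with_mean=True, with_std=True, axis=0))
print(maxabs_scale(x, axis=0))
print(robust_scale(x, with_centering=True, with_scaling=True, axis=0))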
from sklearn import preprocessing

def preprocess_features(X, ips):
    '''
    Scale the feature vectors using scikit preprocessing.
    '''
    assert len(X.shape) == 2  # double-check that X is 2-d
    X = preprocessing.maxabs_scale(X, copy=False)
    return X, ips
def preprocess_features(X, Y):
    '''
    Scale the feature vectors using scikit preprocessing.
    '''
    assert len(X.shape) == 2  # double-check that X is 2-d
    X = preprocessing.maxabs_scale(X, copy=False)
    return X, Y
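Both preprocess_features variants pass copy=False, which asks maxabs_scale to scale the array in place when the input's dtype and memory layout allow it, avoiding a second copy of a large feature matrix. The return value is still assigned, since an in-place result is not guaranteed for every input type.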
import logging
from sklearn import preprocessing

def make_preprocessing_pandas(self, _df_csv_read_ori, _preprocessing_type, _label):
    """ Preprocess a pandas dataframe with scikit-learn; the label column is
        excluded from preprocessing.
    Args:
        params:
            * _preprocessing_type: ['scale', 'minmax_scale', 'robust_scale', 'normalize', 'maxabs_scale']
            * _df_csv_read_ori : pandas dataframe
            * _label : label column name
    Returns:
        Preprocessed DataFrame
    """
    if _preprocessing_type is None or _preprocessing_type == 'null':
        logging.info("No Preprocessing")
        result_df = _df_csv_read_ori
    else:
        logging.info("Preprocessing type : {0}".format(_preprocessing_type))
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        # items() replaces the iteritems() call that was removed in pandas 2.0
        for i, v in _df_csv_read_ori.dtypes.items():
            if v in numerics and i not in _label:
                if 'scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.scale(_df_csv_read_ori[i].fillna(0.0))
                if 'minmax_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.minmax_scale(_df_csv_read_ori[i].fillna(0.0))
                if 'robust_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.robust_scale(_df_csv_read_ori[i].fillna(0.0))
                if 'normalize' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.normalize(_df_csv_read_ori[i].fillna(0.0))
                if 'maxabs_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.maxabs_scale(_df_csv_read_ori[i].fillna(0.0))
        result_df = _df_csv_read_ori
    return result_df
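A compact sketch of the same per-column pattern on a toy frame, scaling every numeric column except a hypothetical 'label' column:

import pandas as pd
from sklearn import preprocessing

df = pd.DataFrame({'a': [1.0, -4.0, 2.0], 'b': [10, 20, 30], 'label': [0, 1, 0]})
numerics = ('int16', 'int32', 'int64', 'float16', 'float32', 'float64')
for col, dtype in df.dtypes.items():
    if str(dtype) in numerics and col != 'label':
        # each numeric column is divided by its max absolute value
        df[col] = preprocessing.maxabs_scale(df[col].fillna(0.0))
print(df)   # 'a' -> [0.25, -1.0, 0.5], 'b' -> [0.333..., 0.666..., 1.0]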