The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.log1p().
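Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what numpy.log1p() computes and why it is preferred over np.log(1 + x) when x is close to zero:

import numpy as np

x = 1e-10
# log1p evaluates log(1 + x) without forming 1 + x first, so it keeps
# full precision when x is tiny.
print(np.log1p(x))       # ~1e-10, accurate
print(np.log(1.0 + x))   # 1.0 + x rounds in float64, so several digits are lost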
def __call__(self, x):
    """
    Args:
        x (FloatTensor/LongTensor or ndarray)

    Returns:
        x_mu (LongTensor or ndarray)
    """
    mu = self.qc - 1.
    if isinstance(x, np.ndarray):
        x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).astype(int)
    elif isinstance(x, (torch.Tensor, torch.LongTensor)):
        if isinstance(x, torch.LongTensor):
            x = x.float()
        mu = torch.FloatTensor([mu])
        x_mu = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / torch.log1p(mu)
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).long()
    return x_mu
def rf1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    N_splits = 300
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    for n, (itrain, ival) in enumerate(skf.split(train2, y)):
        print('step %d of %d' % (n + 1, skf.n_splits), now())
        clf = ensemble.RandomForestRegressor(n_estimators=1000,
                                             max_depth=3,
                                             random_state=13)
        clf.fit(train2[itrain], y[itrain])
        p = clf.predict(train2[ival])
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)
        z[cname] += np.log1p(clf.predict(test2))
        print(cname, 'step %d: score' % (n + 1), score, now())
        scores.append(score)
    print('validation loss: ', metrics.log_loss(y, v[cname]))
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= N_splits
def rf(train_sample, validation_sample, features, seed):
    log_base = np.e
    rf_est = RandomForestRegressor(n_estimators=500,
                                   criterion='mse',
                                   max_features=4,
                                   max_depth=None,
                                   bootstrap=True,
                                   min_samples_split=4,
                                   min_samples_leaf=1,
                                   min_weight_fraction_leaf=0,
                                   max_leaf_nodes=None,
                                   random_state=seed
                                   ).fit(
        train_sample[features],
        np.log1p(train_sample['volume']) / np.log(log_base))
    rf_prob = np.power(log_base, rf_est.predict(validation_sample[features])) - 1
    print_mape(validation_sample['volume'], rf_prob, 'RF')
    return rf_prob
def data_preprocess(train, test):
    outlier_idx = [4, 11, 13, 20, 46, 66, 70, 167, 178, 185, 199, 224, 261, 309, 313, 318,
                   349, 412, 423, 440, 454, 477, 478, 523, 540, 581, 588, 595, 654, 688,
                   691, 774, 798, 875, 898, 926, 970, 987, 1027, 1109, 1169, 1182, 1239,
                   1256, 1298, 1324, 1353, 1359, 1405, 1442, 1447]
    train.drop(train.index[outlier_idx], inplace=True)
    all_data = pd.concat((train.loc[:, 'MSSubClass':'SaleCondition'],
                          test.loc[:, 'MSSubClass':'SaleCondition']))
    to_delete = ['Alley', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature']
    all_data = all_data.drop(to_delete, axis=1)

    train["SalePrice"] = np.log1p(train["SalePrice"])
    # log transform skewed numeric features
    numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    all_data[skewed_feats] = np.log1p(all_data[skewed_feats])
    all_data = pd.get_dummies(all_data)
    all_data = all_data.fillna(all_data.mean())
    X_train = all_data[:train.shape[0]]
    X_test = all_data[train.shape[0]:]
    y = train.SalePrice
    return X_train, X_test, y
def data_preprocess(train, test):
    outlier_idx = [4, 11, 13, 20, 46, 66, 70, 167, 178, 185, 199, 224, 261, 309, 313, 318,
                   349, 412, 423, 440, 454, 477, 478, 523, 540, 581, 588, 595, 654, 688,
                   691, 774, 798, 875, 898, 926, 970, 987, 1027, 1109, 1169, 1182, 1239,
                   1256, 1298, 1324, 1353, 1359, 1405, 1442, 1447]
    train.drop(train.index[outlier_idx], inplace=True)
    all_data = pd.concat((train.loc[:, 'MSSubClass':'SaleCondition'],
                          test.loc[:, 'MSSubClass':'SaleCondition']))
    to_delete = ['Alley', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature']
    all_data = all_data.drop(to_delete, axis=1)

    train["SalePrice"] = np.log1p(train["SalePrice"])
    # log transform skewed numeric features
    numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    all_data[skewed_feats] = np.log1p(all_data[skewed_feats])
    all_data = pd.get_dummies(all_data)
    all_data = all_data.fillna(method='ffill')
    X_train = all_data[:train.shape[0]]
    X_test = all_data[train.shape[0]:]
    y = train.SalePrice
    return X_train, X_test, y
def log_loss_value(Z, weights, total_weights, rho):
    """
    computes the value of the logistic loss in a numerically stable way
    supports non-negative sample weights for each example in the training data
    see http://stackoverflow.com/questions/20085768/

    Parameters
    ----------
    Z               numpy.array containing training data with shape = (n_rows, n_cols)
    rho             numpy.array of coefficients with shape = (n_cols,)
    total_weights   numpy.sum(weights) (only included to reduce computation)
    weights         numpy.array of sample weights with shape (n_rows,)

    Returns
    -------
    loss_value  scalar = 1/total_weights * sum(weights * log(1 + exp(-Z*rho)))
    """
    scores = Z.dot(rho)
    pos_idx = scores > 0
    loss_value = np.empty_like(scores)
    loss_value[pos_idx] = np.log1p(np.exp(-scores[pos_idx]))
    loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(np.exp(scores[~pos_idx]))
    loss_value = loss_value.dot(weights) / total_weights
    return loss_value
def log_loss_value(Z, rho):
    """
    computes the value of the logistic loss in a numerically stable way
    see also: http://stackoverflow.com/questions/20085768/

    Parameters
    ----------
    Z           numpy.array containing training data with shape = (n_rows, n_cols)
    rho         numpy.array of coefficients with shape = (n_cols,)

    Returns
    -------
    loss_value  scalar = 1/n_rows * sum(log(1 + exp(-Z*rho)))
    """
    scores = Z.dot(rho)
    pos_idx = scores > 0
    loss_value = np.empty_like(scores)
    loss_value[pos_idx] = np.log1p(np.exp(-scores[pos_idx]))
    loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(np.exp(scores[~pos_idx]))
    loss_value = loss_value.mean()
    return loss_value
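The sign split used in the loss functions above is the standard trick for evaluating log(1 + exp(-score)) without overflow: np.exp is only ever applied to a non-positive argument, and np.log1p handles the case where that exponential is tiny. A standalone sketch of the same idea (function name hypothetical), compared with the naive formula:

import numpy as np

def stable_logistic_loss(scores):
    # Same split as log_loss_value above: np.exp never sees a large positive argument.
    scores = np.asarray(scores, dtype=float)
    pos_idx = scores > 0
    out = np.empty_like(scores)
    out[pos_idx] = np.log1p(np.exp(-scores[pos_idx]))
    out[~pos_idx] = -scores[~pos_idx] + np.log1p(np.exp(scores[~pos_idx]))
    return out

scores = np.array([-800.0, 0.0, 800.0])
print(stable_logistic_loss(scores))   # finite values: [800., log(2), ~0.]
print(np.log(1.0 + np.exp(-scores)))  # naive formula overflows to inf for -800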
def feature_engineering(self, df):
    is_skewness_correction_for_all_features = 1
    if is_skewness_correction_for_all_features:
        # Correcting for skewness
        # Treat all numerical variables that were not one-hot encoded
        if any(tuple(df.columns == 'y')):
            self.is_with_log1p_call_outcome = 1

        numerical_feature_names_of_non_modified_df = TwoSigmaFinModTools._numerical_feature_names
        if TwoSigmaFinModTools._is_one_hot_encoder:
            numerical_feature_names_of_non_modified_df = numerical_feature_names_of_non_modified_df.values
        else:
            numerical_feature_names_of_non_modified_df = np.concatenate(
                [TwoSigmaFinModTools._feature_names_num.values,
                 numerical_feature_names_of_non_modified_df.values])

        relevant_features = df[numerical_feature_names_of_non_modified_df].columns[
            (df[numerical_feature_names_of_non_modified_df].columns != 'Id')]
        self.skew_correction(df, relevant_features)
    else:
        # Only scale down Call Outcome, since we leave all other numerical features standardized.
        if any(tuple(df.columns == 'Call Outcome')):
            self.is_with_log1p_call_outcome = 1
            df.loc[:, tuple(['Call Outcome'])] = np.log1p(df['Call Outcome'])
def __call__(self, x_mu):
    """
    Args:
        x_mu (FloatTensor/LongTensor or ndarray)

    Returns:
        x (FloatTensor or ndarray)
    """
    mu = self.qc - 1.
    if isinstance(x_mu, np.ndarray):
        x = ((x_mu) / mu) * 2 - 1.
        x = np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
    elif isinstance(x_mu, (torch.Tensor, torch.LongTensor)):
        if isinstance(x_mu, torch.LongTensor):
            x_mu = x_mu.float()
        mu = torch.FloatTensor([mu])
        x = ((x_mu) / mu) * 2 - 1.
        x = torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
    return x
def test_branch_cuts(self):
    # check branch cuts and continuity on them
    yield _check_branch_cut, np.log,   -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log2,  -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True
    yield _check_branch_cut, np.sqrt,  -0.5, 1j, 1, -1, True

    yield _check_branch_cut, np.arcsin, [ -2, 2],   [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arccos, [ -2, 2],   [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arctan, [0-2j, 2j], [1,  1], -1, 1, True

    yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1,   1], -1, 1, True
    yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arctanh, [ -2,   2], [1j, 1j], 1, -1, True

    # check against bogus branch cuts: assert continuity between quadrants
    yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1,  1], 1, 1
    yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1,  1], 1, 1
    yield _check_branch_cut, np.arctan, [ -2,  2],  [1j, 1j], 1, 1

    yield _check_branch_cut, np.arcsinh, [ -2,  2, 0],   [1j, 1j, 1], 1, 1
    yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2],  [1,  1, 1j], 1, 1
    yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0],  [1,  1, 1j], 1, 1
def test_branch_cuts_complex64(self):
    # check branch cuts and continuity on them
    yield _check_branch_cut, np.log,   -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log2,  -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.sqrt,  -0.5, 1j, 1, -1, True, np.complex64

    yield _check_branch_cut, np.arcsin, [ -2, 2],   [1j, 1j], 1, -1, True, np.complex64
    yield _check_branch_cut, np.arccos, [ -2, 2],   [1j, 1j], 1, -1, True, np.complex64
    yield _check_branch_cut, np.arctan, [0-2j, 2j], [1,  1], -1, 1, True, np.complex64

    yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1,   1], -1, 1, True, np.complex64
    yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64
    yield _check_branch_cut, np.arctanh, [ -2,   2], [1j, 1j], 1, -1, True, np.complex64

    # check against bogus branch cuts: assert continuity between quadrants
    yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1,  1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1,  1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arctan, [ -2,  2],  [1j, 1j], 1, 1, False, np.complex64

    yield _check_branch_cut, np.arcsinh, [ -2,  2, 0],  [1j, 1j, 1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1,  1, 1j], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1,  1, 1j], 1, 1, False, np.complex64
def _mu(distr, z, eta):
    """The non-linearity (inverse link)."""
    if distr in ['softplus', 'gamma']:
        mu = np.log1p(np.exp(z))
    elif distr == 'poisson':
        mu = z.copy()
        intercept = (1 - eta) * np.exp(eta)
        mu[z > eta] = z[z > eta] * np.exp(eta) + intercept
        mu[z <= eta] = np.exp(z[z <= eta])
    elif distr == 'gaussian':
        mu = z
    elif distr == 'binomial':
        mu = expit(z)
    elif distr == 'probit':
        mu = norm.cdf(z)
    return mu
def ests_ll_exact(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses an exact integral and returns exact ll values, i.e.
    it does not use quadrature to approximate the integral.
    """
    mu, gamma, err = np.split(params, 3)
    d = self.num2 - mu
    q = self.w2 / err**2
    r = d * q
    f = self.w2 @ (2 * np.log(abs(err)) + LOG2PI)
    a = q @ gamma**2
    b = r @ gamma
    c = nsum_row(d * r)
    return .5 * (b * b / (a + 1) - c - f - np.log1p(a))
def draw_links(self, n=1, log_sampling=False):
    """ Draw multiple random links. """
    urls = []
    domain_array = np.array([dmn for dmn in self.domain_links])
    domain_count = np.array([len(self.domain_links[domain_array[k]])
                             for k in range(domain_array.shape[0])])
    p = np.array([np.float(c) for c in domain_count])
    count_total = p.sum()
    if log_sampling:
        # log-sampling [log(x+1)] to bias lower count domains
        p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
    if count_total > 0:
        p = p / p.sum()
        cnts = npr.multinomial(n, pvals=p)
        if n > 1:
            for k in range(cnts.shape[0]):
                domain = domain_array[k]
                cnt = min(cnts[k], domain_count[k])
                for url in random.sample(self.domain_links[domain], cnt):
                    urls.append(url)
        else:
            k = int(np.nonzero(cnts)[0])
            domain = domain_array[k]
            url = random.sample(self.domain_links[domain], 1)[0]
            urls.append(url)
    return urls
def c(vec):
    """Complement function for probabilities in the log-space: robustly computes 1-P(A) in the log-space

    Args:
        vec: vector of negative numbers representing log-probabilities of an event.

    Returns:
        the log-probabilities of (1-P(A)) where log(P(A)) are given in the vec numpy array

    Examples:
        >>> c(-1e-200)
        -460.51701859880916
        # >>> np.log(1 - np.exp(-1e-200)) raises a `RuntimeWarning: divide by zero` error
    """
    # return np.log1p(-np.exp(vec))  # Not robust to -1e-200
    if np.max(np.array(vec)) > 0:
        print('vec', vec)
    return np.log(-np.expm1(vec))
def test_softplus():
    # np.exp(z_max) will overflow
    z_max = np.log(sys.float_info.max) + 1.0e10
    # 1.0 / np.exp(z_min) will overflow
    z_min = np.log(sys.float_info.min) - 1.0e10
    inputs = np.array([0.0, 1.0, -1.0, z_min, z_max])
    # naive implementation of np.log(1 + np.exp(z_max)) will overflow
    # naive implementation of z + np.log(1 + 1 / np.exp(z_min)) will
    # throw ZeroDivisionError
    outputs = np.array([
        np.log(2.0),
        np.log1p(np.exp(1.0)),
        np.log1p(np.exp(-1.0)),
        0.0,
        z_max
    ])
    assert np.allclose(outputs, softplus(inputs))
def c_code(self, node, name, inp, out, sub):
    x, = inp
    z, = out
    # These constants were obtained by looking at the output of
    # python commands like:
    #  for i in xrange(750):
    #      print i, repr(numpy.log1p(numpy.exp(theano._asarray([i, -i], dtype=dt))))
    # the boundary checks prevent us from generating inf

    # float16 limits: -17.0, 6.0
    # We use the float32 limits for float16 for now as the
    # computation will happen in float32 anyway.
    if (node.inputs[0].type == scalar.float32 or
            node.inputs[0].type == scalar.float16):
        return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
    elif node.inputs[0].type == scalar.float64:
        return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
    else:
        raise NotImplementedError('only floatingpoint is implemented')
def getMatrix(path, directed=False, log1p=False):
    matrix = np.zeros(shape=(NCOUNTRIES, NCOUNTRIES))
    with open(path, 'rb') as f:
        for line in f:
            data = line.split(' ')
            c1 = int(data[0]) - 1
            c2 = int(data[1]) - 1
            v = np.log1p(float(data[2])) if log1p else float(data[2])
            matrix[c1][c2] = v  # real data from file
            if not directed:
                matrix[c2][c1] = v  # symmetry
    print '{} loaded as a matrix!'.format(path)
    return matrix

#######################################################################
# Main
#######################################################################
def getMatrix(path, directed=False, log1p=False):
    matrix = np.zeros(shape=(NCOUNTRIES, NCOUNTRIES))
    with open(path, 'rb') as f:
        for line in f:
            data = line.split(' ')
            c1 = int(data[0]) - 1
            c2 = int(data[1]) - 1
            v = np.log1p(float(data[2])) if log1p else float(data[2])
            matrix[c1][c2] = v  # real data from file
            if not DIRECTED:
                matrix[c2][c1] = v  # symmetry
    print '{} loaded as a matrix!'.format(path)
    return matrix

#######################################################################
# Data Matrices
#######################################################################
def log1p(data, copy=False):
    """Logarithmize the data matrix.

    Computes `X = log(X + 1)`, where `log` denotes the natural logarithm.

    Parameters
    ----------
    data : array-like or AnnData
        The data matrix.
    copy : bool (default: False)
        If an AnnData is passed, determines whether a copy is returned.

    Returns
    -------
    Returns or updates data, depending on `copy`.
    """
    if isinstance(data, AnnData):
        adata = data.copy() if copy else data
        adata.X = log1p(data.X)
        return adata if copy else None
    X = data  # proceed with data matrix
    if not issparse(X):
        return np.log1p(X)
    else:
        return X.log1p()
def signPreserveNorm(self):
    """
    This is a sign preserving normalisation used in Eye.

    Similar to that used by Romano et al. in the SVM paper, except they use
    log(1+|x|), i.e. they don't divide by sigma.

    Normalizes the unravelled image.

    vectorized on 24/07/13
    """
    #shape = np.shape(self.getObject())
    Vec = np.nan_to_num(self.unravelObject())
    #normVec = np.zeros((np.shape(Vec)))
    std = np.std(Vec)
    #for i in range(len(Vec)):
    #    # log1p returns the natural log of (1+x)
    #    normVec[i] += ((Vec[i]) / np.abs(Vec[i])) * (np.log1p(np.abs(Vec[i]) / std))
    #    #print normVec[i]
    normVec = ((Vec) / np.abs(Vec)) * (np.log1p(np.abs(Vec) / std))
    return normVec
def signPreserveNorm(self):
    """
    This is a sign preserving normalisation used in Eye.

    Similar to that used by Romano et al. in the SVM paper, except they use
    log(1+|x|), i.e. they don't divide by sigma.

    Normalizes the unravelled image.

    vectorized on 24/07/13
    """
    #shape = np.shape(self.getObject())
    Vec = np.nan_to_num(np.ravel(self.getImage(), order="F"))
    #normVec = np.zeros((np.shape(Vec)))
    std = np.std(Vec)
    #for i in range(len(Vec)):
    #    # log1p returns the natural log of (1+x)
    #    normVec[i] += ((Vec[i]) / np.abs(Vec[i])) * (np.log1p(np.abs(Vec[i]) / std))
    #    #print normVec[i]
    normVec = ((Vec) / np.abs(Vec)) * (np.log1p(np.abs(Vec) / std))
    return normVec
def logsum_pair_table_interp(self, diff):
    """
    Return the log1p term from a precomputed table by interpolation. Cf. Treba.

    A minimax log-sum approximation might be even faster and more precise, TODO.

    :param diff: x-y or y-x
    """
    index = -int(diff)
    w = -diff - index
    val1 = self.logsum_table[index]
    val2 = self.logsum_table[index + 1]
    return val1 + (w * (val2 - val1))
def summarizeVdToPi(Vd):
    ''' Calculate summary vector of given doc-topic stick lengths Vd

    Returns
    --------
    sumLogPi : 1D array, size K+1
        sumELogPi[k] = \sum_d log pi_{dk}
    '''
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning,
                                message='divide by zero')
        logVd = np.log(Vd)
        log1mVd = np.log(1 - Vd)
        mask = Vd < 1e-15
        log1mVd[mask] = np.log1p(-1 * Vd[mask])
    assert not np.any(np.isnan(logVd))
    logVd = replaceInfVals(logVd)
    log1mVd = replaceInfVals(log1mVd)
    sumlogVd = np.sum(logVd, axis=0)
    sumlog1mVd = np.sum(log1mVd, axis=0)
    sumlogPi = np.hstack([sumlogVd, 0])
    sumlogPi[1:] += np.cumsum(sumlog1mVd)
    return sumlogPi
def lnPr(s, p, eps=1e-12, axis=-1):
    '''
    Compute the log-probability of bits s given Bernoulli probabilities p,
    assuming the factorized distribution \prod p^x (1-p)^(1-x)

    Parameters
    ----------
    s : bits
    p : probability of bits being 1

    Returns
    -------
    '''
    p = p.copy()
    p[p < eps] = eps
    p[p > 1 - eps] = 1 - eps
    s = np.int32(s)
    return np.sum(s * slog(p) + (1 - s) * np.log1p(-p), axis=axis)
def xgboost(train_sample, validation_sample, features, model_param):
    def evalmape(preds, dtrain):
        labels = dtrain.get_label()
        preds = np.power(log_base, preds) - 1
        # return a pair metric_name, result
        # since preds are margin (before logistic transformation, cutoff at 0)
        return 'mape', np.abs((labels - preds) / labels).sum() / len(labels)

    param = {'max_depth': model_param['depth'], 'eta': model_param['lr'], 'silent': 1,
             'objective': 'reg:linear', 'booster': 'gbtree',
             'subsample': model_param['sample'],
             'seed': model_param['seed'],
             'colsample_bytree': 1, 'min_child_weight': 1, 'gamma': 0}
    param['eval_metric'] = 'mae'
    num_round = model_param['tree']
    log_base = np.e
    plst = param.items()
    dtrain = xgb.DMatrix(train_sample[features],
                         np.log1p(train_sample['volume']) / np.log(log_base))
    dtest = xgb.DMatrix(validation_sample[features], validation_sample['volume'])
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    bst = xgb.train(plst, dtrain, num_round, watchlist, feval=evalmape)
    xgboost_prob = np.power(log_base, bst.predict(dtest)) - 1
    # MAPE
    print_mape(validation_sample['volume'], xgboost_prob, 'XGBOOST')
    return xgboost_prob
def exrf(train_sample, validation_sample, features, seed):
    log_base = np.e
    exrf_est = ExtraTreesRegressor(n_estimators=1000,
                                   criterion='mse',
                                   max_features='auto',
                                   max_depth=None,
                                   bootstrap=True,
                                   min_samples_split=4,
                                   min_samples_leaf=1,
                                   min_weight_fraction_leaf=0,
                                   max_leaf_nodes=None,
                                   random_state=seed
                                   ).fit(
        train_sample[features],
        np.log1p(train_sample['volume']) / np.log(log_base))
    exrf_prob = np.power(log_base, exrf_est.predict(validation_sample[features])) - 1
    print_mape(validation_sample['volume'], exrf_prob, 'EXTRA-RF')
    return exrf_prob
def log_1minus(x):
    """Computes log(1 - x). More accurate than doing np.log(1-x)."""
    return np.log1p(-x)
def log_prob_correct_from_qual(q):
    """Computes the log-probability of no error given a phred quality."""
    return np.log1p(-10 ** (-0.1 * q))
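A quick check, assuming only NumPy, of why these two helpers go through np.log1p(-x) rather than np.log(1 - x):

import numpy as np

x = 1e-12  # e.g. the error probability at a very high phred quality
print(np.log1p(-x))     # ~ -1e-12, accurate
print(np.log(1.0 - x))  # 1.0 - x rounds in float64, so the result is noticeably off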
def rmsle(y, yp):
    y1 = y.copy()
    y1[y1 < 0] = 0
    return rmse(np.log1p(y1), np.log1p(yp))
def log_loss_value_from_scores(weights, total_weights, scores):
    """
    computes the logistic loss value from a vector of scores in a numerically stable way
    where scores = Z.dot(rho)
    see also: http://stackoverflow.com/questions/20085768/

    this function is used for heuristics (discrete_descent, sequential_rounding).
    to save computation when running the heuristics, we store the scores and call this
    function to compute the loss directly from the scores; this avoids recomputing the
    dot product.

    Parameters
    ----------
    scores          numpy.array of scores = Z.dot(rho)
    total_weights   numpy.sum(weights) (only included to reduce computation)
    weights         numpy.array of sample weights with shape (n_rows,)

    Returns
    -------
    loss_value  scalar = 1/total_weights * sum(weights * log(1 + exp(-Z*rho)))
    """
    pos_idx = scores > 0
    loss_value = np.empty_like(scores)
    loss_value[pos_idx] = np.log1p(np.exp(-scores[pos_idx]))
    loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(np.exp(scores[~pos_idx]))
    loss_value = loss_value.dot(weights) / total_weights
    return loss_value
def log_loss_value_and_slope(Z, rho):
    """
    computes the value and slope of the logistic loss in a numerically stable way
    this function should only be used when generating cuts in cutting-plane algorithms
    (computing both the value and the slope at the same time is slightly cheaper)
    see also: http://stackoverflow.com/questions/20085768/

    Parameters
    ----------
    Z           numpy.array containing training data with shape = (n_rows, n_cols)
    rho         numpy.array of coefficients with shape = (n_cols,)

    Returns
    -------
    loss_value  scalar = 1/n_rows * sum(log(1 + exp(-Z*rho)))
    loss_slope  (n_cols x 1) vector = 1/n_rows * sum(-Z*rho ./ (1+exp(-Z*rho)))
    """
    scores = Z.dot(rho)
    pos_idx = scores > 0
    exp_scores_pos = np.exp(-scores[pos_idx])
    exp_scores_neg = np.exp(scores[~pos_idx])

    # compute loss value
    loss_value = np.empty_like(scores)
    loss_value[pos_idx] = np.log1p(exp_scores_pos)
    loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(exp_scores_neg)
    loss_value = loss_value.mean()

    # compute loss slope
    log_probs = np.empty_like(scores)
    log_probs[pos_idx] = 1.0 / (1.0 + exp_scores_pos)
    log_probs[~pos_idx] = exp_scores_neg / (1.0 + exp_scores_neg)
    loss_slope = Z.T.dot(log_probs - 1.0) / Z.shape[0]

    return loss_value, loss_slope
def skew_correction(df, numerical_features):
    # Skew correction
    skewed_feats = df[numerical_features].apply(lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    df.loc[:, tuple(skewed_feats)] = np.log1p(np.asarray(df[skewed_feats], dtype=float))
def _logcdf(self, samples):
    if self.theta == 0:
        vals = np.sum(np.log(samples), axis=1)
    else:
        old_settings = np.seterr(divide='ignore')
        vals = np.log(-np.log1p(np.expm1(-self.theta * samples[:, 0])
                                * np.expm1(-self.theta * samples[:, 1])
                                / (np.expm1(-self.theta)))) \
            - np.log(self.theta)
        np.seterr(**old_settings)
    return vals
def _ppcf(self, samples):
    if self.theta == 0:
        vals = samples[:, 0]
    else:
        vals = -np.log1p(samples[:, 0] * np.expm1(-self.theta)
                         / (np.exp(-self.theta * samples[:, 1])
                            - samples[:, 0] * np.expm1(-self.theta * samples[:, 1]))) \
            / self.theta
    return vals