The following 25 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.stats.pearsonr().
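Before the project code, here is a minimal, self-contained sketch of the basic call pattern. The toy arrays x and y are made up for illustration: pearsonr() takes two equal-length sequences and returns the correlation coefficient r together with the two-sided p-value.

from scipy.stats.stats import pearsonr
import numpy as np

# Hypothetical toy data: y is a noisy linear function of x.
x = np.arange(10, dtype=float)
y = 2.0 * x + np.random.normal(scale=0.5, size=10)

# r lies in [-1, 1]; p is the p-value for the null hypothesis of no correlation.
r, p = pearsonr(x, y)
print('r = {:.3f}, p = {:.3g}'.format(r, p))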
def determineCoefficientDifference(self, coefficients):
    targetList = []
    comparisionList = []
    for key in self.wantedCoefficients:
        targetList.append(self.wantedCoefficients[key])
        if key in coefficients:
            comparisionList.append(coefficients[key])
        else:
            comparisionList.append(0)
    for key in coefficients:
        if key in self.wantedCoefficients:
            continue
        else:
            targetList.append(0)
            comparisionList.append(coefficients[key])
    return pearsonr(targetList, comparisionList)
def plot_scatter_charts(data, file_name):
    scatters = []
    for lang, values in data.items():
        s = figure(width=300, plot_height=300, title=lang)
        s.yaxis.formatter = NumeralTickFormatter(format="0.0a")
        s.circle(values[0], values[1], size=10, color="navy", alpha=0.5)
        x = np.linspace(1, 100, 10)
        # noinspection PyTupleAssignmentBalance
        m, b = np.polyfit(values[0], values[1], 1)
        y = m * x + b
        corr_coef = round(pearsonr(values[0], values[1])[0], 1)
        s.line(x, y, legend=f'PCC = {corr_coef}')
        scatters.append(s)
    split_scatters = split(scatters, 3)
    p = gridplot(split_scatters)
    output_file(file_name)
    show(p)
def correlation(self, x, y, show=True):
    '''
    Computes Pearson's correlation value of variables x and y.
    Diagonal values are removed.
    :param x: numpy array, independent variable
    :param y: numpy array, dependent variable
    :param show: if True then shows Pearson's correlation and p-value.
    :return:
    '''
    if not self.diagonal:
        xflatten = np.delete(x, [i * (x.shape[0] + 1) for i in range(x.shape[0])])
        yflatten = np.delete(y, [i * (y.shape[0] + 1) for i in range(y.shape[0])])
        pc = pearsonr(xflatten, yflatten)
    else:
        pc = pearsonr(x.flatten(), y.flatten())
    if show:
        utils.printf('Pearson Correlation: {}'.format(pc[0]))
        utils.printf('p-value: {}'.format(pc[1]))
    return pc

#####################################################################################
# Handlers
#####################################################################################
def get_best_two_params(self):
    param_names = self.jobs.get_param_names()
    if len(param_names) == 2:
        return param_names  # there can be only two.

    # how much does each parameter correlate with the achieved loss...
    param_losscorr = {}
    for name in self.param_names:
        corr_coef, pval = pearsonr(self.losses, self.param_values[name])
        logging.info('Correlation of {} with loss: {}'.format(name, corr_coef))
        param_losscorr[name] = abs(corr_coef)  # abs, since we don't care about the direction

    sorted_by_corr = sorted(param_losscorr.items(), key=lambda x: x[1], reverse=True)
    best_params = []
    for i in sorted_by_corr:
        if math.isnan(i[1]):
            continue
        best_params.append(i[0])
        if len(best_params) == 2:
            return best_params
    return best_params
    # return sorted_by_corr[0][0], sorted_by_corr[1][0]

# TODO: could be made more general/robust
def correlations_ground_truth():
    print 'ground truth'
    # load network
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering.xml.gz")
    # read counts with zeros
    article_counts = pd.read_csv(TMP + 'article_counts.tsv', sep='\t')
    cor = {}
    for damping in [0.8, 0.9]:
        page_rank = pagerank(wikipedia, damping=damping)
        wikipedia.vertex_properties['page_rank_' + str(damping)] = page_rank
        page_rank_values = list()
        counts = list()
        correlations_values = {}
        for index, row in article_counts.iterrows():
            counts.append(float(row['counts']))
            page_rank_values.append(page_rank[wikipedia.vertex(int(row['target_article_id']))])
        print 'pearson'
        p = pearsonr(page_rank_values, counts)
        print p
        correlations_values['pearson'] = p
        print 'spearmanr'
        s = spearmanr(page_rank_values, counts)
        print s
        correlations_values['spearmanr'] = s
        print 'kendalltau'
        k = kendalltau(page_rank_values, counts)
        print k
        correlations_values['kendalltau'] = k
        cor['page_rank_' + str(damping)] = correlations_values
    write_pickle(HOME + 'output/correlations/correlations_pagerank.obj', cor)
def correlations_weighted_unweighted(labels):
    # load network
    print 'weighted vs unweighted'
    name = '_'.join(labels)
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering_" + name + ".xml.gz")
    # read counts with zeros
    wikipedia_u = load_graph("output/weightedpagerank/wikipedianetwork_sem_sim_distinct_links.xml.gz")
    correlations_weighted_pagerank = {}
    for label in labels:
        for damping in [0.8, 0.85, 0.9]:
            correlations_values = {}
            key_weighted = label + "_page_rank_weighted_" + str(damping)
            pagerank_weighted = wikipedia.vertex_properties[key_weighted]
            key_unweighted = "page_rank" + str(damping)
            pagerank_unweighted = wikipedia_u.vertex_properties[key_unweighted]
            print 'pearson'
            p = pearsonr(pagerank_weighted.a, pagerank_unweighted.a)
            print p
            correlations_values['pearson'] = p
            print 'spearmanr'
            s = spearmanr(pagerank_weighted.a, pagerank_unweighted.a)
            print s
            correlations_values['spearmanr'] = s
            print 'kendalltau'
            k = kendalltau(pagerank_weighted.a, pagerank_unweighted.a)
            print k
            correlations_values['kendalltau'] = k
            correlations_weighted_pagerank[label + str(damping)] = correlations_values
    write_pickle(HOME + 'output/correlations/correlations_pagerank_weightedvsunweighted' + name + '.obj',
                 correlations_weighted_pagerank)
def cor_analysis(co_price, pcb_price):
    """
    Correlation analysis between copper price and PCB price.
    """
    cor_draw(co_price, pcb_price)
    print(pearsonr(co_price.values, pcb_price.values))
def compute_corr(self, ref_data, gen_data):
    corr_coef = pearsonr(ref_data, gen_data)
    return corr_coef[0]
def sum_corr(view1, view2, flag=''):
    print("test correlation")
    corr = 0
    for i, j in zip(view1, view2):
        corr += measures.pearsonr(i, j)[0]
    print('avg sum corr ::', flag, '::', corr / len(view1))
def cal_sim(model, ind1, ind2=1999):
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    label1 = np.load('test_l.npy')
    x1 = project(model, [view1, np.zeros_like(view1)])
    x2 = project(model, [np.zeros_like(view2), view2])
    label2 = []
    count = 0
    MAP = 0
    for i, j in enumerate(x1):
        cor = []
        AP = 0
        for y in x2:
            temp1 = j.tolist()
            temp2 = y.tolist()
            cor.append(pearsonr(temp1, temp2))
        # if i == np.argmax(cor):
        #     count += 1
        # val = [(q, (i*ind1+p)) for p, q in enumerate(cor)]
        val = [(q, p) for p, q in enumerate(cor)]
        val.sort()
        val.reverse()
        label2.append(val[0:4])
        t = [w[1] for w in val[0:7]]
        # print t
        for x, y in enumerate(t):
            if y in range(i, i + 5):
                AP += 1 / (x + 1)
        print(t)
        print(AP)
        MAP += AP
    # print 'accuracy :- ', float(count)*100/ind1, '%'
    print('MAP is : ', MAP / ind1)
def mospat_manip_calcstats(c_Variable, c_Model, f_ObsData, f_ModelData):
    # ELIMINATING ELEMENTS WITH NAN
    idx_ModData = np.where(~np.isnan(f_ModelData))
    idx_ObsData = np.where(~np.isnan(f_ObsData))
    f_ObsData_aux = f_ObsData[idx_ModData]
    f_ModelData_aux = f_ModelData[idx_ModData]
    # Model Mean
    f_ModMean = np.nanmean(f_ModelData_aux)
    # Obs Mean
    f_ObsMean = np.nanmean(f_ObsData_aux)
    # Mean Bias
    f_MeanBias = f_ModMean - f_ObsMean
    # Mean Normalized Bias
    f_mnb = (f_ModMean - f_ObsMean) / f_ObsMean
    # Root Mean Square Error
    f_rms = np.sqrt(((f_ModelData_aux - f_ObsData_aux) ** 2).mean())
    # Pearson Correlation Coefficient
    f_corr = pearsonr(f_ObsData_aux, f_ModelData_aux)[0]
    # Standard Deviation of Observations
    f_StdObs = np.std(f_ObsData_aux)
    # Standard Deviation of Model Data
    f_StdMod = np.std(f_ModelData_aux)
    # Ratio of Standard Deviation
    f_StdRatio = f_StdMod / f_StdObs

    f_Statistics = [f_ObsMean, f_ModMean, f_MeanBias, f_mnb, f_rms, f_corr,
                    f_StdObs, f_StdMod, f_StdRatio]
    return f_Statistics
def evaluate(DATA_SET):
    PREDS = [predict(u, m) for (u, m, r) in DATA_SET]
    REALS = [r for (u, m, r) in DATA_SET]
    mae = sum(abs(REALS[i] - PREDS[i]) for i in range(len(PREDS))) / len(PREDS)
    print 'MAE = ', round(mae, 3)
    r, p = pearsonr(PREDS, REALS)
    print 'cor = ', round(r, 3)
def adbPredictor(df):
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    # clf = linear_model.SGDRegressor()
    clf = ensemble.AdaBoostRegressor()
    clf.fit(dataTrainX, dataTrainY)
    predicted = clf.predict(dataTestX)
    fig, ax = plotter.subplots()
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, predicted)
    ax.set_xlabel('Measured')
    predicted = np.reshape(predicted, (predicted.size, 1))
    corrCoeff = pearsonr(dataTestY, predicted)
    print(corrCoeff[0])
    plotter.show()
    return predicted
def knnPredictor(df):
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)

    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []

    for k in range(1, 200, 1):
        knnModel = KNeighborsRegressor(n_neighbors=k)
        knnModel.fit(dataTrainX, dataTrainY)
        knnpredicted = knnModel.predict(dataTestX)
        corelationCoefficient = pearsonr(dataTestY, knnpredicted)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    # plotter.plot(corelationCoefficiantArray)
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])
    knnpredictedBest = knnModelBest.predict(dataTestX)
    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
def randomForestPredictor(df):
    # bbValTest, bbValTrain, ptChangeTest, ptChangeTrain = sample(df)
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []

    for k in range(1, 100, 1):
        rfsModel = RandomForestRegressor(n_estimators=k)
        rfsModel.fit(dataTrainX, dataTrainY)
        rfspredicted = rfsModel.predict(dataTestX)
        rfspredicted = np.reshape(rfspredicted, (rfspredicted.size, 1))
        corelationCoefficient = pearsonr(dataTestY, rfspredicted)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    plotter.plot(corelationCoefficiantArray)
    # plotter.show()
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    rfsModelBest = RandomForestRegressor(n_estimators=bestK)
    rfsModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Correlation Coefficient =")
    print(corelationCoefficiantDictionary[bestK])
    rfsPredictedBest = rfsModelBest.predict(dataTestX)
    fig, ax = plotter.subplots()
    ax.set_ylabel('Predicted RandomForest Weekly')
    ax.scatter(dataTestY, rfsPredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
def predictKnn(data, priceToPredict):
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []
    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]

    for k in range(1, 100, 1):
        neigh = KNeighborsRegressor(n_neighbors=k)  # n = 7 best fits
        neigh.fit(openingPriceTrain, closingPriceTrain)
        closingPriceTestArray = np.reshape(closingPriceTest, -1)
        knnpr = neigh.predict(openingPriceTest)
        predictedArray = np.reshape(knnpr, -1)
        corelationCoefficient = pearsonr(closingPriceTestArray, predictedArray)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    plotter.plot(corelationCoefficiantArray)
    # plotter.show()
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    neighBest = KNeighborsRegressor(n_neighbors=bestK)
    neighBest.fit(openingPriceTrain, closingPriceTrain)
    openingPriceToPredict = np.array([priceToPredict])
    print("K = ")
    print(bestK)
    print(neighBest.predict(openingPriceToPredict))
def predict(data, priceToPredict):
    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]

    clf = svm.LinearSVR()
    clf.fit(openingPriceTrain, closingPriceTrain)
    predicted2 = clf.predict(openingPriceTest)

    score = clf.fit(openingPriceTrain, closingPriceTrain).score(openingPriceTest, closingPriceTest)
    # print(score)

    fig, ax = plotter.subplots()
    ax.scatter(openingPriceTrain, closingPriceTrain)
    ax.set_ylabel('Predicted SVM')
    ax.scatter(closingPriceTest, clf.predict(openingPriceTest))
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    # plotter.show()

    closingPriceTestArray = np.reshape(closingPriceTest, -1)
    clfpr = clf.predict(openingPriceTest)
    predictedArray = np.reshape(clfpr, -1)
    print(pearsonr(closingPriceTestArray, predictedArray))

    openingPriceToPredict = np.array([priceToPredict])
    print(clf.predict(openingPriceToPredict))
    return clf.predict(np.array([openingPriceToPredict]))
def Compare_results(test_data, Desctree):
    predict = []
    predict = PrintClass(test_data, Desctree)
    test = []
    # i = 0
    # TruePositive = 0
    # TrueNegative = 0
    # FalsePositive = 0
    # FalseNegative = 0
    # print("check")
    for testrec in test_data:
        test.append(float(testrec[0]))
    # print("test", test)
    R2 = pearsonr(predict, test)
    # if testrec[0] == predict[i]:
    #     if predict[i] == 1:
    #         TruePositive += 1
    #     else:
    #         TrueNegative += 1
    # else:
    #     if predict[i] == 1:
    #         FalsePositive += 1
    #     else:
    #         FalseNegative += 1
    # # Falsecount += 1
    # i = i + 1
    # Accuracy = float((TruePositive + TrueNegative) / (TruePositive + FalsePositive + TrueNegative + FalseNegative))
    return (R2)
def calc_correlations(data_file, genre_column, network_metric_columns, output_path=None):
    dataframe = load_master_file(data_file)
    target_df = dataframe[genre_column]
    correlations = {}
    index = 0
    for column in network_metric_columns:
        try:
            trimmed_df = dataframe.filter(items=[genre_column, column])
            trimmed_df = trimmed_df[np.isfinite(trimmed_df[genre_column])]
            trimmed_df = trimmed_df[np.isfinite(trimmed_df[column])]
            trimmed_df.apply(lambda x: pd.to_numeric(x, errors='ignore'))
            correlations[genre_column + '_|_' + column] = pearsonr(trimmed_df[genre_column], trimmed_df[column])
        except:
            print "Error calculating correlation"
        index += 1

    # Option: Print Correlations to CSV
    if output_path:
        with open(output_path, 'wb') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(["Variable", "pearson", "p-value"])
            for key, value in correlations.items():
                writer.writerow([key, value[0], value[1]])
    return correlations
def solution6():
    lookup = getTweets()
    for i in tags:
        for j in tags:
            print(pearsonr(lookup[i], lookup[j]))
def correlations(network_name):
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()
    cursor = conn.cursor()
    # wikipedia graph structural statistics
    results = None
    try:
        results = cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
        results = cursor.fetchall()
    except MySQLdb.Error, e:
        print ('error retrieving xy coord for all links links %s (%d)' % (e.args[1], e.args[0]))
    print 'after sql load'

    print 'before load'
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_" + network_name + ".xml.gz")
    print 'after load'

    cor = {}
    # for kk in ['page_rank', 'page_rank_weighted']:
    for kk in ['page_rank_weighted']:
        correlations_sem_sim_weighted_pagerank = {}
        # for damping in [0.8, 0.85, 0.9, 0.95]:
        for damping in [0.85]:
            correlations = {}
            print damping
            key = kk + str(damping)
            print key
            pagerank = wikipedia.vertex_properties[key]
            counts = []
            page_rank_values = []
            for row in results:
                counts.append(float(row[1]))
                page_rank_values.append(pagerank[wikipedia.vertex(int(row[0]))])
            # for index, row in df.iterrows():
            #     counts.append(float(row['counts']))
            #     page_rank_values.append(pagerank[wikipedia.vertex(int(row['target_article_id']))])
            print 'pearson'
            p = pearsonr(page_rank_values, counts)
            print p
            correlations['pearson'] = p
            print 'spearmanr'
            s = spearmanr(page_rank_values, counts)
            print s
            correlations['spearmanr'] = s
            print 'kendalltau'
            k = kendalltau(page_rank_values, counts)
            print k
            correlations['kendalltau'] = k
            correlations_sem_sim_weighted_pagerank[key] = correlations
        cor[kk] = correlations_sem_sim_weighted_pagerank

    write_pickle(HOME + 'output/correlations/correlations_pagerank_without_zeros' + network_name + '.obj', cor)
def find_feature_transformation(feature_name, feature_value, scores):
    """
    Identify the best transformation based on the highest absolute
    Pearson correlation with human score.

    Parameters
    ----------
    feature_name: str
        Name of feature for which to find the transformation.
    feature_value: pandas Series
        Series containing feature values.
    scores: pandas Series
        Numeric human scores.

    Returns
    -------
    best_transformation: str
        The name of the transformation which gives the highest correlation
        between the feature values and the human scores. See
        :ref:`documentation <select_transformations_rsmtool>` for the full
        list of transformations.
    """

    # Do not use sqrt and ln for potential negative features.
    # Do not use inv for positive features.
    if any(feature_value < 0):
        applicable_transformations = ['org', 'inv']
    else:
        applicable_transformations = ['org', 'sqrt', 'addOneInv', 'addOneLn']

    correlations = []
    for trans in applicable_transformations:
        try:
            transformed_value = transform_feature(feature_name, feature_value, trans)
            correlations.append(abs(pearsonr(transformed_value, scores)[0]))
        except ValueError:
            # If the transformation returns an error, append 0.
            correlations.append(0)
    best = np.argmax(correlations)
    best_transformation = applicable_transformations[best]
    return best_transformation
def calc_median_angle_params(subject):
    """
    Calculates median angle parameters of a subject

    Parameters
    ----------
    subject : string
        Path of a subject's nifti file.

    Returns
    -------
    mean_bold : float
        Mean bold amplitude of a subject.
    median_angle : float
        Median angle of a subject.
    """
    import numpy as np
    import nibabel as nb

    data = nb.load(subject).get_data().astype('float64')
    mask = (data != 0).sum(-1) != 0
    print 'Loaded ' + subject
    print 'Volume size ', data.shape

    Y = data[mask].T
    print 'Data shape ', Y.shape

    Yc = Y - np.tile(Y.mean(0), (Y.shape[0], 1))
    Yn = Yc / np.tile(np.sqrt((Yc * Yc).sum(0)), (Yc.shape[0], 1))
    U, S, Vh = np.linalg.svd(Yn, full_matrices=False)

    glb = (Yn / np.tile(Yn.std(0), (Y.shape[0], 1))).mean(1)
    from scipy.stats.stats import pearsonr
    corr = pearsonr(U[:, 0], glb)
    print "PC1_glb r: ", corr

    PC1 = U[:, 0] if corr[0] >= 0 else -U[:, 0]
    median_angle = np.median(np.arccos(np.dot(PC1.T, Yn)))
    median_angle *= 180.0 / np.pi
    Yp = Yc  # /np.tile(Y.mean(0), (Y.shape[0], 1))
    mean_bold = Yp.std(0).mean()

    print 'Median Angle ', median_angle
    print 'Mean Bold ', mean_bold

    return mean_bold, median_angle