We extracted the following 8 code examples from open-source Python projects to illustrate how to use scipy.stats.stats.spearmanr().
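spearmanr() takes two equal-length sequences and returns the Spearman rank-correlation coefficient together with its p-value; the examples below usually keep only element [0], the coefficient. A minimal sketch of that call pattern, with made-up data purely for illustration:

# minimal sketch; the numbers below are hypothetical, not from any of the projects cited here
from scipy.stats.stats import spearmanr

predicted = [0.91, 0.12, 0.55, 0.30]   # e.g. model similarity scores (made-up values)
gold      = [0.88, 0.05, 0.60, 0.41]   # e.g. human-annotated similarities (made-up values)

rho, p_value = spearmanr(predicted, gold)
print(rho)      # rank-correlation coefficient in [-1, 1]
print(p_value)  # p-value for the null hypothesis of no correlation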
def evaluate(representation, data):
    results = []
    for (x, y), sim in data:
        results.append((representation.similarity(x, y), sim))
    actual, expected = zip(*results)
    return spearmanr(actual, expected)[0]
def evaluate(representation, data):
    results = []
    oov = 0
    for (x, y), sim in data:
        if representation.oov(x) or representation.oov(y):
            oov += 1
            # continue
            results.append((0, sim))
        else:
            results.append((representation.similarity(x, y), sim))
    actual, expected = zip(*results)
    print "OOV: ", oov
    return spearmanr(actual, expected)[0]
def correlations_ground_truth():
    print 'ground truth'
    # load network
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering.xml.gz")
    # read counts with zeros
    article_counts = pd.read_csv(TMP+'article_counts.tsv', sep='\t')
    cor = {}
    for damping in [0.8, 0.9]:
        page_rank = pagerank(wikipedia, damping=damping)
        wikipedia.vertex_properties['page_rank_'+str(damping)] = page_rank
        page_rank_values = list()
        counts = list()
        correlations_values = {}
        for index, row in article_counts.iterrows():
            counts.append(float(row['counts']))
            page_rank_values.append(page_rank[wikipedia.vertex(int(row['target_article_id']))])
        print 'pearson'
        p = pearsonr(page_rank_values, counts)
        print p
        correlations_values['pearson'] = p
        print 'spearmanr'
        s = spearmanr(page_rank_values, counts)
        print s
        correlations_values['spearmanr'] = s
        print 'kendalltau'
        k = kendalltau(page_rank_values, counts)
        print k
        correlations_values['kendalltau'] = k
        cor['page_rank_'+str(damping)] = correlations_values
    write_pickle(HOME+'output/correlations/correlations_pagerank.obj', cor)
def correlations_weighted_unweighted(labels):
    # load network
    print 'weighted vs unweighted'
    name = '_'.join(labels)
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering_"+name+".xml.gz")
    # read counts with zeros
    wikipedia_u = load_graph("output/weightedpagerank/wikipedianetwork_sem_sim_distinct_links.xml.gz")
    correlations_weighted_pagerank = {}
    for label in labels:
        for damping in [0.8, 0.85, 0.9]:
            correlations_values = {}
            key_weighted = label+"_page_rank_weighted_"+str(damping)
            pagerank_weighted = wikipedia.vertex_properties[key_weighted]
            key_unweighted = "page_rank"+str(damping)
            pagerank_unweighted = wikipedia_u.vertex_properties[key_unweighted]
            print 'pearson'
            p = pearsonr(pagerank_weighted.a, pagerank_unweighted.a)
            print p
            correlations_values['pearson'] = p
            print 'spearmanr'
            s = spearmanr(pagerank_weighted.a, pagerank_unweighted.a)
            print s
            correlations_values['spearmanr'] = s
            print 'kendalltau'
            k = kendalltau(pagerank_weighted.a, pagerank_unweighted.a)
            print k
            correlations_values['kendalltau'] = k
            correlations_weighted_pagerank[label+str(damping)] = correlations_values
    write_pickle(HOME+'output/correlations/correlations_pagerank_weightedvsunweighted'+name+'.obj', correlations_weighted_pagerank)
def evaluate(m, data):
    results = []
    for (x, y), sim in data:
        # print(x, y)
        if m.has_word(x) and m.has_word(y):
            # print(m.get_row(x).dot(m.get_row(y)))
            results.append((m.get_row(x).dot(m.get_row(y)), sim))
        else:
            pass
    actual, expected = zip(*results)
    return spearmanr(actual, expected)[0]
def evaluate(representation, data):
    results = []
    seen_num = 0
    for (x, y), sim in data:
        if representation.similarity(x, y) is not None:
            seen_num += 1
            results.append((representation.similarity(x, y), sim))
    actual, expected = zip(*results)
    print ("seen/total: " + str(seen_num) + "/" + str(len(data)))
    return spearmanr(actual, expected)[0]
def correlations(network_name):
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()
    cursor = conn.cursor()
    # wikipedia graph structural statistics
    results = None
    try:
        results = cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
        results = cursor.fetchall()
    except MySQLdb.Error, e:
        print ('error retrieving xy coord for all links links %s (%d)' % (e.args[1], e.args[0]))
    print 'after sql load'

    print 'before load'
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_"+network_name+".xml.gz")
    print 'after load'

    cor = {}
    # for kk in ['page_rank', 'page_rank_weighted']:
    for kk in ['page_rank_weighted']:
        correlations_sem_sim_weighted_pagerank = {}
        # for damping in [0.8, 0.85, 0.9, 0.95]:
        for damping in [0.85]:
            correlations = {}
            print damping
            key = kk+str(damping)
            print key
            pagerank = wikipedia.vertex_properties[key]
            counts = []
            page_rank_values = []
            for row in results:
                counts.append(float(row[1]))
                page_rank_values.append(pagerank[wikipedia.vertex(int(row[0]))])
            # for index, row in df.iterrows():
            #     counts.append(float(row['counts']))
            #     page_rank_values.append(pagerank[wikipedia.vertex(int(row['target_article_id']))])
            print 'pearson'
            p = pearsonr(page_rank_values, counts)
            print p
            correlations['pearson'] = p
            print 'spearmanr'
            s = spearmanr(page_rank_values, counts)
            print s
            correlations['spearmanr'] = s
            print 'kendalltau'
            k = kendalltau(page_rank_values, counts)
            print k
            correlations['kendalltau'] = k
            correlations_sem_sim_weighted_pagerank[key] = correlations
        cor[kk] = correlations_sem_sim_weighted_pagerank
    write_pickle(HOME+'output/correlations/correlations_pagerank_without_zeros'+network_name+'.obj', cor)
def evaluate_sim(model, testsets, testsetNames, getAbsentWords=False, vocab_dict=None, cutPoint=-1):
    # words in absentModelID2Word and words in absentVocabWords don't overlap
    # words in the vocab but not in the model
    absentModelID2Word = {}
    # words not in the vocab (of coz not in the model)
    absentVocabWords = {}
    # words in the vocab but below the cutPoint (id > cutPoint), may be in or out of the model
    cutVocabWords = {}
    # a set of spearman coeffs, in the same order as in testsets
    spearmanCoeff = []

    for i, testset in enumerate(testsets):
        modelResults = []
        groundtruth = []

        for x, y, sim in testset:
            if vocab_dict and x in vocab_dict:
                xid = vocab_dict[x][0]
                if cutPoint > 0 and xid > cutPoint:
                    cutVocabWords[x] = 1
            if vocab_dict and y in vocab_dict:
                yid = vocab_dict[y][0]
                if cutPoint > 0 and yid > cutPoint:
                    cutVocabWords[y] = 1

            if x not in model:
                if getAbsentWords and x in vocab_dict:
                    absentModelID2Word[xid] = x
                else:
                    absentVocabWords[x] = 1
            elif y not in model:
                if getAbsentWords and y in vocab_dict:
                    absentModelID2Word[yid] = y
                else:
                    absentVocabWords[y] = 1
            else:
                modelResults.append( model.similarity(x, y) )
                groundtruth.append(sim)
                #print "%s %s: %.3f %.3f" %(x, y, modelResults[-1], sim)

        print "%s: %d test pairs, %d valid" %( testsetNames[i], len(testset), len(modelResults) ),
        spearmanCoeff.append( spearmanr(modelResults, groundtruth)[0] )
        print ", %.5f" %spearmanCoeff[-1]

    # return hashes directly, for ease of merge
    return spearmanCoeff, absentModelID2Word, absentVocabWords, cutVocabWords

# vocab_dict is a vocabulary dict, usually bigger than model.vocab, loaded from a unigram file
# its purpose is to find absent words in the model