Python scipy.stats.stats module: spearmanr() code examples

The following 8 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.stats.spearmanr().
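Before the project examples, here is a minimal sketch of the call itself (the scores below are invented for illustration): spearmanr takes two equal-length sequences and returns a (correlation, p-value) pair, which is why the examples below index the result with [0] to keep only the rank-correlation coefficient. Note that scipy.stats.stats is the legacy import path; current SciPy exposes the same function as scipy.stats.spearmanr.

from scipy.stats.stats import spearmanr

# Hypothetical model scores and human similarity judgements for the same word pairs.
model_scores = [0.72, 0.31, 0.55, 0.10]
human_scores = [7.5, 2.1, 6.0, 1.3]

rho, p_value = spearmanr(model_scores, human_scores)
print("Spearman rho: %.3f (p = %.3f)" % (rho, p_value))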

Project: histwords    Author: williamleif
def evaluate(representation, data):
    # Pair each model similarity with the gold similarity score, then report Spearman's rho.
    results = []
    for (x, y), sim in data:
        results.append((representation.similarity(x, y), sim))
    actual, expected = zip(*results)
    return spearmanr(actual, expected)[0]
Project: histwords    Author: williamleif
def evaluate(representation, data):
    results = []
    oov = 0
    for (x, y), sim in data:
        if representation.oov(x) or representation.oov(y):
            oov += 1
            # Out-of-vocabulary pairs are scored 0 rather than skipped.
            results.append((0, sim))
        else:
            results.append((representation.similarity(x, y), sim))
    actual, expected = zip(*results)
    print("OOV: ", oov)
    return spearmanr(actual, expected)[0]
Project: wikilinks    Author: trovdimi
def correlations_ground_truth():
    print('ground truth')
    # load network
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering.xml.gz")
    # read counts with zeros
    article_counts = pd.read_csv(TMP+'article_counts.tsv', sep='\t')
    cor = {}
    for damping in [0.8, 0.9]:
        page_rank = pagerank(wikipedia, damping=damping)
        wikipedia.vertex_properties['page_rank_'+str(damping)] = page_rank
        page_rank_values = list()
        counts = list()
        correlations_values = {}
        for index, row in article_counts.iterrows():
            counts.append(float(row['counts']))
            page_rank_values.append(page_rank[wikipedia.vertex(int(row['target_article_id']))])
        print('pearson')
        p = pearsonr(page_rank_values, counts)
        print(p)
        correlations_values['pearson'] = p
        print('spearmanr')
        s = spearmanr(page_rank_values, counts)
        print(s)
        correlations_values['spearmanr'] = s
        print('kendalltau')
        k = kendalltau(page_rank_values, counts)
        print(k)
        correlations_values['kendalltau'] = k
        cor['page_rank_'+str(damping)] = correlations_values
    write_pickle(HOME+'output/correlations/correlations_pagerank.obj', cor)
Project: wikilinks    Author: trovdimi
def correlations_weighted_unweighted(labels):
    # load the weighted and unweighted networks
    print('weighted vs unweighted')
    name = '_'.join(labels)
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering_"+name+".xml.gz")
    wikipedia_u = load_graph("output/weightedpagerank/wikipedianetwork_sem_sim_distinct_links.xml.gz")
    correlations_weighted_pagerank = {}
    for label in labels:
        for damping in [0.8, 0.85, 0.9]:
            correlations_values = {}
            key_weighted = label+"_page_rank_weighted_"+str(damping)
            pagerank_weighted = wikipedia.vertex_properties[key_weighted]
            key_unweighted = "page_rank"+str(damping)
            pagerank_unweighted = wikipedia_u.vertex_properties[key_unweighted]
            print('pearson')
            p = pearsonr(pagerank_weighted.a, pagerank_unweighted.a)
            print(p)
            correlations_values['pearson'] = p
            print('spearmanr')
            s = spearmanr(pagerank_weighted.a, pagerank_unweighted.a)
            print(s)
            correlations_values['spearmanr'] = s
            print('kendalltau')
            k = kendalltau(pagerank_weighted.a, pagerank_unweighted.a)
            print(k)
            correlations_values['kendalltau'] = k
            correlations_weighted_pagerank[label+str(damping)] = correlations_values

    write_pickle(HOME+'output/correlations/correlations_pagerank_weightedvsunweighted'+name+'.obj', correlations_weighted_pagerank)
Project: vsmlib    Author: undertherain
def evaluate(m, data):
    results = []
    for (x, y), sim in data:
        # Skip pairs where either word is missing from the model.
        if m.has_word(x) and m.has_word(y):
            results.append((m.get_row(x).dot(m.get_row(y)), sim))
    actual, expected = zip(*results)
    return spearmanr(actual, expected)[0]
Project: ngram2vec    Author: zhezhaoa
def evaluate(representation, data):
    results = []
    seen_num = 0
    for (x, y), sim in data:
        # Only pairs the representation can score count toward the correlation.
        predicted = representation.similarity(x, y)
        if predicted is not None:
            seen_num += 1
            results.append((predicted, sim))
    actual, expected = zip(*results)
    print("seen/total: " + str(seen_num) + "/" + str(len(data)))
    return spearmanr(actual, expected)[0]
Project: wikilinks    Author: trovdimi
def correlations(network_name):
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()
    cursor = conn.cursor()
    # wikipedia graph structural statistics
    results = None
    try:
        cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
        results = cursor.fetchall()
    except MySQLdb.Error as e:
        print('error retrieving click counts for all links %s (%d)' % (e.args[1], e.args[0]))
    print('after sql load')

    print('before load')
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_"+network_name+".xml.gz")
    print('after load')
    cor = {}
    # for kk in ['page_rank', 'page_rank_weighted']:
    for kk in ['page_rank_weighted']:
        correlations_sem_sim_weighted_pagerank = {}
        # for damping in [0.8, 0.85, 0.9, 0.95]:
        for damping in [0.85]:
            correlations = {}
            print(damping)
            key = kk+str(damping)
            print(key)
            pagerank = wikipedia.vertex_properties[key]
            counts = []
            page_rank_values = []
            for row in results:
                counts.append(float(row[1]))
                page_rank_values.append(pagerank[wikipedia.vertex(int(row[0]))])
            print('pearson')
            p = pearsonr(page_rank_values, counts)
            print(p)
            correlations['pearson'] = p
            print('spearmanr')
            s = spearmanr(page_rank_values, counts)
            print(s)
            correlations['spearmanr'] = s
            print('kendalltau')
            k = kendalltau(page_rank_values, counts)
            print(k)
            correlations['kendalltau'] = k
            correlations_sem_sim_weighted_pagerank[key] = correlations
        cor[kk] = correlations_sem_sim_weighted_pagerank

    write_pickle(HOME+'output/correlations/correlations_pagerank_without_zeros'+network_name+'.obj', cor)
Project: vmfmix    Author: askerlee
def evaluate_sim(model, testsets, testsetNames, getAbsentWords=False, vocab_dict=None, cutPoint=-1):
    # words in absentModelID2Word and words in absentVocabWords don't overlap

    # words in the vocab but not in the model
    absentModelID2Word = {}
    # words not in the vocab (and therefore not in the model)
    absentVocabWords = {}
    # words in the vocab but past the cutPoint (id > cutPoint); may or may not be in the model
    cutVocabWords = {}
    # Spearman coefficients, one per testset, in the same order as testsets
    spearmanCoeff = []

    for i,testset in enumerate(testsets):
        modelResults = []
        groundtruth = []

        for x, y, sim in testset:
            if vocab_dict and x in vocab_dict:
                xid = vocab_dict[x][0]
                if cutPoint > 0 and xid > cutPoint:
                    cutVocabWords[x] = 1

            if vocab_dict and y in vocab_dict:
                yid = vocab_dict[y][0]
                if cutPoint > 0 and yid > cutPoint:
                    cutVocabWords[y] = 1

            if x not in model:
                if getAbsentWords and x in vocab_dict:
                    absentModelID2Word[xid] = x
                else:
                    absentVocabWords[x] = 1
            elif y not in model:
                if getAbsentWords and y in vocab_dict:
                    absentModelID2Word[yid] = y
                else:
                    absentVocabWords[y] = 1
            else:
                modelResults.append( model.similarity(x, y) )
                groundtruth.append(sim)
                #print "%s %s: %.3f %.3f" %(x, y, modelResults[-1], sim)
        print "%s: %d test pairs, %d valid" %( testsetNames[i], len(testset), len(modelResults) ),
        spearmanCoeff.append( spearmanr(modelResults, groundtruth)[0] )
        print ", %.5f" %spearmanCoeff[-1]

    # return the dicts directly, for ease of merging
    return spearmanCoeff, absentModelID2Word, absentVocabWords, cutVocabWords

# vocab_dict is a vocabulary dict, usually bigger than model.vocab, loaded from a unigram file
# its purpose is to find absent words in the model