Python wikipedia module: search() usage examples

The following 23 code examples, extracted from open-source Python projects, illustrate how wikipedia.search() is used in practice.
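For orientation, here is a minimal standalone sketch of the call itself (not taken from any of the projects below; the keyword names follow the library's documented search(query, results=10, suggestion=False) signature):

import wikipedia

# Plain search: returns a list of matching page titles.
titles = wikipedia.search("python", results=5)

# With suggestion=True the library returns a (titles, suggestion) tuple;
# suggestion is a spelling-suggestion string, or None.
titles, suggestion = wikipedia.search("pythn", suggestion=True)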

Project: Abb1t    Author: k-freeman    | project source | file source
def run(self):
        while True:
            msg = self.queue_in.get()  # get() is blocking
            match = re.search(r'^(?:/|!)wiki (.*)$', msg.get_text().lower())
            if match:
                reply = ""
                try:
                    related_entries = wikipedia.search(match.group(1))
                    w = wikipedia.page(match.group(1))
                    reply1 = "*{}*\n".format(w.title)
                    reply2 = "{}\n".format(w.summary)
                    reply3 = "\n*related topics*:\n- {}".format("\n- ".join(related_entries))

                    if len(reply1+reply2+reply3)>4096:
                        reply = reply1 + reply2[:4092-len(reply1)-len(reply3)] + "...\n" + reply3  # truncate so the whole reply fits Telegram's 4096-character message limit
                    else:
                        reply = reply1+reply2+reply3
                except wikipedia.DisambiguationError as e:
                    related_entries = str(e).split(":",1)[1].split("\n")[1:]
                    reply = "This query was too ambiguous. Choose one of these:\n- {}".format("\n- ".join(related_entries))
                except:
                    reply = "No matches returned for this request."
                if reply:
                    self.bot.sendMessage(msg.get_chat_id(), reply, parse_mode="Markdown")
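A note on the DisambiguationError branch above: parsing str(e) to recover the candidate titles is fragile, since it depends on the exception's message format. The exception object exposes the candidates directly as e.options, so a minimal alternative sketch (query is a stand-in name) would be:

try:
    w = wikipedia.page(query)
except wikipedia.DisambiguationError as e:
    # e.options holds the candidate page titles directly, no string parsing needed
    reply = "This query was too ambiguous. Choose one of these:\n- {}".format("\n- ".join(e.options))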
Project: LDA-REST    Author: valentinarho    | project source | file source
def label_topic_by_probability(cls, topic_description, min_word_probability=0.010, max_words=6):
        """
        Try to disambiguate a topic considering all words with a weight greater than min_word_probability
        :param max_words:
        :param topic_description: is a list of pairs  (word, word_probability)
        :param min_word_probability: is the minimum probability for words
        :return: list of strings, possible wikipedia pages
        """
        words = [w for w, p in topic_description if p >= min_word_probability]
        words = words[:max_words]

        if len(words) == 0:
            # if no words are over the threshold return empty
            res = []
        else:
            res = wikipedia.search(' '.join(words))

        return res
Project: chalice-linebot    Author: c-bata    | project source | file source
def wikipedia_search(word):
    """Search for a word's meaning on Wikipedia."""
    wikipedia.set_lang('ja')
    results = wikipedia.search(word)

    # get first result
    if results:
        page = wikipedia.page(results[0])
        msg = page.title + "\n" + page.url
    else:
        msg = 'No Wikipedia page found for `{}`'.format(word)
    return msg


# ====================================
# Google News
# ====================================
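One caveat worth noting about the snippet above: wikipedia.set_lang('ja') mutates module-level state, so it changes the language for every later call in the process, not just this handler. A hypothetical wrapper that restores a default afterwards:

def wikipedia_search_in(word, lang='ja', default_lang='en'):
    # Hypothetical helper: set_lang switches the module-wide API endpoint,
    # so restore the default once this search is done.
    wikipedia.set_lang(lang)
    try:
        return wikipedia.search(word)
    finally:
        wikipedia.set_lang(default_lang)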
Project: AcronymExpansion    Author: adityathakker    | project source | file source
def get_pages(query):
    pages = list()
    if not query.strip():
        raise ValueError("query must be a non-empty string")

    response = requests.get(SEARCH_URL + str(query))
    soup = BeautifulSoup(markup=response.text, features="lxml")

    if soup is None:
        raise Exception

    if "search" in str(soup.title).lower():
        result_ul = soup.find(name="ul", attrs={"class": "mw-search-results"})
        results_list = result_ul.find_all("li")

        for li in results_list:
            li_div = li.find(name="div", attrs={"class": "mw-search-result-heading"})
            a = li_div.find("a")
            link = "https://en.wikipedia.org" + a["href"]
            heading = str(a.text)
            pages.append((link, heading))

        return pages
    else:
        return wikipedia.summary(query)
Project: Microsoft-chatbot    Author: ahmadfaizalbh    | project source | file source
def about(query,qtype=None):
    service_url = 'https://kgsearch.googleapis.com/v1/entities:search'
    params = {
        'query': query,
        'limit': 10,
        'indent': True,
        'key': api_key,
    }
    url = service_url + '?' + urllib.urlencode(params)
    response = json.loads(urllib.urlopen(url).read())
    if not len(response['itemListElement']):
        return "sorry, I don't know about "+query +"\nIf you know about "+query+" please tell me."
    result = ""
    if len(response['itemListElement'])==1:
        if "detailedDescription" in response['itemListElement'][0]['result']:
            return response['itemListElement'][0]['result']['detailedDescription']["articleBody"]
        else:
            return response['itemListElement'][0]['result']['name'] +" is a " +\
                   response['itemListElement'][0]['result']["description"]
    for element in response['itemListElement']:
        try:
            result += element['result']['name'] + "->" + element['result']["description"] + "\n"
        except KeyError:
            # skip entities that lack a name or description
            pass
    return result
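This snippet targets Python 2 (urllib.urlencode and urllib.urlopen no longer exist at those paths in Python 3). Under Python 3, the request-building lines would look roughly like this sketch (service_url, params, and api_key as defined above):

import json
from urllib.parse import urlencode
from urllib.request import urlopen

url = service_url + '?' + urlencode(params)
response = json.loads(urlopen(url).read())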
Project: cdata    Author: cnschema    | project source | file source
def wikipedia_search_slow(query, lang="en", max_result=1):
    import wikipedia
    #wikification
    query = any2unicode(query)
    items = []
    ret = {"query":query, "itemList":items}
    wikipedia.set_lang(lang)
    wikiterm = wikipedia.search(query)
    #logging.info(wikiterm)
    for idx, term in enumerate(wikiterm[0:max_result]):
        wikipage = wikipedia.page(term)
        item = {
            "name": wikipage.title,
            "description": wikipedia.summary(term, sentences=1),
            "url": wikipage.url,
        }
        items.append(item)

    return ret
Project: cnschema    Author: cnschema    | project source | file source
def wikify3(phrase, description=None):
    ret = {}
    urlBase = "https://en.wikipedia.org/w/api.php?action=opensearch&format=json&formatversion=2&search={}&namespace=0&limit=10&suggest=true"
    url = urlBase.format(re.sub(r"\s+", "%20", phrase))
    r = requests.get(url)
    jsonData = json.loads(r.content)
    #logging.info(items)
    ret = {}
    for idx, label in enumerate(jsonData[1][0:MAX_RESULT]):
        description = jsonData[2][idx]
        url = jsonData[3][idx]
        #if "refer to:" in description:
        #    continue

        if idx > 0:
            prefix = "wikipedia{}".format(idx+1)
        else:
            prefix = "wikipedia"
        ret["{}Label".format(prefix)] = label
        ret["{}Description".format(prefix)] = description
        ret["{}Url".format(prefix)] = url
    return ret
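The jsonData[1], jsonData[2], and jsonData[3] indexing above relies on the OpenSearch response shape: a four-element array holding the echoed query followed by parallel lists of titles, descriptions, and URLs. An abbreviated, illustrative example (note that current Wikipedia deployments often return empty description strings):

# jsonData for search=birthday, roughly:
# [
#     "birthday",                                          # [0] echoed query
#     ["Birthday", "Birthday cake"],                       # [1] page titles
#     ["A birthday is ...", "..."],                        # [2] descriptions (may be empty)
#     ["https://en.wikipedia.org/wiki/Birthday", "..."]    # [3] page URLs
# ]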
Project: LDA-REST    Author: valentinarho    | project source | file source
def label_topic_by_number_of_words(cls, topic_description, n_words=5):
        """
        Try to disambiguate a topic considering top k words in its description
        :param n_words:
        :param topic_description: is a list of pairs  (word, word_probability)
        :return: list of strings, possible wikipedia pages
        """
        words = [w for w, _ in topic_description[:n_words]]

        if len(words) == 0:
            # nothing was selected; fall back to the single top word
            words = [topic_description[0][0]]

        res = wikipedia.search(' '.join(words))
        return res
Project: Microsoft-chatbot    Author: ahmadfaizalbh    | project source | file source
def whatIs(query,sessionID="general"):
    try:
        return wikipedia.summary(query)
    except:
        for newquery in wikipedia.search(query):
            try:
                return wikipedia.summary(newquery)
            except:
                pass
    return about(query)
Project: Microsoft-chatbot    Author: ahmadfaizalbh    | project source | file source
def whoIs(query,sessionID="general"):
    try:
        return wikipedia.summary(query)
    except:
        for newquery in wikipedia.search(query):
            try:
                return wikipedia.summary(newquery)
            except:
                pass
    return "I don't know about "+query
Project: tuxbot-bot    Author: outout14    | project source | file source
def _search(self, ctx):
        """Search the world wide web."""
        if ctx.invoked_subcommand is None:
            text = open('texts/search.md').read()
            em = discord.Embed(title='TuxBot search commands', description=text, colour=0x89C4F9)
            await ctx.send(embed=em)
Project: jessy    Author: jessy-project    | project source | file source
def ask_wikipedia(self, definition):
        '''
        Ask Wikipedia for the definition.

        :param definition:
        :return:
        '''
        # TODO: this method should run in a separate process, asynchronously

        is_exact = False
        out = []
        if not wikipedia:
            return is_exact, out

        page_titles = wikipedia.search(definition)
        page = None
        if page_titles:
            for page_title in page_titles:
                if page_title.lower() == definition:
                    try:
                        page = wikipedia.page(page_title)
                        is_exact = True
                    except DisambiguationError as ex:
                        out.append(Phrase().text('This can refer to many things, such as {0}'.format(self.join_for_more(ex.options, limit=None))))
                        return is_exact, out

            if not page and 'disambiguation' not in page_titles[0]:
                try:
                    page = wikipedia.page(page_titles[0])
                except Exception as ex:
                    out.append(Phrase().text(str(ex)))

        if page and not out:
            out.append(Phrase().text(page.content.split('==')[0]
                                     .split('\n')[0]
                                     .encode('utf-8', 'ignore')).pause(1))
        return is_exact, out
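The page.content.split('==')[0] expression above takes everything before the first section heading, i.e. the article's lead section, since the wikipedia library's page.content marks headings with "== Section ==" lines; the extra .split('\n')[0] then keeps only its first line. The same idea in isolation:

page = wikipedia.page("Python (programming language)")
lead = page.content.split('==')[0]   # text before the first "== Heading =="
first_line = lead.split('\n')[0]     # first line of the lead section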
Project: pyconjpbot    Author: pyconjp    | project source | file source
def wikipedia_page(message, option, query):
    """
    Return the Wikipedia page for a query.
    """
    if query == 'help':
        return

    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    except:
        botsend(message, 'The specified language `{}` is not available'.format(lang))
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])

        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        botsend(message, 'No page found for `{}`'.format(query))
Project: cdata    Author: cnschema    | project source | file source
def wikipedia_search(query, lang="en", max_result=1):
    """
        https://www.mediawiki.org/wiki/API:Opensearch
    """
    query = any2unicode(query)
    params = {
        "action":"opensearch",
        "search": query,
        "format":"json",
        #"formatversion":2,
        #"namespace":0,
        "suggest":"true",
        "limit": 10
    }
    urlBase = "https://{}.wikipedia.org/w/api.php?".format(lang)
    url = urlBase + urllib.urlencode(any2utf8(params))
    #logging.info(url)
    r = requests.get(url)
    jsonData = json.loads(r.content)
    #logging.info(jsonData)

    items = []
    ret = {"query":query, "itemList":items}
    for idx, label in enumerate(jsonData[1][0:max_result]):
        description = jsonData[2][idx]
        url = jsonData[3][idx]

        item = {
            "name": label,
            "description":description,
            "url": url,
        }
        items.append(item)

    return ret
Project: Chatbot    Author: ahmadfaizalbh    | project source | file source
def whoIs(query,sessionID="general"):
    try:
        return wikipedia.summary(query)
    except:
        for newquery in wikipedia.search(query):
            try:
                return wikipedia.summary(newquery)
            except:
                pass
    return "I don't know about "+query
Project: Chatbot    Author: ahmadfaizalbh    | project source | file source
def whoIs(query,sessionID="general"):
    try:
        return wikipedia.summary(query)
    except:
        for newquery in wikipedia.search(query):
            try:
                return wikipedia.summary(newquery)
            except:
                pass
    return "I don't know about "+query
Project: WikiSummary    Author: Mikerah    | project source | file source
def get_wanted_article(search_term):
    """Given a search term, find the associated article"""
    search_term = " ".join(search_term)
    try:
        list_of_associated_articles = wikipedia.search(search_term)
        wanted_article = list_of_associated_articles[0]
        print(wikipedia.summary(wanted_article))
    except wikipedia.exceptions.DisambiguationError as disambiguation:
        sys.exit("Unfortunately your request has led to a disambiguation, "
                 "please refine your search further:\n{}".format(disambiguation))
Project: cnschema    Author: cnschema    | project source | file source
def wikify1(phrase, description=None):

    #wikification
    """
    Example wbsearchentities response (abbreviated):
    {
        "searchinfo": {"search": "birthday"},
        "search": [
            {
                "repository": "",
                "id": "P3150",
                "concepturi": "http://www.wikidata.org/entity/P3150",
                "url": "//www.wikidata.org/wiki/Property:P3150",
                "title": "Property:P3150",
                "pageid": 28754653,
                "datatype": "wikibase-item",
                "label": "birthday",
                "description": "item for day and month on which the subject was born. Used when full 'date of birth' (P569) isn't known.",
                "match": {"type": "label", "language": "en", "text": "birthday"}
            }
        ]
    }
    """
    urlBase = "https://www.wikidata.org/w/api.php?action=wbsearchentities&search={}&format=json&language=en&uselang=en&type=property"
    url = urlBase.format(re.sub(r"\s+", "%20", phrase))
    r = requests.get(url)
    items = json.loads(r.content).get("search",[])
    #logging.info(items)
    ret = {}
    for idx, item in enumerate(items[0:MAX_RESULT]):
        if idx > 0:
            prefix = "wikidata{}".format(idx+1)
        else:
            prefix = "wikidata"
        ret["{}Id".format(prefix)] = item["id"]
        ret["{}Name".format(prefix)] = item.get("label","")
        ret["{}Description".format(prefix)] = item.get("description","")
        ret["{}Url".format(prefix)] = item["concepturi"]
    return ret
Project: cnschema    Author: cnschema    | project source | file source
def wikify2(phrase, description=None):
    #wikification
    ret = {}
    wikiterm = wikipedia.search(phrase)
    for idx, term in enumerate(wikiterm[0:MAX_RESULT]):
        wikipage = wikipedia.page(term)
        ret["wikipedia_{}_url".format(idx)] = wikipage.url
        ret["wikipedia_{}_desc".format(idx)] = wikipedia.summary(term, sentences=1)

    return ret
Project: JshBot-legacy    Author: jkchen2    | project source | file source
def wikipedia_query(query, simple_result=False):
    if not query:
        return "Try searching for *something* next time, knucklehead."
    try:
        page = wikipedia.page(query, auto_suggest=True)
        if simple_result: # Just return the url of the found page
            return page.url
        else: # Return the first ~500 characters of the summary
            title = page.title
            summary = page.summary
            for i in range(0, (len(summary) if len(summary) < 500 else 500) - 1):
                if summary[i] == '=' and summary[i+1] == '=':
                    summary = summary[0:i]
                    break
            if len(summary) >= 500:
                summary = summary[0:500]
                summary += ' ...*`[truncated]`*'
            return "***```{title}```***\n{summary}".format(title=title, summary=summary)
    except wikipedia.exceptions.PageError:
        suggestion = wikipedia.suggest(query)  # call suggest() once instead of twice
        raise bot_exception(WIKIPEDIA_EXCEPTION,
            "Page doesn't exist. Trying for some suggestions...",
            '```{}```'.format(suggestion if suggestion is not None else "None"))
    except wikipedia.exceptions.DisambiguationError as tp: # Try to get list of suggestions
        suggestions = wikipedia.search(query, results=5)
        if len(suggestions) > 0:
            formatted_suggestions = '```\n'
            for suggestion in suggestions:
                formatted_suggestions += '{}\n'.format(suggestion)
            formatted_suggestions += '```'
            raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. Here are some suggestions:", formatted_suggestions)
        else:
            raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. No suggestions found.")
Project: Jarvis    Author: sukeesh    | project source | file source
def search(query, count=10, suggestion=False):
    """Do a Wikipedia search for a query; returns a list of up to count related items."""
    items = wikipedia.search(query, count, suggestion)
    if isinstance(items, list) and len(items) > 0:
        return items
    return "No articles with that name, try another item."
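The isinstance(items, list) guard matters because of the suggestion flag: when suggestion=True, wikipedia.search returns a (results, suggestion) tuple rather than a plain list, so this wrapper falls through to the fallback string in that case. For example:

search("grace hoper")                   # -> ["Grace Hopper", ...]
search("grace hoper", suggestion=True)  # wikipedia.search returns a tuple here,
                                        # so the fallback message is returned instead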
Project: tuxbot-bot    Author: outout14    | project source | file source
def search_wikipedia(self, ctx: commands.Context, args):
        """Run a Wikipedia search."""

        wait = await ctx.send("_Searching..._")
        results = wikipedia.search(args)
        nbmr = 0
        mmssgg = ""

        for value in results:
            nbmr = nbmr + 1
            mmssgg = mmssgg + "**{}**: {} \n".format(str(nbmr), value)

        em = discord.Embed(title='Results for: ' + args, description=mmssgg, colour=0x4ECDC4)
        em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
        await wait.delete()

        sending = ["1️⃣", "2️⃣", "3️⃣", "4️⃣", "5️⃣", "6️⃣", "7️⃣", "8️⃣", "9️⃣", "🔟"]

        def check(reaction, user):
            return user == ctx.author and reaction.emoji in sending and reaction.message.id == msg.id

        async def waiter(future: asyncio.Future):
            reaction, user = await self.bot.wait_for('reaction_add', check=check)
            future.set_result(reaction.emoji)

        emoji = asyncio.Future()
        self.bot.loop.create_task(waiter(emoji))

        msg = await ctx.send(embed=em)
        for e in sending:
            await msg.add_reaction(e)
            if emoji.done():
                break

        while not emoji.done():
            await asyncio.sleep(0.1)

        sPage = int(sending.index(emoji.result()))

        args_ = results[sPage]

        try:
            await msg.delete()
            await ctx.trigger_typing()
            wait = await ctx.send(ctx.message.author.mention + " nice search, fetching it right away!")
            wp = wikipedia.page(args_)
            wp_contenu = wp.summary[:200] + "..."
            em = discord.Embed(title='Wikipedia: ' + wp.title, description="{} \n_Link_: {} ".format(wp_contenu, wp.url), colour=0x9B59B6)
            em.set_author(name="Wikipedia", url='http://wikipedia.org', icon_url='https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png')
            em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
            em.set_footer(text="Thanks to them for providing a free encyclopedia!")
            await wait.delete()
            await ctx.send(embed=em)

        except wikipedia.exceptions.PageError:  # TODO: move this into the on_error event
            await ctx.send(":open_mouth: An **internal error** occurred. If this happens again, contact your administrator or open an issue on ``github``!")
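A design note on the reaction handling above: the `while not emoji.done()` loop polls the future every 100 ms. Since the check predicate already filters by user, emoji, and message, the more idiomatic shape in discord.py is to await the event directly, sketched as:

msg = await ctx.send(embed=em)
for e in sending:
    await msg.add_reaction(e)

# Await the matching reaction directly instead of polling a Future.
reaction, user = await self.bot.wait_for('reaction_add', check=check)
sPage = sending.index(reaction.emoji)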
Project: cdata    Author: cnschema    | project source | file source
def wikidata_search(query, lang="zh", output_lang="en",  searchtype="item",  max_result=1):
    """
        wikification: search Wikidata entities for the given query
        https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities

        result format (abbreviated):
        {
            "searchinfo": {"search": "birthday"},
            "search": [
                {
                    "repository": "",
                    "id": "P3150",
                    "concepturi": "http://www.wikidata.org/entity/P3150",
                    "url": "//www.wikidata.org/wiki/Property:P3150",
                    "title": "Property:P3150",
                    "pageid": 28754653,
                    "datatype": "wikibase-item",
                    "label": "birthday",
                    "description": "item for day and month on which the subject was born. Used when full 'date of birth' (P569) isn't known.",
                    "match": {"type": "label", "language": "en", "text": "birthday"}
                }
            ]
        }
    """
    query = any2unicode(query)
    params = {
        "action":"wbsearchentities",
        "search": query,
        "format":"json",
        "language":lang,
        "uselang":output_lang,
        "type":searchtype
    }
    urlBase = "https://www.wikidata.org/w/api.php?"
    url = urlBase + urllib.urlencode(any2utf8(params))
    #logging.info(url)
    r = requests.get(url)
    results = json.loads(r.content).get("search",[])
    #logging.info(items)

    property_list = [
        {"name":"name", "alternateName":["label"]},
        {"name":"url", "alternateName":["concepturi"]},
        {"name":"identifier", "alternateName":["id"]},
        {"name":"description"},
    ]
    items = []
    ret = {"query": query, "itemList":items}
    for result in results[0:max_result]:
        #logging.info(result)
        item = json_dict_copy(result, property_list)
        items.append(item)
    return ret
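For reference, a call such as wikidata_search("birthday", lang="en", searchtype="property") would, per the docstring's sample response, yield roughly the structure below (illustrative values; any2unicode, any2utf8, and json_dict_copy are helpers defined elsewhere in the cdata project):

# {
#     "query": "birthday",
#     "itemList": [
#         {
#             "name": "birthday",
#             "url": "http://www.wikidata.org/entity/P3150",
#             "identifier": "P3150",
#             "description": "item for day and month on which the subject was born. ..."
#         }
#     ]
# }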