我们从Python开源项目中，提取了以下3个代码示例，用于说明如何使用jieba.initialize()。
def _index_files(storeDir, indexFile):
    """Build a fresh Lucene index under ``storeDir`` from ``indexFile``.

    jieba is initialized up front, then an ``IndexWriter`` is opened in
    ``CREATE`` mode and handed to ``_index_docs`` together with
    ``indexFile``.

    Args:
        storeDir: Filesystem path of the directory that holds the index.
        indexFile: Input passed through unchanged to ``_index_docs``.

    Raises:
        Any exception raised while indexing or committing is re-raised
        after the writer has been rolled back.
    """
    jieba.initialize()
    store = SimpleFSDirectory(File(storeDir))
    # Cap the number of tokens indexed per field at 1048576.
    analyzer = LimitTokenCountAnalyzer(
        SimpleAnalyzer(Version.LUCENE_CURRENT), 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)
    try:
        _index_docs(indexFile, writer)
        print('commit index')
        writer.commit()
    except BaseException:
        # rollback() discards uncommitted changes AND closes the writer, so a
        # failed run neither leaks the write lock nor publishes a partial index.
        writer.rollback()
        raise
    else:
        writer.close()
    print('done')
def get_search_func():
    """Initialize jieba and the Lucene JVM, then return a search callable.

    The callable is produced by ``search_func_factory`` from a
    ``SimpleAnalyzer``, an ``IndexSearcher`` opened on ``LUCENE_INDEX_DIR``,
    and the JVM environment returned by ``lucene.initVM``.
    """
    jieba.initialize()
    # The JVM is started before any Lucene object is constructed.
    jvm = lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    simple_analyzer = SimpleAnalyzer(Version.LUCENE_CURRENT)

    # Open the on-disk index step by step: path -> directory -> reader -> searcher.
    index_path = File(LUCENE_INDEX_DIR)
    index_dir = SimpleFSDirectory(index_path)
    index_reader = DirectoryReader.open(index_dir)
    index_searcher = IndexSearcher(index_reader)

    return search_func_factory(
        analyzer=simple_analyzer,
        searcher=index_searcher,
        vm_env=jvm,
    )
def __init__(self, slack, custom):
    """Store the Slack client and settings, then prepare jieba.

    Args:
        slack: Client object exposing ``api_call``; used here to list
            channels.
        custom: Mapping providing ``'data'``, ``'colorPrint'`` and
            ``'food_channelname'``.

    If no channel name (lower-cased) equals ``custom['food_channelname']``,
    a failure message is printed via ``colorPrint``, ``self.nochannel`` is
    set to True, and the method returns before touching jieba or calling
    ``self.init()``.
    """
    self.slack = slack
    self.rundata = custom['data']
    self.colorPrint = custom['colorPrint']
    self.food_dir = "data/midnight.json"
    self.food_dic = "data/dict.txt.big"

    # find midnight channel
    self.nochannel = False
    rep = self.slack.api_call("channels.list")
    wanted_name = custom['food_channelname']
    # First matching channel wins; empty string means "not found".
    self.channel_id = next(
        (c['id'] for c in rep['channels'] if c['name'].lower() == wanted_name),
        "",
    )
    if not self.channel_id:
        self.colorPrint(
            "No midnight channel",
            "Restart when midnight channel can use",
            color="FAIL")
        self.nochannel = True
        return

    # Point jieba at the project dictionary before loading it.
    jieba.set_dictionary(self.food_dic)
    jieba.initialize()

    # add and del words
    # ``get`` yields None when a key is absent; fall back to an empty tuple
    # so a missing word list is skipped instead of raising TypeError.
    for word in self.rundata.get('FOOD_addword') or ():
        jieba.add_word(word)
    for word in self.rundata.get('FOOD_delword') or ():
        jieba.del_word(word)

    self.init()