We extracted the following 9 code examples from Python open-source projects to illustrate how to use nltk.tokenize.line_tokenize().
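Before the examples, a quick look at what line_tokenize() does: it splits a string on newlines and returns the resulting lines as a list, discarding blank lines by default. A minimal sketch, with a made-up sample string:

from nltk.tokenize import line_tokenize

text = "# a comment\nfirst line\n\nsecond line\n"
# Splits on newlines; blank lines are dropped by default.
print(line_tokenize(text))
# ['# a comment', 'first line', 'second line']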
def words(self, lang=None, fileids=None, ignore_lines_startswith='#'):
    """
    This module returns a list of nonbreaking prefixes for the specified
    language(s).

    >>> from nltk.corpus import nonbreaking_prefixes as nbp
    >>> nbp.words('en')[:10] == [u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H', u'I', u'J']
    True
    >>> nbp.words('ta')[:5] == [u'\u0b85', u'\u0b86', u'\u0b87', u'\u0b88', u'\u0b89']
    True

    :return: a list words for the specified language(s).
    """
    # If *lang* in list of languages available, allocate apt fileid.
    # Otherwise, the function returns non-breaking prefixes for
    # all languages when fileids==None.
    if lang in self.available_langs:
        lang = self.available_langs[lang]
        fileids = ['nonbreaking_prefix.' + lang]
    return [line for line in line_tokenize(self.raw(fileids))
            if not line.startswith(ignore_lines_startswith)]
def words(self, fileids=None):
    return line_tokenize(self.raw(fileids))
def words(self, fileids=None, ignore_lines_startswith='\n'):
    return [line for line in line_tokenize(self.raw(fileids))
            if not line.startswith(ignore_lines_startswith)]
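All three methods follow the same pattern: read the corpus's raw text with self.raw(fileids), split it into lines with line_tokenize(), and optionally drop lines that begin with a marker such as '#'. A standalone sketch of that pattern outside any corpus reader class (the function name and sample text below are illustrative, not taken from the examples):

from nltk.tokenize import line_tokenize

def prefix_words(raw_text, ignore_lines_startswith='#'):
    # Split the raw text into lines and keep only non-comment lines.
    return [line for line in line_tokenize(raw_text)
            if not line.startswith(ignore_lines_startswith)]

sample = "# nonbreaking prefixes\nA\nB\nMr\n"
print(prefix_words(sample))
# ['A', 'B', 'Mr']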