Java 类org.apache.lucene.analysis.charfilter.HTMLStripCharFilter 实例源码

项目:eMonocot    文件:SearchableDaoImpl.java   
/**
 * Strips HTML markup from {@code value} by streaming it through an
 * {@link HTMLStripCharFilter} and collecting every character the filter emits.
 *
 * @param value the text to strip; a null value fails with a
 *              {@link NullPointerException} when the {@link StringReader} is built
 * @return the characters produced by the filter
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 *                          from or closing the filter
 */
private String filter(String value) {
    StringBuilder out = new StringBuilder();
    // try-with-resources closes the filter even when read() throws; the previous
    // explicit close() call was skipped whenever an exception was raised.
    try (HTMLStripCharFilter html =
                 new HTMLStripCharFilter(new BufferedReader(new StringReader(value)))) {
        char[] cbuf = new char[1024 * 10];
        int count;
        while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
            // A zero-length read appends nothing, so no separate guard is needed.
            out.append(cbuf, 0, count);
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed stripping HTML for value: "
                + value, e);
    }
    return out.toString();
}
项目:search    文件:HTMLStripTransformer.java   
/**
 * Strips HTML markup from {@code value} by streaming it through an
 * {@link HTMLStripCharFilter} and collecting every character the filter emits.
 *
 * @param value  the text to strip; a null value fails with a
 *               {@link NullPointerException} when the {@link StringReader} is built
 * @param column name of the column being processed; used only in the error message
 * @return the stripped text as a {@link String}
 * @throws DataImportHandlerException with severity {@code SEVERE}, wrapping any
 *                                    {@link IOException} raised while reading from
 *                                    or closing the filter
 */
private Object stripHTML(String value, String column) {
  StringBuilder out = new StringBuilder();
  // StringReader always supports mark(), so the former BufferedReader fallback
  // could never be taken and the reader is passed to the filter directly.
  // try-with-resources closes the filter even when read() throws; the previous
  // explicit close() call was skipped whenever an exception was raised.
  try (HTMLStripCharFilter html = new HTMLStripCharFilter(new StringReader(value))) {
    char[] cbuf = new char[1024 * 10];
    int count;
    while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
      // A zero-length read appends nothing, so no separate guard is needed.
      out.append(cbuf, 0, count);
    }
  } catch (IOException e) {
    throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
            "Failed stripping HTML for column: " + column, e);
  }
  return out.toString();
}
项目:NYBC    文件:HTMLStripTransformer.java   
/**
 * Strips HTML markup from {@code value} by streaming it through an
 * {@link HTMLStripCharFilter} and collecting every character the filter emits.
 *
 * @param value  the text to strip; a null value fails with a
 *               {@link NullPointerException} when the {@link StringReader} is built
 * @param column name of the column being processed; used only in the error message
 * @return the stripped text as a {@link String}
 * @throws DataImportHandlerException with severity {@code SEVERE}, wrapping any
 *                                    {@link IOException} raised while reading from
 *                                    or closing the filter
 */
private Object stripHTML(String value, String column) {
  StringBuilder out = new StringBuilder();
  // StringReader always supports mark(), so the former BufferedReader fallback
  // could never be taken and the reader is passed to the filter directly.
  // try-with-resources closes the filter even when read() throws; the previous
  // explicit close() call was skipped whenever an exception was raised.
  try (HTMLStripCharFilter html = new HTMLStripCharFilter(new StringReader(value))) {
    char[] cbuf = new char[1024 * 10];
    int count;
    while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
      // A zero-length read appends nothing, so no separate guard is needed.
      out.append(cbuf, 0, count);
    }
  } catch (IOException e) {
    throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
            "Failed stripping HTML for column: " + column, e);
  }
  return out.toString();
}
项目:read-open-source-code    文件:HTMLStripTransformer.java   
/**
 * Strips HTML markup from {@code value} by streaming it through an
 * {@link HTMLStripCharFilter} and collecting every character the filter emits.
 *
 * @param value  the text to strip; a null value fails with a
 *               {@link NullPointerException} when the {@link StringReader} is built
 * @param column name of the column being processed; used only in the error message
 * @return the stripped text as a {@link String}
 * @throws DataImportHandlerException with severity {@code SEVERE}, wrapping any
 *                                    {@link IOException} raised while reading from
 *                                    or closing the filter
 */
private Object stripHTML(String value, String column) {
  StringBuilder out = new StringBuilder();
  // StringReader always supports mark(), so the former BufferedReader fallback
  // could never be taken and the reader is passed to the filter directly.
  // try-with-resources closes the filter even when read() throws; the previous
  // explicit close() call was skipped whenever an exception was raised.
  try (HTMLStripCharFilter html = new HTMLStripCharFilter(new StringReader(value))) {
    char[] cbuf = new char[1024 * 10];
    int count;
    while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
      // A zero-length read appends nothing, so no separate guard is needed.
      out.append(cbuf, 0, count);
    }
  } catch (IOException e) {
    throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
            "Failed stripping HTML for column: " + column, e);
  }
  return out.toString();
}
项目:SolrTextTagger    文件:XmlInterpolationTest.java   
/**
 * Feeds {@code docText} through an {@link HTMLStripCharFilter} (constructed with the
 * singleton set {@code "unescaped"} as its second argument) into a
 * {@link WhitespaceTokenizer} and returns the text of each token, in the order
 * the tokenizer produced them.
 *
 * @param docText the raw document text to analyze
 * @return the term text of every emitted token
 * @throws RuntimeException wrapping any {@link IOException} raised during analysis
 */
private String[] analyzeReturnTokens(String docText) {
  final Reader strippedInput = new HTMLStripCharFilter(
          new StringReader(docText), Collections.singleton("unescaped"));
  final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
  final CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

  final List<String> tokens = new ArrayList<>();
  try {
    tokenizer.setReader(strippedInput);
    tokenizer.reset();
    for (boolean more = tokenizer.incrementToken(); more; more = tokenizer.incrementToken()) {
      tokens.add(term.toString());
    }
    tokenizer.end();
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  } finally {
    // Closing is best-effort: failures while closing are deliberately ignored.
    IOUtils.closeQuietly(tokenizer);
  }

  final String[] asArray = new String[tokens.size()];
  return tokens.toArray(asArray);
}
项目:solr-multilingual-analyzer    文件:HTMLScriptCharFilterHelper.java   
/**
 * Reads {@code source} through an {@link HTMLStripCharFilter} and returns all
 * characters the filter emits as a single string.
 *
 * <p>Neither the filter nor {@code source} is closed by this method.
 *
 * @param source the reader supplying the raw text; may be null
 * @return the filtered text, or {@code null} when {@code source} is null
 * @throws IOException if reading from the filter fails
 */
public static String filterHTML(Reader source) throws IOException {
    if (source == null) {
        return null;
    }
    final HTMLStripCharFilter stripped = new HTMLStripCharFilter(source);
    final StringBuilder text = new StringBuilder();
    // read() returns -1 at end of stream; every other value is one character.
    for (int next = stripped.read(); next != -1; next = stripped.read()) {
        text.append((char) next);
    }
    return text.toString();
}
项目:SolrTextTagger    文件:XmlInterpolationTest.java   
/**
 * Tokenizes {@code docText} (after passing it through an {@link HTMLStripCharFilter})
 * with a {@link WhitespaceTokenizer} and locates two marker tokens.
 *
 * <p>Slot 0 of the result receives the start offset of the most recent token equal
 * to {@code start}; slot 1 receives the end offset of the first token equal to
 * {@code end}, at which point scanning stops. A slot stays {@code -1} when its
 * token is not found.
 *
 * @param docText the raw document text to analyze
 * @param start   term text whose start offset is recorded
 * @param end     term text whose end offset is recorded and which ends the scan
 * @return a two-element array {@code {startOffset, endOffset}}
 * @throws RuntimeException wrapping any {@link IOException} raised during analysis
 */
private int[] analyzeTagOne(String docText, String start, String end) {
  final int[] offsets = new int[] {-1, -1};

  final Reader strippedInput = new HTMLStripCharFilter(new StringReader(docText));
  final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
  final CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);

  try {
    tokenizer.setReader(strippedInput);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      final String current = term.toString();
      if (current.equals(start)) {
        offsets[0] = offset.startOffset();
      }
      if (!current.equals(end)) {
        continue;
      }
      // End marker found: record it and stop immediately (the finally block
      // still closes the tokenizer).
      offsets[1] = offset.endOffset();
      return offsets;
    }
    tokenizer.end();
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  } finally {
    // Closing is best-effort: failures while closing are deliberately ignored.
    IOUtils.closeQuietly(tokenizer);
  }
  return offsets;
}
项目:elasticsearch_my    文件:HtmlStripCharFilterFactory.java   
/**
 * Wraps {@code tokenStream} in a new {@link HTMLStripCharFilter}, passing along
 * the {@code escapedTags} value that is in scope.
 *
 * @param tokenStream the reader to wrap
 * @return the wrapping filter
 */
@Override
public Reader create(Reader tokenStream) {
    final Reader htmlStripped = new HTMLStripCharFilter(tokenStream, escapedTags);
    return htmlStripped;
}
项目:Elasticsearch    文件:HtmlStripCharFilterFactory.java   
/**
 * Wraps {@code tokenStream} in a new {@link HTMLStripCharFilter}, passing along
 * the {@code escapedTags} value that is in scope.
 *
 * @param tokenStream the reader to wrap
 * @return the wrapping filter
 */
@Override
public Reader create(Reader tokenStream) {
    final Reader htmlStripped = new HTMLStripCharFilter(tokenStream, escapedTags);
    return htmlStripped;
}