/**
 * Renders a set of anchor flag bits as a bracketed, human-readable list.
 *
 * Each recognised flag that is set contributes its label followed by a single
 * space, in a fixed order; bits not tested here are ignored. The result is
 * therefore {@code "[]"} when none of the tested flags is set and has a
 * trailing space before {@code "]"} otherwise.
 *
 * @param anchor bitwise OR of {@code AnchorType} flags
 * @return the textual form, e.g. {@code "[begin-buf end-line ]"}
 */
static String anchorToString(final int anchor) {
    // The buffer never escapes this method, so the unsynchronized
    // StringBuilder is sufficient (StringBuffer's locking buys nothing here).
    final StringBuilder s = new StringBuilder("[");

    if ((anchor & AnchorType.BEGIN_BUF) != 0) {
        s.append("begin-buf ");
    }
    if ((anchor & AnchorType.BEGIN_LINE) != 0) {
        s.append("begin-line ");
    }
    if ((anchor & AnchorType.BEGIN_POSITION) != 0) {
        s.append("begin-pos ");
    }
    if ((anchor & AnchorType.END_BUF) != 0) {
        s.append("end-buf ");
    }
    if ((anchor & AnchorType.SEMI_END_BUF) != 0) {
        s.append("semi-end-buf ");
    }
    if ((anchor & AnchorType.END_LINE) != 0) {
        s.append("end-line ");
    }
    if ((anchor & AnchorType.ANYCHAR_STAR) != 0) {
        s.append("anychar-star ");
    }
    if ((anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
        // NOTE(review): the label ends in "-pl" although the flag is named *_ML;
        // kept unchanged so existing debug output stays identical.
        s.append("anychar-star-pl ");
    }

    s.append("]");
    return s.toString();
}
private int compileLengthAnchorNode(final AnchorNode node) { int tlen; if (node.target != null) { tlen = compileLengthTree(node.target); } else { tlen = 0; } int len; switch (node.type) { case AnchorType.PREC_READ: len = OPSize.PUSH_POS + tlen + OPSize.POP_POS; break; case AnchorType.PREC_READ_NOT: len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS; break; case AnchorType.LOOK_BEHIND: len = OPSize.LOOK_BEHIND + tlen; break; case AnchorType.LOOK_BEHIND_NOT: len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT; break; default: len = OPSize.OPCODE; break; } // switch return len; }
/**
 * Restructures a look-behind anchor whose target is an alternation so that
 * every alternative ends up under its own anchor of the same type.
 *
 * The original anchor is re-used for the first alternative; a new
 * {@code AnchorNode} is created for each following one. For
 * {@code LOOK_BEHIND_NOT}, {@code toListNode()} is additionally invoked on
 * every cell of the chain (alt -> list).
 *
 * @param nodep the look-behind anchor; it is cast to {@code AnchorNode} and
 *              its target to {@code ConsAltNode}
 * @return the node that was the anchor's target on entry
 */
private Node divideLookBehindAlternatives(final Node nodep) {
    final AnchorNode anchor = (AnchorNode) nodep;
    final int anchorType = anchor.type;
    final Node alternatives = anchor.target;
    final ConsAltNode firstCell = (ConsAltNode) alternatives;
    final Node firstAlternative = firstCell.car;

    // swap(...) is defined outside this block; presumably it exchanges the two
    // nodes' places in the surrounding tree -- confirm against its definition.
    swap(nodep, alternatives);

    // The alternation chain is now the outer node; the original anchor guards
    // only the first alternative.
    firstCell.setCar(nodep);
    anchor.setTarget(firstAlternative);

    // Give every remaining alternative its own anchor of the same type.
    for (Node cell = firstCell.cdr; cell != null; cell = ((ConsAltNode) cell).cdr) {
        final AnchorNode wrapper = new AnchorNode(anchorType);
        final ConsAltNode altCell = (ConsAltNode) cell;
        wrapper.setTarget(altCell.car);
        altCell.setCar(wrapper);
    }

    if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
        // alt -> list; the successor is read only after the cell was converted.
        for (Node cell = alternatives; cell != null; cell = ((ConsAltNode) cell).cdr) {
            ((ConsAltNode) cell).toListNode();
        }
    }

    return alternatives;
}
/**
 * Formats anchor flag bits as a bracketed list of labels for diagnostics.
 *
 * The tested flags are appended in a fixed order, each label followed by one
 * space; untested bits are ignored. With no tested flag set the result is
 * {@code "[]"}.
 *
 * @param anchor bitwise OR of {@code AnchorType} flags
 * @return the bracketed label list, e.g. {@code "[begin-line ]"}
 */
static String anchorToString(int anchor) {
    // Local, single-threaded buffer: StringBuilder replaces the synchronized
    // StringBuffer without changing the produced text.
    StringBuilder text = new StringBuilder("[");

    if ((anchor & AnchorType.BEGIN_BUF) != 0) {
        text.append("begin-buf ");
    }
    if ((anchor & AnchorType.BEGIN_LINE) != 0) {
        text.append("begin-line ");
    }
    if ((anchor & AnchorType.BEGIN_POSITION) != 0) {
        text.append("begin-pos ");
    }
    if ((anchor & AnchorType.END_BUF) != 0) {
        text.append("end-buf ");
    }
    if ((anchor & AnchorType.SEMI_END_BUF) != 0) {
        text.append("semi-end-buf ");
    }
    if ((anchor & AnchorType.END_LINE) != 0) {
        text.append("end-line ");
    }
    if ((anchor & AnchorType.ANYCHAR_STAR) != 0) {
        text.append("anychar-star ");
    }
    if ((anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
        // NOTE(review): label says "-pl" for the *_ML flag; left as-is to keep
        // the emitted text unchanged.
        text.append("anychar-star-pl ");
    }

    return text.append("]").toString();
}
private int compileLengthAnchorNode(AnchorNode node) { int tlen; if (node.target != null) { tlen = compileLengthTree(node.target); } else { tlen = 0; } int len; switch (node.type) { case AnchorType.PREC_READ: len = OPSize.PUSH_POS + tlen + OPSize.POP_POS; break; case AnchorType.PREC_READ_NOT: len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS; break; case AnchorType.LOOK_BEHIND: len = OPSize.LOOK_BEHIND + tlen; break; case AnchorType.LOOK_BEHIND_NOT: len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT; break; default: len = OPSize.OPCODE; break; } // switch return len; }
/**
 * Distributes a look-behind anchor over the alternatives of its target.
 *
 * On entry {@code node} is an anchor whose target is an alternation chain.
 * Afterwards the chain is the outer node: its first cell holds the original
 * anchor (now targeting the first alternative) and each later cell holds a
 * freshly created anchor of the same type around its alternative. When the
 * type is {@code LOOK_BEHIND_NOT}, {@code toListNode()} is called on each
 * cell of the chain.
 *
 * @param node the look-behind anchor; cast to {@code AnchorNode}, with its
 *             target cast to {@code ConsAltNode}
 * @return the node that was {@code node}'s target on entry
 */
private Node divideLookBehindAlternatives(Node node) {
    AnchorNode lookBehind = (AnchorNode) node;
    int anchorType = lookBehind.type;
    Node chain = lookBehind.target;
    ConsAltNode chainHead = (ConsAltNode) chain;
    Node leading = chainHead.car;

    // swap(...) lives outside this block; presumably it exchanges the two
    // nodes within the tree -- confirm against its definition.
    swap(node, chain);

    chainHead.setCar(node);
    lookBehind.setTarget(leading);

    // Wrap each remaining alternative in its own anchor.
    Node cursor = chainHead.cdr;
    while (cursor != null) {
        AnchorNode insert = new AnchorNode(anchorType);
        ConsAltNode cell = (ConsAltNode) cursor;
        insert.setTarget(cell.car);
        cell.setCar(insert);
        cursor = cell.cdr;
    }

    if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
        cursor = chain;
        while (cursor != null) {
            ((ConsAltNode) cursor).toListNode(); /* alt -> list */
            // Read the successor only after the conversion.
            cursor = ((ConsAltNode) cursor).cdr;
        }
    }

    return chain;
}
@Override protected void compileAnchorNode(final AnchorNode node) { int len; int n; switch (node.type) { case AnchorType.BEGIN_BUF: addOpcode(OPCode.BEGIN_BUF); break; case AnchorType.END_BUF: addOpcode(OPCode.END_BUF); break; case AnchorType.BEGIN_LINE: addOpcode(OPCode.BEGIN_LINE); break; case AnchorType.END_LINE: addOpcode(OPCode.END_LINE); break; case AnchorType.SEMI_END_BUF: addOpcode(OPCode.SEMI_END_BUF); break; case AnchorType.BEGIN_POSITION: addOpcode(OPCode.BEGIN_POSITION); break; case AnchorType.WORD_BOUND: addOpcode(OPCode.WORD_BOUND); break; case AnchorType.NOT_WORD_BOUND: addOpcode(OPCode.NOT_WORD_BOUND); break; case AnchorType.WORD_BEGIN: if (Config.USE_WORD_BEGIN_END) { addOpcode(OPCode.WORD_BEGIN); } break; case AnchorType.WORD_END: if (Config.USE_WORD_BEGIN_END) { addOpcode(OPCode.WORD_END); } break; case AnchorType.PREC_READ: addOpcode(OPCode.PUSH_POS); compileTree(node.target); addOpcode(OPCode.POP_POS); break; case AnchorType.PREC_READ_NOT: len = compileLengthTree(node.target); addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS); compileTree(node.target); addOpcode(OPCode.FAIL_POS); break; case AnchorType.LOOK_BEHIND: addOpcode(OPCode.LOOK_BEHIND); if (node.charLength < 0) { n = analyser.getCharLengthTree(node.target); if (analyser.returnCode != 0) { newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); } } else { n = node.charLength; } addLength(n); compileTree(node.target); break; case AnchorType.LOOK_BEHIND_NOT: len = compileLengthTree(node.target); addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT); if (node.charLength < 0) { n = analyser.getCharLengthTree(node.target); if (analyser.returnCode != 0) { newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); } } else { n = node.charLength; } addLength(n); compileTree(node.target); addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT); break; default: newInternalException(ERR_PARSER_BUG); } // switch }
/**
 * Merges line anchors from the given anchor info into {@code subAnchor}.
 *
 * Only the {@code BEGIN_LINE} bit of the left anchor and the {@code END_LINE}
 * bit of the right anchor are taken over; bits already set in
 * {@code subAnchor} are kept.
 *
 * @param anc anchor info to read the left/right anchor flags from
 */
void setSubAnchor(final OptAnchorInfo anc) {
    subAnchor |= (anc.leftAnchor & AnchorType.BEGIN_LINE)
               | (anc.rightAnchor & AnchorType.END_LINE);
}
public String optimizeInfoToString() { final StringBuilder s = new StringBuilder(); s.append("optimize: ").append(searchAlgorithm.getName()).append("\n"); s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor)); if ((anchor & AnchorType.END_BUF_MASK) != 0) { s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax)); } s.append("\n"); if (searchAlgorithm != SearchAlgorithm.NONE) { s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n"); } s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n"); s.append("threshold length: ").append(thresholdLength).append("\n"); if (exact != null) { s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n"); } else if (searchAlgorithm == SearchAlgorithm.MAP) { int n=0; for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { if (map[i] != 0) { n++; } } s.append("map: n = ").append(n).append("\n"); if (n > 0) { int c=0; s.append("["); for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { if (map[i] != 0) { if (c > 0) { s.append(", "); } c++; // TODO if (enc.isPrint(i) s.append((char)i); } } s.append("]\n"); } } return s.toString(); }
@SuppressWarnings("unused") protected final void setOptimizedInfoFromTree(final Node node) { final NodeOptInfo opt = new NodeOptInfo(); final OptEnvironment oenv = new OptEnvironment(); oenv.options = regex.options; oenv.caseFoldFlag = regex.caseFoldFlag; oenv.scanEnv = env; oenv.mmd.clear(); // ?? optimizeNodeLeft(node, opt, oenv); regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | AnchorType.BEGIN_POSITION | AnchorType.ANYCHAR_STAR | AnchorType.ANYCHAR_STAR_ML); regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF); if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { regex.anchorDmin = opt.length.min; regex.anchorDmax = opt.length.max; } if (opt.exb.length > 0 || opt.exm.length > 0) { opt.exb.select(opt.exm); if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { // !goto set_map;! regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.setExactInfo(opt.exb); regex.setSubAnchor(opt.exb.anchor); } } else if (opt.map.value > 0) { // !set_map:! regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; if (opt.length.max == 0) { regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; } } if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { Config.log.println(regex.optimizeInfoToString()); } }