To: vim_dev@googlegroups.com Subject: Patch 7.3.1139 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1139 Problem: New regexp engine: negated flag is hardly used. Solution: Add separate _NEG states, remove negated flag. Files: src/regexp_nfa.c, src/regexp.h *** ../vim-7.3.1138/src/regexp_nfa.c 2013-06-07 14:59:14.000000000 +0200 --- src/regexp_nfa.c 2013-06-07 16:31:29.000000000 +0200 *************** *** 64,72 **** --- 64,75 ---- NFA_NOPEN, /* Start of subexpression marked with \%( */ NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ NFA_START_INVISIBLE, + NFA_START_INVISIBLE_NEG, NFA_START_INVISIBLE_BEFORE, + NFA_START_INVISIBLE_BEFORE_NEG, NFA_START_PATTERN, NFA_END_INVISIBLE, + NFA_END_INVISIBLE_NEG, NFA_END_PATTERN, NFA_COMPOSING, /* Next nodes in NFA are part of the composing multibyte char */ *************** *** 481,487 **** } default: ! if (p->c > 0 && !p->negated) return p->c; /* yes! */ return 0; } --- 484,490 ---- } default: ! if (p->c > 0) return p->c; /* yes! */ return 0; } *************** *** 1991,2000 **** --- 1994,2008 ---- case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; + case NFA_START_INVISIBLE_NEG: + STRCPY(code, "NFA_START_INVISIBLE_NEG"); break; case NFA_START_INVISIBLE_BEFORE: STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; + case NFA_START_INVISIBLE_BEFORE_NEG: + STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break; case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; + case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break; case NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; *************** *** 2227,2234 **** fprintf(debugf, " %s", p); nfa_set_code(state->c); ! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n", ! state->negated ? "NOT " : "", code, state->c, abs(state->id), --- 2235,2241 ---- fprintf(debugf, " %s", p); nfa_set_code(state->c); ! fprintf(debugf, "%s (%d) (id=%d) val=%d\n", code, state->c, abs(state->id), *************** *** 2330,2336 **** s->id = istate; s->lastlist[0] = 0; s->lastlist[1] = 0; - s->negated = FALSE; return s; } --- 2337,2342 ---- *************** *** 2741,2763 **** case NFA_PREV_ATOM_JUST_BEFORE_NEG: case NFA_PREV_ATOM_LIKE_PATTERN: { - int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG - || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); ! int start_state = NFA_START_INVISIBLE; ! int end_state = NFA_END_INVISIBLE; int n = 0; nfa_state_T *zend; nfa_state_T *skip; ! if (before) ! start_state = NFA_START_INVISIBLE_BEFORE; ! else if (pattern) { ! start_state = NFA_START_PATTERN; ! end_state = NFA_END_PATTERN; } if (before) --- 2747,2783 ---- case NFA_PREV_ATOM_JUST_BEFORE_NEG: case NFA_PREV_ATOM_LIKE_PATTERN: { int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); ! int start_state; ! int end_state; int n = 0; nfa_state_T *zend; nfa_state_T *skip; ! switch (*p) { ! case NFA_PREV_ATOM_NO_WIDTH: ! start_state = NFA_START_INVISIBLE; ! end_state = NFA_END_INVISIBLE; ! break; ! case NFA_PREV_ATOM_NO_WIDTH_NEG: ! start_state = NFA_START_INVISIBLE_NEG; ! end_state = NFA_END_INVISIBLE_NEG; ! break; ! case NFA_PREV_ATOM_JUST_BEFORE: ! start_state = NFA_START_INVISIBLE_BEFORE; ! end_state = NFA_END_INVISIBLE; ! break; ! case NFA_PREV_ATOM_JUST_BEFORE_NEG: ! start_state = NFA_START_INVISIBLE_BEFORE_NEG; ! end_state = NFA_END_INVISIBLE_NEG; ! break; ! case NFA_PREV_ATOM_LIKE_PATTERN: ! start_state = NFA_START_PATTERN; ! end_state = NFA_END_PATTERN; ! break; } if (before) *************** *** 2783,2793 **** s = alloc_state(start_state, e.start, s1); if (s == NULL) goto theend; - if (neg) - { - s->negated = TRUE; - s1->negated = TRUE; - } if (before) s->val = n; /* store the count */ if (pattern) --- 2803,2808 ---- *************** *** 3009,3015 **** matchstate = &state_ptr[istate++]; /* the match state */ matchstate->c = NFA_MATCH; matchstate->out = matchstate->out1 = NULL; - matchstate->negated = FALSE; matchstate->id = 0; patch(e.out, matchstate); --- 3024,3029 ---- *************** *** 3772,3778 **** return OK; break; case NFA_CLASS_SPACE: ! if ((c >=9 && c <= 13) || (c == ' ')) return OK; break; case NFA_CLASS_UPPER: --- 3786,3792 ---- return OK; break; case NFA_CLASS_SPACE: ! if ((c >= 9 && c <= 13) || (c == ' ')) return OK; break; case NFA_CLASS_UPPER: *************** *** 3971,3977 **** int result; int need_restore = FALSE; ! if (state->c == NFA_START_INVISIBLE_BEFORE) { /* The recursive match must end at the current position. */ endposp = &endpos; --- 3985,3992 ---- int result; int need_restore = FALSE; ! if (state->c == NFA_START_INVISIBLE_BEFORE ! || state->c == NFA_START_INVISIBLE_BEFORE_NEG) { /* The recursive match must end at the current position. */ endposp = &endpos; *************** *** 4452,4457 **** --- 4467,4473 ---- } case NFA_END_INVISIBLE: + case NFA_END_INVISIBLE_NEG: case NFA_END_PATTERN: /* * This is only encountered after a NFA_START_INVISIBLE or *************** *** 4489,4495 **** break; /* do not set submatches for \@! */ ! if (!t->state->negated) { copy_sub(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL --- 4505,4511 ---- break; /* do not set submatches for \@! */ ! if (t->state->c != NFA_END_INVISIBLE_NEG) { copy_sub(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL *************** *** 4505,4511 **** --- 4521,4529 ---- break; case NFA_START_INVISIBLE: + case NFA_START_INVISIBLE_NEG: case NFA_START_INVISIBLE_BEFORE: + case NFA_START_INVISIBLE_BEFORE_NEG: { nfa_pim_T *pim; int cout = t->state->out1->out->c; *************** *** 4524,4529 **** --- 4542,4548 ---- || cout == NFA_NCLOSE || t->pim != NULL || (t->state->c != NFA_START_INVISIBLE_BEFORE + && t->state->c != NFA_START_INVISIBLE_BEFORE_NEG && failure_chance(t->state->out1->out, 0) < failure_chance(t->state->out, 0))) { *************** *** 4534,4541 **** result = recursive_regmatch(t->state, prog, submatch, m, &listids); ! /* for \@! it is a match when result is FALSE */ ! if (result != t->state->negated) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); --- 4553,4563 ---- result = recursive_regmatch(t->state, prog, submatch, m, &listids); ! /* for \@! and \@state->c == NFA_START_INVISIBLE_NEG ! || t->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); *************** *** 4646,4656 **** break; case NFA_BOW: ! { ! int bow = TRUE; if (curc == NUL) ! bow = FALSE; #ifdef FEAT_MBYTE else if (has_mbyte) { --- 4668,4677 ---- break; case NFA_BOW: ! result = TRUE; if (curc == NUL) ! result = FALSE; #ifdef FEAT_MBYTE else if (has_mbyte) { *************** *** 4659,4685 **** /* Get class of current and previous char (if it exists). */ this_class = mb_get_class_buf(reginput, reg_buf); if (this_class <= 1) ! bow = FALSE; else if (reg_prev_class() == this_class) ! bow = FALSE; } #endif else if (!vim_iswordc_buf(curc, reg_buf) || (reginput > regline && vim_iswordc_buf(reginput[-1], reg_buf))) ! bow = FALSE; ! if (bow) addstate_here(thislist, t->state->out, &t->subs, t->pim, &listidx); break; - } case NFA_EOW: ! { ! int eow = TRUE; ! if (reginput == regline) ! eow = FALSE; #ifdef FEAT_MBYTE else if (has_mbyte) { --- 4680,4703 ---- /* Get class of current and previous char (if it exists). */ this_class = mb_get_class_buf(reginput, reg_buf); if (this_class <= 1) ! result = FALSE; else if (reg_prev_class() == this_class) ! result = FALSE; } #endif else if (!vim_iswordc_buf(curc, reg_buf) || (reginput > regline && vim_iswordc_buf(reginput[-1], reg_buf))) ! result = FALSE; ! if (result) addstate_here(thislist, t->state->out, &t->subs, t->pim, &listidx); break; case NFA_EOW: ! result = TRUE; if (reginput == regline) ! result = FALSE; #ifdef FEAT_MBYTE else if (has_mbyte) { *************** *** 4690,4707 **** prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) ! eow = FALSE; } #endif else if (!vim_iswordc_buf(reginput[-1], reg_buf) || (reginput[0] != NUL && vim_iswordc_buf(curc, reg_buf))) ! eow = FALSE; ! if (eow) addstate_here(thislist, t->state->out, &t->subs, t->pim, &listidx); break; - } case NFA_BOF: if (reglnum == 0 && reginput == regline --- 4708,4724 ---- prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) ! result = FALSE; } #endif else if (!vim_iswordc_buf(reginput[-1], reg_buf) || (reginput[0] != NUL && vim_iswordc_buf(curc, reg_buf))) ! result = FALSE; ! if (result) addstate_here(thislist, t->state->out, &t->subs, t->pim, &listidx); break; case NFA_BOF: if (reglnum == 0 && reginput == regline *************** *** 4740,4746 **** { /* If \Z was present, then ignore composing characters. * When ignoring the base character this always matches. */ - /* TODO: How about negated? */ if (len == 0 && sta->c != curc) result = FAIL; else --- 4757,4762 ---- *************** *** 4813,4838 **** } break; - case NFA_CLASS_ALNUM: - case NFA_CLASS_ALPHA: - case NFA_CLASS_BLANK: - case NFA_CLASS_CNTRL: - case NFA_CLASS_DIGIT: - case NFA_CLASS_GRAPH: - case NFA_CLASS_LOWER: - case NFA_CLASS_PRINT: - case NFA_CLASS_PUNCT: - case NFA_CLASS_SPACE: - case NFA_CLASS_UPPER: - case NFA_CLASS_XDIGIT: - case NFA_CLASS_TAB: - case NFA_CLASS_RETURN: - case NFA_CLASS_BACKSPACE: - case NFA_CLASS_ESCAPE: - result = check_char_class(t->state->c, curc); - ADD_STATE_IF_MATCH(t->state); - break; - case NFA_START_COLL: case NFA_START_NEG_COLL: { --- 4829,4834 ---- *************** *** 5212,5221 **** int c = t->state->c; /* TODO: put this in #ifdef later */ ! if (c < -256) EMSGN("INTERNAL: Negative state char: %ld", c); - if (is_Magic(c)) - c = un_Magic(c); result = (c == curc); if (!result && ireg_ic) --- 5208,5215 ---- int c = t->state->c; /* TODO: put this in #ifdef later */ ! if (c < 0) EMSGN("INTERNAL: Negative state char: %ld", c); result = (c == curc); if (!result && ireg_ic) *************** *** 5252,5259 **** prog, submatch, m, &listids); t->pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH; ! /* for \@! it is a match when result is FALSE */ ! if (result != t->pim->state->negated) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->pim->subs.norm, &m->norm); --- 5246,5257 ---- prog, submatch, m, &listids); t->pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH; ! /* for \@! and \@pim->state->c ! == NFA_START_INVISIBLE_NEG ! || t->pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->pim->subs.norm, &m->norm); *************** *** 5274,5281 **** #endif } ! /* for \@! it is a match when result is FALSE */ ! if (result != t->pim->state->negated) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &t->pim->subs.norm); --- 5272,5281 ---- #endif } ! /* for \@! and \@pim->state->c == NFA_START_INVISIBLE_NEG ! || t->pim->state->c ! == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &t->pim->subs.norm); *** ../vim-7.3.1138/src/regexp.h 2013-06-06 18:46:00.000000000 +0200 --- src/regexp.h 2013-06-07 16:11:12.000000000 +0200 *************** *** 73,79 **** nfa_state_T *out1; int id; int lastlist[2]; /* 0: normal, 1: recursive */ - int negated; int val; }; --- 73,78 ---- *** ../vim-7.3.1138/src/version.c 2013-06-07 14:59:14.000000000 +0200 --- src/version.c 2013-06-07 16:11:59.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1139, /**/ -- Common sense is what tells you that the world is flat. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///