in src/couch_quickjs/quickjs/libregexp.c [1999:2411]
static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
StackInt *stack, int stack_len,
const uint8_t *pc, const uint8_t *cptr,
BOOL no_recurse)
{
int opcode, ret;
int cbuf_type;
uint32_t val, c;
const uint8_t *cbuf_end;
cbuf_type = s->cbuf_type;
cbuf_end = s->cbuf_end;
for(;;) {
// printf("top=%p: pc=%d\n", th_list.top, (int)(pc - (bc_buf + RE_HEADER_LEN)));
opcode = *pc++;
switch(opcode) {
case REOP_match:
{
REExecState *rs;
if (no_recurse)
return (intptr_t)cptr;
ret = 1;
goto recurse;
no_match:
if (no_recurse)
return 0;
ret = 0;
recurse:
for(;;) {
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
if (s->state_stack_len == 0)
return ret;
rs = (REExecState *)(s->state_stack +
(s->state_stack_len - 1) * s->state_size);
if (rs->type == RE_EXEC_STATE_SPLIT) {
if (!ret) {
pop_state:
memcpy(capture, rs->buf,
sizeof(capture[0]) * 2 * s->capture_count);
pop_state1:
pc = rs->pc;
cptr = rs->cptr;
stack_len = rs->stack_len;
memcpy(stack, rs->buf + 2 * s->capture_count,
stack_len * sizeof(stack[0]));
s->state_stack_len--;
break;
}
} else if (rs->type == RE_EXEC_STATE_GREEDY_QUANT) {
if (!ret) {
uint32_t char_count, i;
memcpy(capture, rs->buf,
sizeof(capture[0]) * 2 * s->capture_count);
stack_len = rs->stack_len;
memcpy(stack, rs->buf + 2 * s->capture_count,
stack_len * sizeof(stack[0]));
pc = rs->pc;
cptr = rs->cptr;
/* go backward */
char_count = get_u32(pc + 12);
for(i = 0; i < char_count; i++) {
PREV_CHAR(cptr, s->cbuf, cbuf_type);
}
pc = (pc + 16) + (int)get_u32(pc);
rs->cptr = cptr;
rs->count--;
if (rs->count == 0) {
s->state_stack_len--;
}
break;
}
} else {
ret = ((rs->type == RE_EXEC_STATE_LOOKAHEAD && ret) ||
(rs->type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD && !ret));
if (ret) {
/* keep the capture in case of positive lookahead */
if (rs->type == RE_EXEC_STATE_LOOKAHEAD)
goto pop_state1;
else
goto pop_state;
}
}
s->state_stack_len--;
}
}
break;
case REOP_char32:
val = get_u32(pc);
pc += 4;
goto test_char;
case REOP_char:
val = get_u16(pc);
pc += 2;
test_char:
if (cptr >= cbuf_end)
goto no_match;
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode);
}
if (val != c)
goto no_match;
break;
case REOP_split_goto_first:
case REOP_split_next_first:
{
const uint8_t *pc1;
val = get_u32(pc);
pc += 4;
if (opcode == REOP_split_next_first) {
pc1 = pc + (int)val;
} else {
pc1 = pc;
pc = pc + (int)val;
}
ret = push_state(s, capture, stack, stack_len,
pc1, cptr, RE_EXEC_STATE_SPLIT, 0);
if (ret < 0)
return LRE_RET_MEMORY_ERROR;
break;
}
case REOP_lookahead:
case REOP_negative_lookahead:
val = get_u32(pc);
pc += 4;
ret = push_state(s, capture, stack, stack_len,
pc + (int)val, cptr,
RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead,
0);
if (ret < 0)
return LRE_RET_MEMORY_ERROR;
break;
case REOP_goto:
val = get_u32(pc);
pc += 4 + (int)val;
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
break;
case REOP_line_start:
if (cptr == s->cbuf)
break;
if (!s->multi_line)
goto no_match;
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
if (!is_line_terminator(c))
goto no_match;
break;
case REOP_line_end:
if (cptr == cbuf_end)
break;
if (!s->multi_line)
goto no_match;
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
if (!is_line_terminator(c))
goto no_match;
break;
case REOP_dot:
if (cptr == cbuf_end)
goto no_match;
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (is_line_terminator(c))
goto no_match;
break;
case REOP_any:
if (cptr == cbuf_end)
goto no_match;
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
break;
case REOP_save_start:
case REOP_save_end:
val = *pc++;
assert(val < s->capture_count);
capture[2 * val + opcode - REOP_save_start] = (uint8_t *)cptr;
break;
case REOP_save_reset:
{
uint32_t val2;
val = pc[0];
val2 = pc[1];
pc += 2;
assert(val2 < s->capture_count);
while (val <= val2) {
capture[2 * val] = NULL;
capture[2 * val + 1] = NULL;
val++;
}
}
break;
case REOP_push_i32:
val = get_u32(pc);
pc += 4;
stack[stack_len++] = val;
break;
case REOP_drop:
stack_len--;
break;
case REOP_loop:
val = get_u32(pc);
pc += 4;
if (--stack[stack_len - 1] != 0) {
pc += (int)val;
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
}
break;
case REOP_push_char_pos:
stack[stack_len++] = (uintptr_t)cptr;
break;
case REOP_check_advance:
if (stack[--stack_len] == (uintptr_t)cptr)
goto no_match;
break;
case REOP_word_boundary:
case REOP_not_word_boundary:
{
BOOL v1, v2;
/* char before */
if (cptr == s->cbuf) {
v1 = FALSE;
} else {
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
v1 = is_word_char(c);
}
/* current char */
if (cptr >= cbuf_end) {
v2 = FALSE;
} else {
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
v2 = is_word_char(c);
}
if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
goto no_match;
}
break;
case REOP_back_reference:
case REOP_backward_back_reference:
{
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
uint32_t c1, c2;
val = *pc++;
if (val >= s->capture_count)
goto no_match;
cptr1_start = capture[2 * val];
cptr1_end = capture[2 * val + 1];
if (!cptr1_start || !cptr1_end)
break;
if (opcode == REOP_back_reference) {
cptr1 = cptr1_start;
while (cptr1 < cptr1_end) {
if (cptr >= cbuf_end)
goto no_match;
GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) {
c1 = lre_canonicalize(c1, s->is_unicode);
c2 = lre_canonicalize(c2, s->is_unicode);
}
if (c1 != c2)
goto no_match;
}
} else {
cptr1 = cptr1_end;
while (cptr1 > cptr1_start) {
if (cptr == s->cbuf)
goto no_match;
GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
if (s->ignore_case) {
c1 = lre_canonicalize(c1, s->is_unicode);
c2 = lre_canonicalize(c2, s->is_unicode);
}
if (c1 != c2)
goto no_match;
}
}
}
break;
case REOP_range:
{
int n;
uint32_t low, high, idx_min, idx_max, idx;
n = get_u16(pc); /* n must be >= 1 */
pc += 2;
if (cptr >= cbuf_end)
goto no_match;
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode);
}
idx_min = 0;
low = get_u16(pc + 0 * 4);
if (c < low)
goto no_match;
idx_max = n - 1;
high = get_u16(pc + idx_max * 4 + 2);
/* 0xffff in for last value means +infinity */
if (unlikely(c >= 0xffff) && high == 0xffff)
goto range_match;
if (c > high)
goto no_match;
while (idx_min <= idx_max) {
idx = (idx_min + idx_max) / 2;
low = get_u16(pc + idx * 4);
high = get_u16(pc + idx * 4 + 2);
if (c < low)
idx_max = idx - 1;
else if (c > high)
idx_min = idx + 1;
else
goto range_match;
}
goto no_match;
range_match:
pc += 4 * n;
}
break;
case REOP_range32:
{
int n;
uint32_t low, high, idx_min, idx_max, idx;
n = get_u16(pc); /* n must be >= 1 */
pc += 2;
if (cptr >= cbuf_end)
goto no_match;
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode);
}
idx_min = 0;
low = get_u32(pc + 0 * 8);
if (c < low)
goto no_match;
idx_max = n - 1;
high = get_u32(pc + idx_max * 8 + 4);
if (c > high)
goto no_match;
while (idx_min <= idx_max) {
idx = (idx_min + idx_max) / 2;
low = get_u32(pc + idx * 8);
high = get_u32(pc + idx * 8 + 4);
if (c < low)
idx_max = idx - 1;
else if (c > high)
idx_min = idx + 1;
else
goto range32_match;
}
goto no_match;
range32_match:
pc += 8 * n;
}
break;
case REOP_prev:
/* go to the previous char */
if (cptr == s->cbuf)
goto no_match;
PREV_CHAR(cptr, s->cbuf, cbuf_type);
break;
case REOP_simple_greedy_quant:
{
uint32_t next_pos, quant_min, quant_max;
size_t q;
intptr_t res;
const uint8_t *pc1;
next_pos = get_u32(pc);
quant_min = get_u32(pc + 4);
quant_max = get_u32(pc + 8);
pc += 16;
pc1 = pc;
pc += (int)next_pos;
q = 0;
for(;;) {
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
res = lre_exec_backtrack(s, capture, stack, stack_len,
pc1, cptr, TRUE);
if (res == LRE_RET_MEMORY_ERROR ||
res == LRE_RET_TIMEOUT)
return res;
if (!res)
break;
cptr = (uint8_t *)res;
q++;
if (q >= quant_max && quant_max != INT32_MAX)
break;
}
if (q < quant_min)
goto no_match;
if (q > quant_min) {
/* will examine all matches down to quant_min */
ret = push_state(s, capture, stack, stack_len,
pc1 - 16, cptr,
RE_EXEC_STATE_GREEDY_QUANT,
q - quant_min);
if (ret < 0)
return LRE_RET_MEMORY_ERROR;
}
}
break;
default:
abort();
}
}
}