ext/Modules/_sre/_sre.cpp (5,214 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
// clang-format off
// This file is copy-pasted and inlined from CPython.
// It makes a lot of assumptions about internal string representations, and as
// such, is very inefficient for Pyro. It is slated for eventual rewrite and
// optimization.
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* partial history:
* 1999-10-24 fl created (based on existing template matcher code)
* 2000-03-06 fl first alpha, sort of
* 2000-08-01 fl fixes for 1.6b1
* 2000-08-07 fl use PyOS_CheckStack() if available
* 2000-09-20 fl added expand method
* 2001-03-20 fl lots of fixes for 2.1b2
* 2001-04-15 fl export copyright as Python attribute, not global
* 2001-04-28 fl added __copy__ methods (work in progress)
* 2001-05-14 fl fixes for 1.5.2 compatibility
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
* 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
* 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1
* 2001-10-21 fl added sub/subn primitive
* 2001-10-24 fl added finditer primitive (for 2.2 only)
* 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
* 2002-11-09 fl fixed empty sub/subn return type
* 2003-04-18 mvl fully support 4-byte codes
* 2003-10-17 gn implemented non recursive scheme
* 2013-02-04 mrab added fullmatch primitive
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* This version of the SRE library can be redistributed under CNRI's
* Python 1.6 license. For any other use, please contact Secret Labs
* AB (info@pythonware.com).
*
* Portions of this engine have been developed in cooperation with
* CNRI. Hewlett-Packard provided funding for 1.6 integration and
* other compatibility work.
*/
#include <cassert>
#include <cstring>
#include <cctype>
#include "Python.h"
#include "structmember.h" /* offsetof */
namespace py {
// clang-format off
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
#ifndef SRE_INCLUDED
#define SRE_INCLUDED
// clang-format off
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
#define SRE_MAGIC 20171005
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ANY_ALL 3
#define SRE_OP_ASSERT 4
#define SRE_OP_ASSERT_NOT 5
#define SRE_OP_AT 6
#define SRE_OP_BRANCH 7
#define SRE_OP_CALL 8
#define SRE_OP_CATEGORY 9
#define SRE_OP_CHARSET 10
#define SRE_OP_BIGCHARSET 11
#define SRE_OP_GROUPREF 12
#define SRE_OP_GROUPREF_EXISTS 13
#define SRE_OP_IN 14
#define SRE_OP_INFO 15
#define SRE_OP_JUMP 16
#define SRE_OP_LITERAL 17
#define SRE_OP_MARK 18
#define SRE_OP_MAX_UNTIL 19
#define SRE_OP_MIN_UNTIL 20
#define SRE_OP_NOT_LITERAL 21
#define SRE_OP_NEGATE 22
#define SRE_OP_RANGE 23
#define SRE_OP_REPEAT 24
#define SRE_OP_REPEAT_ONE 25
#define SRE_OP_SUBPATTERN 26
#define SRE_OP_MIN_REPEAT_ONE 27
#define SRE_OP_GROUPREF_IGNORE 28
#define SRE_OP_IN_IGNORE 29
#define SRE_OP_LITERAL_IGNORE 30
#define SRE_OP_NOT_LITERAL_IGNORE 31
#define SRE_OP_GROUPREF_LOC_IGNORE 32
#define SRE_OP_IN_LOC_IGNORE 33
#define SRE_OP_LITERAL_LOC_IGNORE 34
#define SRE_OP_NOT_LITERAL_LOC_IGNORE 35
#define SRE_OP_GROUPREF_UNI_IGNORE 36
#define SRE_OP_IN_UNI_IGNORE 37
#define SRE_OP_LITERAL_UNI_IGNORE 38
#define SRE_OP_NOT_LITERAL_UNI_IGNORE 39
#define SRE_OP_RANGE_UNI_IGNORE 40
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2
#define SRE_AT_BOUNDARY 3
#define SRE_AT_NON_BOUNDARY 4
#define SRE_AT_END 5
#define SRE_AT_END_LINE 6
#define SRE_AT_END_STRING 7
#define SRE_AT_LOC_BOUNDARY 8
#define SRE_AT_LOC_NON_BOUNDARY 9
#define SRE_AT_UNI_BOUNDARY 10
#define SRE_AT_UNI_NON_BOUNDARY 11
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1
#define SRE_CATEGORY_SPACE 2
#define SRE_CATEGORY_NOT_SPACE 3
#define SRE_CATEGORY_WORD 4
#define SRE_CATEGORY_NOT_WORD 5
#define SRE_CATEGORY_LINEBREAK 6
#define SRE_CATEGORY_NOT_LINEBREAK 7
#define SRE_CATEGORY_LOC_WORD 8
#define SRE_CATEGORY_LOC_NOT_WORD 9
#define SRE_CATEGORY_UNI_DIGIT 10
#define SRE_CATEGORY_UNI_NOT_DIGIT 11
#define SRE_CATEGORY_UNI_SPACE 12
#define SRE_CATEGORY_UNI_NOT_SPACE 13
#define SRE_CATEGORY_UNI_WORD 14
#define SRE_CATEGORY_UNI_NOT_WORD 15
#define SRE_CATEGORY_UNI_LINEBREAK 16
#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17
#define SRE_FLAG_TEMPLATE 1
#define SRE_FLAG_IGNORECASE 2
#define SRE_FLAG_LOCALE 4
#define SRE_FLAG_MULTILINE 8
#define SRE_FLAG_DOTALL 16
#define SRE_FLAG_UNICODE 32
#define SRE_FLAG_VERBOSE 64
#define SRE_FLAG_DEBUG 128
#define SRE_FLAG_ASCII 256
#define SRE_INFO_PREFIX 1
#define SRE_INFO_LITERAL 2
#define SRE_INFO_CHARSET 4
/* size of a code word (must be unsigned short or larger, and
large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
#endif
/* Pattern object: a variable-sized Python object that carries the pattern's
   code words inline (the `code` array at the end) together with group
   bookkeeping and the original pattern source.
   NOTE(review): `code` is declared with one element; presumably the object is
   allocated with room for `codesize` words -- confirm at the allocation site,
   which is not part of this excerpt. */
typedef struct {
PyObject_VAR_HEAD
Py_ssize_t groups; /* must be first! */
PyObject* groupindex; /* dict */
PyObject* indexgroup; /* tuple */
/* compatibility */
PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
/* pattern code */
Py_ssize_t codesize; /* presumably the number of words in code[] -- TODO confirm */
SRE_CODE code[1]; /* first code word; must stay the last member */
} PatternObject;
/* Cast `o` to PatternObject* and yield its `code` array. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
/* Match object: a variable-sized Python object that links the target string
   and the pattern object and stores the group marks inline in the trailing
   `mark` array.
   NOTE(review): `mark` is declared with one element; presumably the object is
   allocated with room for all start/end marks -- confirm at the allocation
   site, which is not part of this excerpt. */
typedef struct {
PyObject_VAR_HEAD
PyObject* string; /* link to the target string (must be first) */
PyObject* regs; /* cached list of matching spans */
PatternObject* pattern; /* link to the regex (pattern) object */
Py_ssize_t pos, endpos; /* current target slice */
Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
Py_ssize_t groups; /* number of groups (start/end marks) */
Py_ssize_t mark[1]; /* first mark slot; must stay the last member */
} MatchObject;
/* One repeat context.  Contexts are chained through `prev`, and SRE_STATE
   keeps a pointer to the current one in its `repeat` field. */
typedef struct SRE_REPEAT_T {
Py_ssize_t count; /* NOTE(review): presumably the number of repetitions so far -- confirm */
SRE_CODE* pattern; /* points to REPEAT operator arguments */
void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
} SRE_REPEAT;
/* Search engine state shared by the matching routines: the string pointers
   being scanned, the mark registers, the data stack that the matcher grows
   on demand (see data_stack_grow), and the current repeat context.
   The string pointers are untyped (void*); the width-specific routines cast
   them to their own SRE_CHAR type. */
typedef struct {
/* string pointers */
void* ptr; /* current position (also end of current slice) */
void* beginning; /* start of original string */
void* start; /* start of current slice */
void* end; /* end of original string */
/* attributes for the match object */
PyObject* string;
Py_buffer buffer;
Py_ssize_t pos, endpos;
int isbytes;
int charsize; /* character size */
/* registers */
Py_ssize_t lastindex;
Py_ssize_t lastmark; /* highest valid index in the mark array */
void** mark;
int match_all; /* when set, a toplevel SUCCESS must end at `end` */
int must_advance; /* when set, a toplevel SUCCESS must not end at `start` */
/* dynamically allocated stuff */
char* data_stack; /* buffer owned by the state; released by data_stack_dealloc */
size_t data_stack_size; /* allocated size of data_stack in bytes */
size_t data_stack_base; /* offset of the first unused byte in data_stack */
/* current repeat context */
SRE_REPEAT *repeat;
} SRE_STATE;
/* Scanner object: a Python object that pairs a reference to a pattern with
   an embedded (by-value) SRE_STATE. */
typedef struct {
PyObject_HEAD
PyObject* pattern;
SRE_STATE state;
} ScannerObject;
#endif
static const char copyright[] =
" SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
/* NOTE(review): "Python.h" is already included near the top of this file, so
   this definition comes after that include -- confirm it still has the
   intended effect. */
#define PY_SSIZE_T_CLEAN
/* width of one SRE_CODE word in bits (8 bits per byte assumed) */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
#define SRE_MODULE "sre"
#define SRE_PY_MODULE "re"
/* defining this one enables tracing */
#undef VERBOSE
/* LOCAL(type) introduces a file-local inline function returning `type` */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
/* error codes (all negative, so callers can test `result < 0`) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
/* TRACE((fmt, ...)) takes a parenthesised printf argument list; it expands
   to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
/* -------------------------------------------------------------------- */
/* search engine state */
/* ASCII-only character predicates: every test except SRE_IS_LINEBREAK is
   false for any ch >= 128.  Each macro may evaluate `ch` more than once, so
   the argument must not have side effects. */
#define SRE_IS_DIGIT(ch)\
((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch)\
((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
((ch) < 128 && Py_ISALNUM(ch))
/* word character: ASCII alphanumeric or underscore */
#define SRE_IS_WORD(ch)\
((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
/* Lower-case `ch` with Py_TOLOWER when it is below 128; any other value is
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
/* Upper-case `ch` with Py_TOUPPER when it is below 128; any other value is
   returned unchanged. */
static unsigned int sre_upper_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOUPPER(ch);
}
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
* warnings when c's type supports only numbers < N+1 */
/* isalnum() is only consulted for values 0..255; anything larger is never
   alphanumeric.  `ch` is evaluated more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* word character: locale alphanumeric or underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
/* Lower-case `ch` with tolower() when it is below 256; larger values are
   returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)tolower(ch);
    }
    return ch;
}
/* Upper-case `ch` with toupper() when it is below 256; larger values are
   returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)toupper(ch);
    }
    return ch;
}
/* unicode-specific character predicates */
/* Unicode predicates: thin aliases for the Py_UNICODE_* classification
   macros.  Note that the "digit" test maps to Py_UNICODE_ISDECIMAL. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* word character: Unicode alphanumeric or underscore (evaluates ch twice) */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
/* Return Py_UNICODE_TOLOWER(ch) converted to unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
/* Return Py_UNICODE_TOUPPER(ch) converted to unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
/* Test whether `ch` belongs to the character class selected by `category`
   (one of the SRE_CATEGORY_* codes).  Each class has a positive code and a
   NOT_ code; the NOT_ code returns the logical negation of the positive
   predicate.  An unknown category code yields 0. */
LOCAL(int)
sre_category(SRE_CODE category, unsigned int ch)
{
    int member;      /* value of the positive predicate for ch */
    int invert = 0;  /* set when a NOT_ category was requested */

    switch (category) {
    case SRE_CATEGORY_NOT_DIGIT:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_DIGIT:
        member = SRE_IS_DIGIT(ch);
        break;

    case SRE_CATEGORY_NOT_SPACE:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_SPACE:
        member = SRE_IS_SPACE(ch);
        break;

    case SRE_CATEGORY_NOT_WORD:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_WORD:
        member = SRE_IS_WORD(ch);
        break;

    case SRE_CATEGORY_NOT_LINEBREAK:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_LINEBREAK:
        member = SRE_IS_LINEBREAK(ch);
        break;

    case SRE_CATEGORY_LOC_NOT_WORD:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_LOC_WORD:
        member = SRE_LOC_IS_WORD(ch);
        break;

    case SRE_CATEGORY_UNI_NOT_DIGIT:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_UNI_DIGIT:
        member = SRE_UNI_IS_DIGIT(ch);
        break;

    case SRE_CATEGORY_UNI_NOT_SPACE:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_UNI_SPACE:
        member = SRE_UNI_IS_SPACE(ch);
        break;

    case SRE_CATEGORY_UNI_NOT_WORD:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_UNI_WORD:
        member = SRE_UNI_IS_WORD(ch);
        break;

    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
        invert = 1;
        /* fall through */
    case SRE_CATEGORY_UNI_LINEBREAK:
        member = SRE_UNI_IS_LINEBREAK(ch);
        break;

    default:
        /* unknown category: never matches */
        return 0;
    }

    if (invert) {
        return !member;
    }
    return member;
}
/* Locale-aware case-insensitive comparison: true when `ch` equals `pattern`
   as is, after locale lower-casing, or after locale upper-casing.  The
   conversions are tried in that order and stop at the first hit. */
LOCAL(int)
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
{
    if (ch == pattern) {
        return 1;
    }
    if ((SRE_CODE) sre_lower_locale(ch) == pattern) {
        return 1;
    }
    return (SRE_CODE) sre_upper_locale(ch) == pattern;
}
/* helpers */
/* Release the state's data stack (if one was allocated) and reset the stack
   bookkeeping to "empty". */
static void
data_stack_dealloc(SRE_STATE* state)
{
    char* old_stack = state->data_stack;

    state->data_stack_base = 0;
    state->data_stack_size = 0;
    if (old_stack == NULL) {
        return;
    }
    state->data_stack = NULL;
    PyMem_FREE(old_stack);
}
/* Make sure the data stack can hold `size` more bytes above the current
   base.  When it cannot, the buffer is reallocated to the required size plus
   25% plus 1024 bytes of headroom.
   Returns 0 on success.  If the reallocation fails, the whole stack is
   released and SRE_ERROR_MEMORY is returned.  Because the buffer may move,
   callers must not keep pointers into the old buffer across this call. */
static int
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
{
    Py_ssize_t needed = state->data_stack_base+size;
    Py_ssize_t capacity = state->data_stack_size;
    void* grown;

    if (capacity >= needed) {
        /* already large enough */
        return 0;
    }

    capacity = needed+needed/4+1024;
    TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", capacity));
    grown = PyMem_REALLOC(state->data_stack, capacity);
    if (grown == NULL) {
        data_stack_dealloc(state);
        return SRE_ERROR_MEMORY;
    }

    state->data_stack = (char *)grown;
    state->data_stack_size = capacity;
    return 0;
}
/* generate 8-bit version */
#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
/* Check whether `ptr` satisfies the position assertion `at` (one of the
   SRE_AT_* codes) relative to state->beginning and state->end.
   Returns non-zero when the assertion holds and 0 otherwise; unknown codes
   yield 0.  For the word-boundary assertions an empty string never matches,
   and a position outside the string counts as "not a word character". */
LOCAL(int)
SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
{
    /* word-ness of the character before ptr / at ptr */
    Py_ssize_t before, after;
    const int at_start = ((void*) ptr == state->beginning);
    const int at_stop = ((void*) ptr == state->end);
    const int has_prev = ((void*) ptr > state->beginning);
    const int has_next = ((void*) ptr < state->end);

    switch (at) {

    case SRE_AT_BEGINNING:
    case SRE_AT_BEGINNING_STRING:
        return at_start;

    case SRE_AT_BEGINNING_LINE:
        if (at_start) {
            return 1;
        }
        return SRE_IS_LINEBREAK((int) ptr[-1]);

    case SRE_AT_END:
        /* end of string, or just before a final line break */
        if (at_stop) {
            return 1;
        }
        return ((SRE_CHAR *)state->end - ptr == 1 &&
                SRE_IS_LINEBREAK((int) ptr[0]));

    case SRE_AT_END_LINE:
        if (at_stop) {
            return 1;
        }
        return SRE_IS_LINEBREAK((int) ptr[0]);

    case SRE_AT_END_STRING:
        return at_stop;

    case SRE_AT_BOUNDARY:
    case SRE_AT_NON_BOUNDARY:
        if (state->beginning == state->end) {
            return 0;
        }
        before = has_prev ? SRE_IS_WORD((int) ptr[-1]) : 0;
        after = has_next ? SRE_IS_WORD((int) ptr[0]) : 0;
        if (at == SRE_AT_BOUNDARY) {
            return after != before;
        }
        return after == before;

    case SRE_AT_LOC_BOUNDARY:
    case SRE_AT_LOC_NON_BOUNDARY:
        if (state->beginning == state->end) {
            return 0;
        }
        before = has_prev ? SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
        after = has_next ? SRE_LOC_IS_WORD((int) ptr[0]) : 0;
        if (at == SRE_AT_LOC_BOUNDARY) {
            return after != before;
        }
        return after == before;

    case SRE_AT_UNI_BOUNDARY:
    case SRE_AT_UNI_NON_BOUNDARY:
        if (state->beginning == state->end) {
            return 0;
        }
        before = has_prev ? SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
        after = has_next ? SRE_UNI_IS_WORD((int) ptr[0]) : 0;
        if (at == SRE_AT_UNI_BOUNDARY) {
            return after != before;
        }
        return after == before;

    default:
        break;
    }

    return 0;
}
/* Test whether character `ch` is a member of the set encoded at `set`.
   The set is a sequence of items, each introduced by an opcode word and
   terminated by SRE_OP_FAILURE.  Items are tried in order; the first item
   that contains `ch` decides the result.  SRE_OP_NEGATE flips the meaning of
   every later outcome, including the final "nothing matched" result.
   `state` is unused.  Returns 1 for "member", 0 for "not a member"; an
   unknown opcode is treated as "not a member". */
LOCAL(int)
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
{
(void) state;
/* check if character is a member of the given set */
int ok = 1;
for (;;) {
/* `set` is advanced past the opcode here, so set[0] below is the item's
   first argument */
switch (*set++) {
case SRE_OP_FAILURE:
/* end of set reached without a hit */
return !ok;
case SRE_OP_LITERAL:
/* <LITERAL> <code> */
if (ch == set[0])
return ok;
set++;
break;
case SRE_OP_CATEGORY:
/* <CATEGORY> <code> */
if (sre_category(set[0], (int) ch))
return ok;
set++;
break;
case SRE_OP_CHARSET:
/* <CHARSET> <bitmap> */
/* 256-bit bitmap packed into SRE_CODE words; only covers ch < 256 */
if (ch < 256 &&
(set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
return ok;
set += 256/SRE_CODE_BITS;
break;
case SRE_OP_RANGE:
/* <RANGE> <lower> <upper> */
/* both bounds are inclusive */
if (set[0] <= ch && ch <= set[1])
return ok;
set += 2;
break;
case SRE_OP_RANGE_UNI_IGNORE:
/* <RANGE_UNI_IGNORE> <lower> <upper> */
{
SRE_CODE uch;
/* ch is already lower cased */
if (set[0] <= ch && ch <= set[1])
return ok;
/* also accept the range when only the upper-cased form falls in it */
uch = sre_upper_unicode(ch);
if (set[0] <= uch && uch <= set[1])
return ok;
set += 2;
break;
}
case SRE_OP_NEGATE:
ok = !ok;
break;
case SRE_OP_BIGCHARSET:
/* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
{
Py_ssize_t count, block;
count = *(set++);
/* the 256 block indices are stored one per byte; the high byte of ch
   selects the index.  Characters >= 0x10000 are never members. */
if (ch < 0x10000u)
block = ((unsigned char*)set)[ch >> 8];
else
block = -1;
/* skip the byte-sized index table (256 bytes expressed in code words) */
set += 256/sizeof(SRE_CODE);
/* each block is a 256-bit bitmap indexed by the low byte of ch */
if (block >=0 &&
(set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
(1u << (ch & (SRE_CODE_BITS-1)))))
return ok;
/* skip all `count` bitmap blocks */
set += count * (256/SRE_CODE_BITS);
break;
}
default:
/* internal error -- there's not much we can do about it
here, so let's just pretend it didn't match... */
return 0;
}
}
}
/* Locale-aware case-insensitive set membership: `ch` is a member when its
   locale lower-cased form is in the set, or, failing that, when its locale
   upper-cased form differs from the lower-cased one and is in the set.
   Returns 1 or 0. */
LOCAL(int)
SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
{
    const SRE_CODE lower = sre_lower_locale(ch);
    SRE_CODE upper;

    if (SRE(charset)(state, set, lower)) {
        return 1;
    }

    upper = sre_upper_locale(ch);
    if (upper == lower) {
        /* same character again: the answer would not change */
        return 0;
    }
    return SRE(charset)(state, set, upper) != 0;
}
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);
/* Count how many consecutive characters, starting at state->ptr, match the
   single-item pattern at `pattern`, looking at no more than `maxcount`
   characters (no limit is applied when maxcount equals SRE_MAXREPEAT) and
   never past state->end.
   Returns the number of matching characters, or a negative value propagated
   from SRE(match) in the generic case.
   For the opcodes handled inline, state->ptr is left untouched and only the
   local cursor advances.  In the generic (default) case the work is delegated
   to SRE(match), and the count is derived from how far state->ptr moved. */
LOCAL(Py_ssize_t)
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
{
SRE_CODE chr;
SRE_CHAR c;
SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
SRE_CHAR* end = (SRE_CHAR *)state->end;
Py_ssize_t i;
/* adjust end */
/* shrink the scan window to maxcount characters when that is smaller than
   what remains of the string */
if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
end = ptr + maxcount;
switch (pattern[0]) {
case SRE_OP_IN:
/* repeated set */
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
/* pattern + 2 skips the opcode and the word that follows it */
while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
ptr++;
break;
case SRE_OP_ANY:
/* repeated dot wildcard. */
TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
ptr++;
break;
case SRE_OP_ANY_ALL:
/* repeated dot wildcard. skip to the end of the target
string, and backtrack from there */
TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
ptr = end;
break;
case SRE_OP_LITERAL:
/* repeated literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
c = (SRE_CHAR) chr;
/* when the character type is narrower than a code word, a literal that
   does not survive the narrowing cast can never occur in the string; the
   `else` below binds to the `while` that follows the #endif */
#if SIZEOF_SRE_CHAR < 4
if ((SRE_CODE) c != chr)
; /* literal can't match: doesn't fit in char width */
else
#endif
while (ptr < end && *ptr == c)
ptr++;
break;
case SRE_OP_LITERAL_IGNORE:
/* repeated literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
ptr++;
break;
case SRE_OP_LITERAL_UNI_IGNORE:
/* repeated literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
ptr++;
break;
case SRE_OP_LITERAL_LOC_IGNORE:
/* repeated literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && char_loc_ignore(chr, *ptr))
ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* repeated non-literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
c = (SRE_CHAR) chr;
/* a literal that cannot be represented in the character type differs from
   every character, so the whole window matches */
#if SIZEOF_SRE_CHAR < 4
if ((SRE_CODE) c != chr)
ptr = end; /* literal can't match: doesn't fit in char width */
else
#endif
while (ptr < end && *ptr != c)
ptr++;
break;
case SRE_OP_NOT_LITERAL_IGNORE:
/* repeated non-literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
ptr++;
break;
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
/* repeated non-literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
ptr++;
break;
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
/* repeated non-literal */
chr = pattern[1];
TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
while (ptr < end && !char_loc_ignore(chr, *ptr))
ptr++;
break;
default:
/* repeated single character pattern */
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
/* here the loop condition watches state->ptr, not the local cursor:
   progress comes from SRE(match) updating state->ptr on success */
while ((SRE_CHAR*) state->ptr < end) {
i = SRE(match)(state, pattern, 0);
if (i < 0)
return i;
if (!i)
break;
}
TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
(SRE_CHAR*) state->ptr - ptr));
/* `ptr` still holds the starting position in this branch */
return (SRE_CHAR*) state->ptr - ptr;
}
TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
ptr - (SRE_CHAR*) state->ptr));
/* inline cases: state->ptr is the unchanged start, `ptr` the scan cursor */
return ptr - (SRE_CHAR*) state->ptr;
}
/* The function below is compiled out by the `#if 0` guard and is kept for
   reference only. */
#if 0 /* not used in this release */
LOCAL(int)
SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
{
/* check if an SRE_OP_INFO block matches at the current position.
returns the number of SRE_CODE objects to skip if successful, 0
if no match */
SRE_CHAR* end = (SRE_CHAR*) state->end;
SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
Py_ssize_t i;
/* check minimal length */
/* pattern[3] is the minimum length; 0 disables the check */
if (pattern[3] && end - ptr < pattern[3])
return 0;
/* check known prefix */
/* pattern[2] holds the SRE_INFO_* flag bits, pattern[5] the prefix length */
if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
/* <length> <skip> <prefix data> <overlap data> */
for (i = 0; i < pattern[5]; i++)
if ((SRE_CODE) ptr[i] != pattern[7 + i])
return 0;
return pattern[0] + 2 * pattern[6];
}
return pattern[0];
}
#endif
/* The macros below should be used to protect recursive SRE(match)()
* calls that *failed* and do *not* return immediately (IOW, those
* that will backtrack). Explaining:
*
* - Recursive SRE(match)() returned true: that's usually a success
* (besides atypical cases like ASSERT_NOT), therefore there's no
* reason to restore lastmark;
*
* - Recursive SRE(match)() returned false but the current SRE(match)()
* is returning to the caller: If the current SRE(match)() is the
* top function of the recursion, returning false will be a matching
* failure, and it doesn't matter where lastmark is pointing to.
* If it's *not* the top function, it will be a recursive SRE(match)()
* failure by itself, and the calling SRE(match)() will have to deal
* with the failure by the same rules explained here (it will restore
* lastmark by itself if necessary);
*
* - Recursive SRE(match)() returned false, and will continue the
* outside 'for' loop: must be protected when breaking, since the next
* OP could potentially depend on lastmark;
*
* - Recursive SRE(match)() returned false, and will be called again
* inside a local for/while loop: must be protected between each
* loop iteration, since the recursive SRE(match)() could do anything,
* and could potentially depend on lastmark.
*
* For more information, check the discussion at SF patch #712900.
*/
/* The macros in this group are meant to be expanded inside SRE(match): they
   rely on its locals `state`, `ctx` and `ret`, and RETURN_FAILURE /
   RETURN_SUCCESS jump to a label named `exit` in the enclosing function. */
/* Remember the engine's lastmark/lastindex in the current match context. */
#define LASTMARK_SAVE() \
do { \
ctx->lastmark = state->lastmark; \
ctx->lastindex = state->lastindex; \
} while (0)
/* Put back the lastmark/lastindex remembered by LASTMARK_SAVE(). */
#define LASTMARK_RESTORE() \
do { \
state->lastmark = ctx->lastmark; \
state->lastindex = ctx->lastindex; \
} while (0)
/* Leave the enclosing function immediately with error code `i`. */
#define RETURN_ERROR(i) do { return i; } while(0)
/* Record failure (ret = 0) / success (ret = 1) and continue at `exit`. */
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
/* The conditional variants evaluate `i` more than once, so pass a plain
   variable rather than an expression with side effects. */
#define RETURN_ON_ERROR(i) \
do { if (i < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
/* Data-stack primitives.  They operate on state->data_stack, whose first
   free byte is at offset state->data_stack_base.  DATA_STACK_ALLOC and
   DATA_STACK_PUSH must be expanded where the locals `alloc_pos`, `ctx_pos`
   and `ctx` exist, and they `return` the negative error code from the
   enclosing function when growing the stack fails. */
/* Reserve sizeof(type) bytes on top of the stack and point `ptr` at them;
   `alloc_pos` receives the offset of the reservation.  If the stack had to
   grow, the buffer may have moved, so `ctx` is looked up again from
   `ctx_pos` (unless no context exists yet, i.e. ctx_pos == -1). */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
alloc_pos = state->data_stack_base; \
TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
if (sizeof(type) > state->data_stack_size - alloc_pos) { \
int j = data_stack_grow(state, sizeof(type)); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
} \
ptr = (type*)(state->data_stack+alloc_pos); \
state->data_stack_base += sizeof(type); \
} while (0)
/* Point `ptr` at the object of `type` stored at byte offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
ptr = (type*)(state->data_stack+pos); \
} while (0)
/* Copy `size` bytes from `data` onto the top of the stack, growing it first
   when needed (with the same `ctx` refresh as DATA_STACK_ALLOC). */
#define DATA_STACK_PUSH(state, data, size) \
do { \
TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
data, state->data_stack_base, size)); \
if (size > state->data_stack_size - state->data_stack_base) { \
int j = data_stack_grow(state, size); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
} \
memcpy(state->data_stack+state->data_stack_base, data, size); \
state->data_stack_base += size; \
} while (0)
/* Copy the top `size` bytes of the stack into `data`; the bytes are removed
   from the stack only when `discard` is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
data, state->data_stack_base-size, size)); \
memcpy(data, state->data_stack+state->data_stack_base-size, size); \
if (discard) \
state->data_stack_base -= size; \
} while (0)
/* Drop the top `size` bytes of the stack without copying them anywhere. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
state->data_stack_base-size, size)); \
state->data_stack_base -= size; \
} while(0)
/* Convenience wrappers that use the local `state` and take the size from
   the pointed-to object (x is a pointer to the value). */
#define DATA_PUSH(x) \
DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
DATA_STACK_LOOKUP_AT(state,t,p,pos)
/* Save/restore of the mark array on the data stack.  Each macro is a no-op
   unless `lastmark` is greater than 0; otherwise it transfers the first
   lastmark+1 entries of state->mark. */
/* Push the marks.  The count is copied into the local `i` first because the
   push may reallocate the stack, which can change what `ctx->lastmark`
   refers to. */
#define MARK_PUSH(lastmark) \
do if (lastmark > 0) { \
i = lastmark; /* ctx->lastmark may change if reallocated */ \
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
} while (0)
/* Restore the marks and remove the saved copy from the stack. */
#define MARK_POP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
} while (0)
/* Restore the marks but leave the saved copy on the stack. */
#define MARK_POP_KEEP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
} while (0)
/* Remove the saved copy from the stack without restoring the marks. */
#define MARK_POP_DISCARD(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
} while (0)
/* Jump ids stored in a match context's `jump` field by DO_JUMPX.  Each id
   names one call site inside SRE(match); JUMP_NONE marks the initial context
   created on entry.
   NOTE(review): the code that consumes these ids is outside this excerpt;
   presumably it uses them to resume at the matching jump label -- confirm. */
#define JUMP_NONE 0
#define JUMP_MAX_UNTIL_1 1
#define JUMP_MAX_UNTIL_2 2
#define JUMP_MAX_UNTIL_3 3
#define JUMP_MIN_UNTIL_1 4
#define JUMP_MIN_UNTIL_2 5
#define JUMP_MIN_UNTIL_3 6
#define JUMP_REPEAT 7
#define JUMP_REPEAT_ONE_1 8
#define JUMP_REPEAT_ONE_2 9
#define JUMP_MIN_REPEAT_ONE 10
#define JUMP_BRANCH 11
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
/* DO_JUMPX emulates a nested SRE(match) call without using the C stack.
 * It allocates a fresh match context on the data stack, stores in it the
 * position of the current context (last_ctx_pos), the jump id, the pattern
 * to run and the toplevel flag, makes it the current context and re-enters
 * the matcher at the `entrance` label.  `jumplabel` is defined right after
 * the jump and marks where execution continues after the nested match.
 * NOTE(review): the code that jumps back to `jumplabel` is outside this
 * excerpt; presumably it dispatches on the stored jump id -- confirm.
 *
 * Must be expanded where `ctx`, `nextctx`, `ctx_pos`, `alloc_pos` and the
 * `entrance` label exist.  Use it as a statement followed by `;`.
 *
 * The last line of the macro must NOT end with a backslash: a trailing
 * continuation would splice the following #define into this macro's body,
 * which is ill-formed and leaves DO_JUMP undefined.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
    DATA_ALLOC(SRE(match_context), nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = jumpvalue; \
    nextctx->pattern = nextpattern; \
    nextctx->toplevel = toplevel_; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */

/* Nested match that inherits the current context's toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)

/* Nested match that is never treated as toplevel. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
/* Per-invocation context of the matcher.  Contexts live on the state's data
   stack and are linked by stack offsets rather than pointers, because the
   stack may be reallocated while matching. */
typedef struct {
Py_ssize_t last_ctx_pos; /* stack offset of the context that jumped here (-1 for the first) */
Py_ssize_t jump; /* JUMP_* id of the call site that created this context */
SRE_CHAR* ptr; /* position in the target string for this context */
SRE_CODE* pattern; /* next code word to execute */
Py_ssize_t count; /* repetition count used by the repeat operators */
Py_ssize_t lastmark; /* written by LASTMARK_SAVE, read by LASTMARK_RESTORE */
Py_ssize_t lastindex; /* written by LASTMARK_SAVE, read by LASTMARK_RESTORE */
/* operator-specific scratch value; only one member is meaningful at a time */
union {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
int toplevel; /* non-zero when match_all / must_advance checks apply at SUCCESS */
} SRE(match_context);
/* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
LOCAL(Py_ssize_t)
SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
{
SRE_CHAR* end = (SRE_CHAR *)state->end;
Py_ssize_t alloc_pos, ctx_pos = -1;
Py_ssize_t i, ret = 0;
Py_ssize_t jump, temp_pattern;
unsigned int sigcount=0;
SRE(match_context)* ctx;
SRE(match_context)* nextctx;
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
DATA_ALLOC(SRE(match_context), ctx);
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
ctx->pattern = pattern;
ctx->toplevel = toplevel;
ctx_pos = alloc_pos;
entrance:
ctx->ptr = (SRE_CHAR *)state->ptr;
if (ctx->pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
"need %" PY_FORMAT_SIZE_T "d)\n",
end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
RETURN_FAILURE;
}
ctx->pattern += ctx->pattern[1] + 1;
}
for (;;) {
++sigcount;
if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
RETURN_ERROR(SRE_ERROR_INTERRUPTED);
switch (*ctx->pattern++) {
case SRE_OP_MARK:
/* set mark */
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
/* state->lastmark is the highest valid index in the
state->mark array. If it is increased by more than 1,
the intervening marks must be set to NULL to signal
that these marks have not been encountered. */
Py_ssize_t j = state->lastmark + 1;
while (j < i)
state->mark[j++] = NULL;
state->lastmark = i;
}
state->mark[i] = ctx->ptr;
ctx->pattern++;
break;
case SRE_OP_LITERAL:
/* match literal string */
/* <LITERAL> <code> */
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
if (ctx->toplevel &&
((state->match_all && ctx->ptr != state->end) ||
(state->must_advance && ctx->ptr == state->start)))
{
RETURN_FAILURE;
}
state->ptr = ctx->ptr;
RETURN_SUCCESS;
case SRE_OP_AT:
/* match at given position */
/* <AT> <code> */
TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
RETURN_FAILURE;
ctx->pattern++;
break;
case SRE_OP_CATEGORY:
/* match at given category */
/* <CATEGORY> <code> */
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_ANY:
/* match anything (except a newline) */
/* <ANY> */
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_ANY_ALL:
/* match anything */
/* <ANY_ALL> */
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end)
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_IN:
/* match set member (or non_member) */
/* <IN> <skip> <set> */
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end ||
!SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_LITERAL_IGNORE:
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_LITERAL_UNI_IGNORE:
TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_LITERAL_LOC_IGNORE:
TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end
|| !char_loc_ignore(*ctx->pattern, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end
|| char_loc_ignore(*ctx->pattern, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_IN_IGNORE:
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset)(state, ctx->pattern+1,
(SRE_CODE)sre_lower_ascii(*ctx->ptr)))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_IN_UNI_IGNORE:
TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset)(state, ctx->pattern+1,
(SRE_CODE)sre_lower_unicode(*ctx->ptr)))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_IN_LOC_IGNORE:
TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_JUMP:
case SRE_OP_INFO:
/* jump forward */
/* <JUMP> <offset> */
TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_BRANCH:
/* alternation */
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
LASTMARK_SAVE();
ctx->u.rep = state->repeat;
if (ctx->u.rep)
MARK_PUSH(ctx->lastmark);
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
if (ctx->pattern[1] == SRE_OP_LITERAL &&
(ctx->ptr >= end ||
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
continue;
if (ctx->pattern[1] == SRE_OP_IN &&
(ctx->ptr >= end ||
!SRE(charset)(state, ctx->pattern + 3,
(SRE_CODE) *ctx->ptr)))
continue;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
if (ret) {
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
if (ctx->u.rep)
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
}
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_FAILURE;
case SRE_OP_REPEAT_ONE:
/* match repeated sequence (maximizing regexp) */
/* this operator only works if the repeated item is
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
use the MAX_REPEAT operator */
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
RETURN_FAILURE; /* cannot match */
state->ptr = ctx->ptr;
ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
ctx->count = ret;
ctx->ptr += ctx->count;
/* when we arrive here, count contains the number of
matches, and ctx->ptr points to the tail of the target
string. check if the rest of the pattern matches,
and backtrack if not. */
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
RETURN_FAILURE;
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
ctx->ptr == state->end &&
!(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
{
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
}
LASTMARK_SAVE();
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
/* tail starts with a literal. skip positions where
the rest of the pattern cannot possibly match */
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
for (;;) {
while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
ctx->ptr--;
ctx->count--;
}
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
break;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
LASTMARK_RESTORE();
ctx->ptr--;
ctx->count--;
}
} else {
/* general case */
while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->ptr--;
ctx->count--;
LASTMARK_RESTORE();
}
}
RETURN_FAILURE;
case SRE_OP_MIN_REPEAT_ONE:
/* match repeated sequence (minimizing regexp) */
/* this operator only works if the repeated item is
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
use the MIN_REPEAT operator */
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
RETURN_FAILURE; /* cannot match */
state->ptr = ctx->ptr;
if (ctx->pattern[1] == 0)
ctx->count = 0;
else {
/* count using pattern min as the maximum */
ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
if (ret < (Py_ssize_t) ctx->pattern[1])
/* didn't match minimum number of times */
RETURN_FAILURE;
/* advance past minimum matches of repeat */
ctx->count = ret;
ctx->ptr += ctx->count;
}
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
!(ctx->toplevel &&
((state->match_all && ctx->ptr != state->end) ||
(state->must_advance && ctx->ptr == state->start))))
{
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
} else {
/* general case */
LASTMARK_SAVE();
temp_pattern = (Py_ssize_t)ctx->pattern[2];
while (temp_pattern == SRE_MAXREPEAT
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
state->ptr = ctx->ptr;
ret = SRE(count)(state, ctx->pattern+3, 1);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
if (ret == 0)
break;
assert(ret == 1);
ctx->ptr++;
ctx->count++;
LASTMARK_RESTORE();
}
}
RETURN_FAILURE;
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
/* install new repeat context */
ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
if (!ctx->u.rep) {
PyErr_NoMemory();
RETURN_FAILURE;
}
ctx->u.rep->count = -1;
ctx->u.rep->pattern = ctx->pattern;
ctx->u.rep->prev = state->repeat;
ctx->u.rep->last_ptr = NULL;
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
state->repeat = ctx->u.rep->prev;
PyObject_FREE(ctx->u.rep);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
RETURN_FAILURE;
case SRE_OP_MAX_UNTIL:
/* maximizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
/* FIXME: we probably need to deal with zero-width
matches in here... */
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ctx->ptr;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
ctx->ptr, ctx->count));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can
match another item, do so */
ctx->u.rep->count = ctx->count;
LASTMARK_SAVE();
MARK_PUSH(ctx->lastmark);
/* zero-width match protection */
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
}
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
RETURN_ON_SUCCESS(ret);
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
RETURN_FAILURE;
case SRE_OP_MIN_UNTIL:
/* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ctx->ptr;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
ctx->ptr, ctx->count, ctx->u.rep->pattern));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
LASTMARK_SAVE();
/* see if the tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
LASTMARK_RESTORE();
if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
state->ptr == ctx->u.rep->last_ptr)
RETURN_FAILURE;
ctx->u.rep->count = ctx->count;
/* zero-width match protection */
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end || *ctx->ptr != *p)
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_UNI_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_LOC_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_EXISTS:
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
ctx->pattern += ctx->pattern[1];
break;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p) {
ctx->pattern += ctx->pattern[1];
break;
}
}
}
ctx->pattern += 2;
break;
case SRE_OP_ASSERT:
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
RETURN_FAILURE;
state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_ASSERT_NOT:
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_FAILURE;
}
}
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_FAILURE:
/* immediate failure */
TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
RETURN_FAILURE;
default:
TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[-1]));
RETURN_ERROR(SRE_ERROR_ILLEGAL);
}
}
exit:
ctx_pos = ctx->last_ctx_pos;
jump = ctx->jump;
DATA_POP_DISCARD(ctx);
if (ctx_pos == -1)
return ret;
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
switch (jump) {
case JUMP_MAX_UNTIL_2:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_max_until_2;
case JUMP_MAX_UNTIL_3:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_max_until_3;
case JUMP_MIN_UNTIL_2:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_min_until_2;
case JUMP_MIN_UNTIL_3:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_min_until_3;
case JUMP_BRANCH:
TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
goto jump_branch;
case JUMP_MAX_UNTIL_1:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_max_until_1;
case JUMP_MIN_UNTIL_1:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_min_until_1;
case JUMP_REPEAT:
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
goto jump_repeat;
case JUMP_REPEAT_ONE_1:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_1;
case JUMP_REPEAT_ONE_2:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_2;
case JUMP_MIN_REPEAT_ONE:
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
goto jump_min_repeat_one;
case JUMP_ASSERT:
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
goto jump_assert;
case JUMP_ASSERT_NOT:
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
goto jump_assert_not;
case JUMP_NONE:
TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
ctx->ptr, ret));
break;
}
return ret; /* should never get here */
}
/* Capture groups must be reset between two consecutive SRE(match) calls made
   from a loop. This sets both state->lastmark and state->lastindex back to
   -1. It expects a variable named `state` to be in scope where it is
   expanded. */
#define RESET_CAPTURE_GROUP() \
do { state->lastmark = state->lastindex = -1; } while (0)
/* Scan the subject for the first position at which `pattern` matches.
 *
 * Scanning starts at state->start and never goes past state->end. When the
 * compiled pattern opens with an INFO block, its flags select one of three
 * accelerated scans (one-character literal prefix, longer literal prefix
 * driven by the overlap table, or leading character set); otherwise
 * SRE(match) is simply tried at each successive position.
 *
 * Returns 0 when nothing matched, 1 directly when SRE_INFO_LITERAL says the
 * prefix is the whole pattern, and otherwise the first non-zero status
 * produced by SRE(match) (which may be a negative error code). Before each
 * attempt state->start is set to the candidate position being tried.
 */
LOCAL(Py_ssize_t)
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
{
SRE_CHAR* ptr = (SRE_CHAR *)state->start;
SRE_CHAR* end = (SRE_CHAR *)state->end;
Py_ssize_t status = 0;
Py_ssize_t prefix_len = 0;
Py_ssize_t prefix_skip = 0;
SRE_CODE* prefix = NULL;
SRE_CODE* charset = NULL;
SRE_CODE* overlap = NULL;
int flags = 0;
/* nothing to scan when the start lies beyond the end */
if (ptr > end)
return 0;
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
flags = pattern[2];
/* pattern[3] is the minimum match width: give up at once if the
   remaining subject is shorter than that */
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
TRACE(("reject (got %u chars, need %u)\n",
(unsigned int)(end - ptr), pattern[3]));
return 0;
}
if (pattern[3] > 1) {
/* adjust end point (but make sure we leave at least one
character in there, so literal search will work) */
end -= pattern[3] - 1;
if (end <= ptr)
end = ptr;
}
if (flags & SRE_INFO_PREFIX) {
/* pattern starts with a known prefix */
/* <length> <skip> <prefix data> <overlap data> */
prefix_len = pattern[5];
prefix_skip = pattern[6];
prefix = pattern + 7;
/* offset by -1: the scan loop below only indexes overlap[] with
   i >= 1 */
overlap = prefix + prefix_len - 1;
} else if (flags & SRE_INFO_CHARSET)
/* pattern starts with a character from a known set */
/* <charset> */
charset = pattern + 5;
/* step over the INFO block so `pattern` points at the first real opcode */
pattern += 1 + pattern[1];
}
TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
prefix, prefix_len, prefix_skip));
TRACE(("charset = %p\n", charset));
if (prefix_len == 1) {
/* pattern starts with a literal character */
SRE_CHAR c = (SRE_CHAR) prefix[0];
#if SIZEOF_SRE_CHAR < 4
if ((SRE_CODE) c != prefix[0])
return 0; /* literal can't match: doesn't fit in char width */
#endif
/* this scan uses the real end of the subject, not the end point
   shortened by the minimum width above */
end = (SRE_CHAR *)state->end;
state->must_advance = 0;
while (ptr < end) {
/* skip ahead to the next occurrence of the literal */
while (*ptr != c) {
if (++ptr >= end)
return 0;
}
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
state->start = ptr;
state->ptr = ptr + prefix_skip;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
/* resume matching after the part of the pattern already covered by
   the prefix; NOTE(review): the 2*prefix_skip offset presumably
   reflects two codes per prefix character -- confirm against the
   pattern compiler */
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
if (status != 0)
return status;
++ptr;
RESET_CAPTURE_GROUP();
}
return 0;
}
if (prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
Py_ssize_t i = 0;
end = (SRE_CHAR *)state->end;
if (prefix_len > end - ptr)
return 0;
#if SIZEOF_SRE_CHAR < 4
for (i = 0; i < prefix_len; i++)
if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
return 0; /* literal can't match: doesn't fit in char width */
#endif
while (ptr < end) {
SRE_CHAR c = (SRE_CHAR) prefix[0];
/* find the first prefix character; ptr ends up one past it */
while (*ptr++ != c) {
if (ptr >= end)
return 0;
}
if (ptr >= end)
return 0;
/* i counts how many prefix characters have been matched so far */
i = 1;
state->must_advance = 0;
do {
if (*ptr == (SRE_CHAR) prefix[i]) {
if (++i != prefix_len) {
if (++ptr >= end)
return 0;
continue;
}
/* found a potential match */
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
/* ptr is on the last prefix character here, so step back to
   where the prefix began */
state->start = ptr - (prefix_len - 1);
state->ptr = ptr - (prefix_len - prefix_skip - 1);
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
if (status != 0)
return status;
/* close but no cigar -- try again */
if (++ptr >= end)
return 0;
RESET_CAPTURE_GROUP();
}
/* fall back to the matched-prefix length given by the overlap table;
   0 leaves the inner loop so the scan for prefix[0] starts over */
i = overlap[i];
} while (i != 0);
}
return 0;
}
if (charset) {
/* pattern starts with a character from a known set */
end = (SRE_CHAR *)state->end;
state->must_advance = 0;
for (;;) {
/* skip characters that cannot start a match */
while (ptr < end && !SRE(charset)(state, charset, *ptr))
ptr++;
if (ptr >= end)
return 0;
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
state->start = ptr;
state->ptr = ptr;
status = SRE(match)(state, pattern, 0);
if (status != 0)
break;
ptr++;
RESET_CAPTURE_GROUP();
}
} else {
/* general case */
assert(ptr <= end);
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr;
/* only this first attempt is made with toplevel=1; must_advance is
   cleared right after it and every retry passes toplevel=0 */
status = SRE(match)(state, pattern, 1);
state->must_advance = 0;
while (status == 0 && ptr < end) {
ptr++;
RESET_CAPTURE_GROUP();
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr;
status = SRE(match)(state, pattern, 0);
}
}
return status;
}
/* End of one copy of the matcher template: drop the character-width
   parameters so that the next copy can define its own. */
#undef SRE_CHAR
#undef SIZEOF_SRE_CHAR
#undef SRE
/* vim:ts=4:sw=4:et
*/
/* generate 32-bit unicode version */
/* From here on SRE_CHAR is Py_UCS4 and every SRE(F) name expands to
   sre_ucs4_F. */
#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
LOCAL(int)
SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
{
    /* Evaluate the positional assertion `at` for the subject position `ptr`.
       Returns non-zero when the assertion holds and 0 when it does not; an
       unrecognised assertion code also yields 0. Neighbouring characters are
       only read when the bounds in `state` say they exist. */
    const int at_start = ((void*) ptr == state->beginning);
    const int at_stop = ((void*) ptr == state->end);
    const int has_before = ((void*) ptr > state->beginning);
    const int has_after = ((void*) ptr < state->end);
    const int empty_subject = (state->beginning == state->end);
    Py_ssize_t before = 0;  /* word-ness of the character left of ptr */
    Py_ssize_t after = 0;   /* word-ness of the character at ptr */

    switch (at) {

    case SRE_AT_BEGINNING:
    case SRE_AT_BEGINNING_STRING:
        return at_start;

    case SRE_AT_BEGINNING_LINE:
        if (at_start)
            return 1;
        return SRE_IS_LINEBREAK((int) ptr[-1]) ? 1 : 0;

    case SRE_AT_END:
        /* true at the very end, or just in front of a final line break */
        if (at_stop)
            return 1;
        if ((SRE_CHAR *)state->end - ptr != 1)
            return 0;
        return SRE_IS_LINEBREAK((int) ptr[0]) ? 1 : 0;

    case SRE_AT_END_LINE:
        if (at_stop)
            return 1;
        return SRE_IS_LINEBREAK((int) ptr[0]) ? 1 : 0;

    case SRE_AT_END_STRING:
        return at_stop;

    /* The six word-boundary assertions share one shape: classify the
       characters on both sides of ptr (a missing neighbour counts as
       non-word), then compare the two classifications below. An empty
       subject never has a boundary or a non-boundary. */
    case SRE_AT_BOUNDARY:
    case SRE_AT_NON_BOUNDARY:
        if (empty_subject)
            return 0;
        if (has_before)
            before = SRE_IS_WORD((int) ptr[-1]);
        if (has_after)
            after = SRE_IS_WORD((int) ptr[0]);
        break;

    case SRE_AT_LOC_BOUNDARY:
    case SRE_AT_LOC_NON_BOUNDARY:
        if (empty_subject)
            return 0;
        if (has_before)
            before = SRE_LOC_IS_WORD((int) ptr[-1]);
        if (has_after)
            after = SRE_LOC_IS_WORD((int) ptr[0]);
        break;

    case SRE_AT_UNI_BOUNDARY:
    case SRE_AT_UNI_NON_BOUNDARY:
        if (empty_subject)
            return 0;
        if (has_before)
            before = SRE_UNI_IS_WORD((int) ptr[-1]);
        if (has_after)
            after = SRE_UNI_IS_WORD((int) ptr[0]);
        break;

    default:
        return 0;
    }

    /* only the boundary assertions get this far */
    switch (at) {
    case SRE_AT_BOUNDARY:
    case SRE_AT_LOC_BOUNDARY:
    case SRE_AT_UNI_BOUNDARY:
        return after != before;
    default:
        return after == before;
    }
}
LOCAL(int)
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
{
    /* Decide whether `ch` is a member of the compiled character set that
       starts at `set`. The set is a sequence of items terminated by
       SRE_OP_FAILURE; the first item that contains `ch` settles the answer.
       `verdict` is what a hit returns: it starts out true and is flipped by
       every SRE_OP_NEGATE item. Running off the end returns the opposite.
       An unknown item code makes the function return 0. */
    int verdict = 1;

    (void) state;

    while (1) {
        const SRE_CODE op = *set++;

        if (op == SRE_OP_FAILURE) {
            /* end of set, nothing contained ch */
            return !verdict;
        }

        if (op == SRE_OP_NEGATE) {
            verdict = !verdict;
            continue;
        }

        if (op == SRE_OP_LITERAL) {
            /* <LITERAL> <code> */
            if (ch == set[0])
                return verdict;
            set += 1;
            continue;
        }

        if (op == SRE_OP_CATEGORY) {
            /* <CATEGORY> <code> */
            if (sre_category(set[0], (int) ch))
                return verdict;
            set += 1;
            continue;
        }

        if (op == SRE_OP_CHARSET) {
            /* <CHARSET> <bitmap> -- one bit per character below 256 */
            if (ch < 256 &&
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
                return verdict;
            set += 256/SRE_CODE_BITS;
            continue;
        }

        if (op == SRE_OP_RANGE) {
            /* <RANGE> <lower> <upper> */
            if (set[0] <= ch && ch <= set[1])
                return verdict;
            set += 2;
            continue;
        }

        if (op == SRE_OP_RANGE_UNI_IGNORE) {
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
            SRE_CODE upper_ch;
            /* ch is already lower cased */
            if (set[0] <= ch && ch <= set[1])
                return verdict;
            upper_ch = sre_upper_unicode(ch);
            if (set[0] <= upper_ch && upper_ch <= set[1])
                return verdict;
            set += 2;
            continue;
        }

        if (op == SRE_OP_BIGCHARSET) {
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
            const Py_ssize_t nblocks = *(set++);
            Py_ssize_t block = -1;

            /* the byte-sized block index table only covers ch < 0x10000 */
            if (ch < 0x10000u)
                block = ((unsigned char*)set)[ch >> 8];
            set += 256/sizeof(SRE_CODE);
            if (block >=0 &&
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
                 (1u << (ch & (SRE_CODE_BITS-1)))))
                return verdict;
            set += nblocks * (256/SRE_CODE_BITS);
            continue;
        }

        /* internal error -- there's not much we can do about it
           here, so let's just pretend it didn't match... */
        return 0;
    }
}
LOCAL(int)
SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
{
    /* Locale-aware, case-insensitive set membership: `ch` is in the set when
       either its locale lower-case or its locale upper-case form is. The
       upper-case form is only computed when the lower-case one missed, and
       only looked up when it differs from the lower-case form. */
    const SRE_CODE lowered = sre_lower_locale(ch);

    if (SRE(charset)(state, set, lowered))
        return 1;

    const SRE_CODE raised = sre_upper_locale(ch);

    if (raised == lowered)
        return 0;
    return SRE(charset)(state, set, raised) ? 1 : 0;
}
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);

/* Count how many consecutive repetitions of the one-character item at
 * `pattern` match the subject, starting at state->ptr.
 *
 * At most `maxcount` characters are examined unless maxcount is
 * SRE_MAXREPEAT, and never more than remain before state->end. The common
 * item kinds are handled with tight inline loops; any other item is matched
 * by calling SRE(match) repeatedly, which advances state->ptr itself.
 *
 * Returns the number of repetitions found, or the negative status of a
 * failing SRE(match) call.
 */
LOCAL(Py_ssize_t)
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
{
    SRE_CHAR* cursor = (SRE_CHAR *)state->ptr;
    SRE_CHAR* limit = (SRE_CHAR *)state->end;
    SRE_CODE chr;
    SRE_CHAR c;
    Py_ssize_t status;

    /* adjust end */
    if (maxcount != SRE_MAXREPEAT && maxcount < limit - cursor)
        limit = cursor + maxcount;

    switch (pattern[0]) {

    case SRE_OP_IN:
        /* repeated set */
        TRACE(("|%p|%p|COUNT IN\n", pattern, cursor));
        for (; cursor < limit && SRE(charset)(state, pattern + 2, *cursor);
             cursor++) {
        }
        break;

    case SRE_OP_ANY:
        /* repeated dot wildcard. */
        TRACE(("|%p|%p|COUNT ANY\n", pattern, cursor));
        for (; cursor < limit && !SRE_IS_LINEBREAK(*cursor); cursor++) {
        }
        break;

    case SRE_OP_ANY_ALL:
        /* repeated dot wildcard. skip to the end of the target
           string, and backtrack from there */
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, cursor));
        cursor = limit;
        break;

    case SRE_OP_LITERAL:
        /* repeated literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, cursor, chr));
        c = (SRE_CHAR) chr;
#if SIZEOF_SRE_CHAR < 4
        if ((SRE_CODE) c != chr)
            break; /* literal can't match: doesn't fit in char width */
#endif
        for (; cursor < limit && *cursor == c; cursor++) {
        }
        break;

    case SRE_OP_LITERAL_IGNORE:
        /* repeated literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && (SRE_CODE) sre_lower_ascii(*cursor) == chr;
             cursor++) {
        }
        break;

    case SRE_OP_LITERAL_UNI_IGNORE:
        /* repeated literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && (SRE_CODE) sre_lower_unicode(*cursor) == chr;
             cursor++) {
        }
        break;

    case SRE_OP_LITERAL_LOC_IGNORE:
        /* repeated literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && char_loc_ignore(chr, *cursor); cursor++) {
        }
        break;

    case SRE_OP_NOT_LITERAL:
        /* repeated non-literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, cursor, chr));
        c = (SRE_CHAR) chr;
#if SIZEOF_SRE_CHAR < 4
        if ((SRE_CODE) c != chr) {
            /* literal can't match: doesn't fit in char width */
            cursor = limit;
            break;
        }
#endif
        for (; cursor < limit && *cursor != c; cursor++) {
        }
        break;

    case SRE_OP_NOT_LITERAL_IGNORE:
        /* repeated non-literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && (SRE_CODE) sre_lower_ascii(*cursor) != chr;
             cursor++) {
        }
        break;

    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        /* repeated non-literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && (SRE_CODE) sre_lower_unicode(*cursor) != chr;
             cursor++) {
        }
        break;

    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
        /* repeated non-literal */
        chr = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, cursor, chr));
        for (; cursor < limit && !char_loc_ignore(chr, *cursor); cursor++) {
        }
        break;

    default:
        /* repeated single character pattern */
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, cursor));
        /* here SRE(match) moves state->ptr; `cursor` keeps the start */
        while ((SRE_CHAR*) state->ptr < limit) {
            status = SRE(match)(state, pattern, 0);
            if (status < 0)
                return status;
            if (status == 0)
                break;
        }
        TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, cursor,
               (SRE_CHAR*) state->ptr - cursor));
        return (SRE_CHAR*) state->ptr - cursor;
    }

    /* inline loops: state->ptr is still the start, `cursor` is the end */
    TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, cursor,
           cursor - (SRE_CHAR*) state->ptr));
    return cursor - (SRE_CHAR*) state->ptr;
}
/* The whole function below is compiled out by `#if 0`; it is kept for
   reference only. */
#if 0 /* not used in this release */
LOCAL(int)
SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
{
/* check if an SRE_OP_INFO block matches at the current position.
returns the number of SRE_CODE objects to skip if successful, 0
if no match */
SRE_CHAR* end = (SRE_CHAR*) state->end;
SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
Py_ssize_t i;
/* check minimal length */
if (pattern[3] && end - ptr < pattern[3])
return 0;
/* check known prefix */
if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
/* <length> <skip> <prefix data> <overlap data> */
/* compare the subject against the prefix data, which starts at
   pattern[7] and is pattern[5] codes long */
for (i = 0; i < pattern[5]; i++)
if ((SRE_CODE) ptr[i] != pattern[7 + i])
return 0;
return pattern[0] + 2 * pattern[6];
}
return pattern[0];
}
#endif
/* The macros below should be used to protect recursive SRE(match)()
* calls that *failed* and do *not* return immediately (IOW, those
* that will backtrack). Explaining:
*
* - Recursive SRE(match)() returned true: that's usually a success
* (besides atypical cases like ASSERT_NOT), therefore there's no
* reason to restore lastmark;
*
* - Recursive SRE(match)() returned false but the current SRE(match)()
* is returning to the caller: If the current SRE(match)() is the
* top function of the recursion, returning false will be a matching
* failure, and it doesn't matter where lastmark is pointing to.
* If it's *not* the top function, it will be a recursive SRE(match)()
* failure by itself, and the calling SRE(match)() will have to deal
* with the failure by the same rules explained here (it will restore
* lastmark by itself if necessary);
*
* - Recursive SRE(match)() returned false, and will continue the
* outside 'for' loop: must be protected when breaking, since the next
* OP could potentially depend on lastmark;
*
* - Recursive SRE(match)() returned false, and will be called again
* inside a local for/while loop: must be protected between each
* loop iteration, since the recursive SRE(match)() could do anything,
* and could potentially depend on lastmark.
*
* For more information, check the discussion at SF patch #712900.
*/
/* All macros in this group expect the matcher's locals (`state`, `ctx`,
   `ret`) and its `exit` label to be in scope where they are expanded. */
/* Copy state->lastmark / state->lastindex into the current context. */
#define LASTMARK_SAVE() \
do { \
ctx->lastmark = state->lastmark; \
ctx->lastindex = state->lastindex; \
} while (0)
/* Put back the values stored by the last LASTMARK_SAVE() in this context. */
#define LASTMARK_RESTORE() \
do { \
state->lastmark = ctx->lastmark; \
state->lastindex = ctx->lastindex; \
} while (0)
/* Leave the enclosing function immediately with status `i`; unlike the two
   macros that follow, this does not go through the `exit` label. */
#define RETURN_ERROR(i) do { return i; } while(0)
/* Finish the current context with ret = 0 (no match) / ret = 1 (match) by
   jumping to the `exit` label. */
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
/* Conditional forms: act only when status `i` is negative (error), positive
   (success) or zero (failure) respectively. The latter two check for an
   error first. Note that `i` is evaluated more than once. */
#define RETURN_ON_ERROR(i) \
do { if (i < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
/* Data-stack helpers. The stack is the byte buffer state->data_stack, of
   capacity state->data_stack_size, with state->data_stack_base as the offset
   of the first free byte. These macros expect the locals `alloc_pos`,
   `ctx_pos` and `ctx` to be in scope where they are expanded. */
/* Reserve sizeof(type) bytes on top of the stack and point `ptr` at them.
   The offset of the new object is left in `alloc_pos`. If there is not
   enough room the stack is grown with data_stack_grow(); a negative result
   from that call is returned from the enclosing function. After growing,
   `ctx` is looked up again from `ctx_pos` (presumably because the buffer may
   have moved). */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
alloc_pos = state->data_stack_base; \
TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
if (sizeof(type) > state->data_stack_size - alloc_pos) { \
int j = data_stack_grow(state, sizeof(type)); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
} \
ptr = (type*)(state->data_stack+alloc_pos); \
state->data_stack_base += sizeof(type); \
} while (0)
/* Point `ptr` at the object of the given type stored at byte offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
ptr = (type*)(state->data_stack+pos); \
} while (0)
/* Copy `size` bytes from `data` onto the top of the stack, growing it first
   if needed (same growth and error handling as DATA_STACK_ALLOC). */
#define DATA_STACK_PUSH(state, data, size) \
do { \
TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
data, state->data_stack_base, size)); \
if (size > state->data_stack_size - state->data_stack_base) { \
int j = data_stack_grow(state, size); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
} \
memcpy(state->data_stack+state->data_stack_base, data, size); \
state->data_stack_base += size; \
} while (0)
/* Copy the top `size` bytes of the stack back into `data`; the bytes are
   removed from the stack only when `discard` is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
data, state->data_stack_base-size, size)); \
memcpy(data, state->data_stack+state->data_stack_base-size, size); \
if (discard) \
state->data_stack_base -= size; \
} while (0)
/* Remove the top `size` bytes of the stack without copying them anywhere. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
"(%" PY_FORMAT_SIZE_T "d)\n", \
state->data_stack_base-size, size)); \
state->data_stack_base -= size; \
} while(0)
/* Shorthands that use the local `state` and take the size from the pointed-to
   object (`x` is a pointer) or from the type `t`. */
#define DATA_PUSH(x) \
DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
DATA_STACK_LOOKUP_AT(state,t,p,pos)
/* Save / restore the group marks on the data stack. Each macro covers the
   first lastmark+1 entries of state->mark and does nothing at all unless
   `lastmark` is greater than 0. MARK_PUSH uses the local `i` as scratch. */
/* Push a copy of state->mark[0..lastmark] onto the data stack. */
#define MARK_PUSH(lastmark) \
do if (lastmark > 0) { \
i = lastmark; /* ctx->lastmark may change if reallocated */ \
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
} while (0)
/* Copy the saved marks back into state->mark and remove them from the stack. */
#define MARK_POP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
} while (0)
/* Copy the saved marks back into state->mark but leave them on the stack. */
#define MARK_POP_KEEP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
} while (0)
/* Remove the saved marks from the stack without touching state->mark. */
#define MARK_POP_DISCARD(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
} while (0)
/* Resume-point identifiers. DO_JUMPX stores one of these in the `jump` field
   of the context it creates; when that context finishes, the matcher's exit
   code switches on the value to jump back to the matching jump_* label.
   JUMP_NONE marks the outermost context, which has nothing to resume. */
#define JUMP_NONE 0
#define JUMP_MAX_UNTIL_1 1
#define JUMP_MAX_UNTIL_2 2
#define JUMP_MAX_UNTIL_3 3
#define JUMP_MIN_UNTIL_1 4
#define JUMP_MIN_UNTIL_2 5
#define JUMP_MIN_UNTIL_3 6
#define JUMP_REPEAT 7
#define JUMP_REPEAT_ONE_1 8
#define JUMP_REPEAT_ONE_2 9
#define JUMP_MIN_REPEAT_ONE 10
#define JUMP_BRANCH 11
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
/* DO_JUMPX runs the matcher on a sub-pattern without a C-level recursive
 * call. It allocates a new match context on the data stack, stores in it the
 * data-stack position of the current context (last_ctx_pos), the resume
 * identifier `jumpvalue`, the sub-pattern `nextpattern` and the `toplevel_`
 * flag, makes it the current context and jumps to the `entrance` label.
 * When that context finishes, the matcher's exit code dispatches on the
 * stored identifier and jumps to `jumplabel`, which this macro plants right
 * after the goto; execution then continues after the macro with the outcome
 * in `ret`.
 *
 *   jumpvalue    one of the JUMP_* constants; must pair with jumplabel
 *   jumplabel    label name, unique per use site
 *   nextpattern  SRE_CODE* at which the sub-match starts
 *   toplevel_    value for the new context's `toplevel` field
 *
 * Expects the locals state, ctx, nextctx, ctx_pos and alloc_pos to be in
 * scope. The expansion consists of several statements and ends in
 * `while (0)`, so the use site supplies the final semicolon.
 *
 * The last line of DO_JUMPX must NOT end in a backslash: a line continuation
 * there splices the following #define into this macro's replacement list,
 * which leaves DO_JUMP undefined and is rejected by the preprocessor.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
DATA_ALLOC(SRE(match_context), nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
nextctx->toplevel = toplevel_; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
goto entrance; \
jumplabel: \
while (0) /* gcc doesn't like labels at end of scopes */

/* Sub-match that inherits the current context's toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
/* Sub-match that is never treated as top level (toplevel = 0). */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
/* One frame of the matcher's explicit backtracking stack. Frames live on the
   state's data stack; DO_JUMPX creates one for every sub-match and the
   matcher's exit code pops it again. */
typedef struct {
Py_ssize_t last_ctx_pos; /* data-stack offset of the frame to resume; -1 for the outermost frame */
Py_ssize_t jump; /* JUMP_* identifier of the resume point in that frame */
SRE_CHAR* ptr; /* current position in the subject */
SRE_CODE* pattern; /* current position in the compiled pattern */
Py_ssize_t count; /* repetition counter used by the repeat operators */
Py_ssize_t lastmark; /* state->lastmark as stored by LASTMARK_SAVE() */
Py_ssize_t lastindex; /* state->lastindex as stored by LASTMARK_SAVE() */
union {
SRE_CODE chr; /* REPEAT_ONE: literal that starts the tail */
SRE_REPEAT* rep; /* repeat context in use by this frame */
} u;
int toplevel; /* toplevel flag this frame was started with */
} SRE(match_context);
/* check if string matches the given pattern.  returns <0 for
error, 0 for failure, and 1 for success

The match starts at state->ptr.  `toplevel` is stored in the initial
context; when it is nonzero, SRE_OP_SUCCESS additionally honours
state->match_all and state->must_advance.

The function is written as a non-recursive state machine.  Every
sub-match gets its own match_context (allocated with DATA_ALLOC) that
remembers its parent through last_ctx_pos.  DO_JUMP / DO_JUMP0 create such
a context and jump to `entrance`.  When a context is finished, the code at
`exit` drops it, re-fetches the parent context and uses the stored JUMP_*
id to continue at the jump_* label placed by the DO_JUMP* that started the
sub-match; the sub-match result is then available in `ret`.  The RETURN_*
macros are defined outside this block; presumably they set `ret` and
transfer control to `exit`. */
LOCAL(Py_ssize_t)
SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
{
SRE_CHAR* end = (SRE_CHAR *)state->end;
Py_ssize_t alloc_pos, ctx_pos = -1;
Py_ssize_t i, ret = 0;
Py_ssize_t jump, temp_pattern;
unsigned int sigcount=0;
SRE(match_context)* ctx;
SRE(match_context)* nextctx;
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
/* set up the initial context: no parent, nothing to resume */
DATA_ALLOC(SRE(match_context), ctx);
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
ctx->pattern = pattern;
ctx->toplevel = toplevel;
ctx_pos = alloc_pos;
/* entry point for the initial call and for every DO_JUMP* sub-match */
entrance:
ctx->ptr = (SRE_CHAR *)state->ptr;
if (ctx->pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
"need %" PY_FORMAT_SIZE_T "d)\n",
end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
RETURN_FAILURE;
}
/* step over the info block */
ctx->pattern += ctx->pattern[1] + 1;
}
for (;;) {
/* poll for pending signals once every 0x1000 iterations */
++sigcount;
if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
RETURN_ERROR(SRE_ERROR_INTERRUPTED);
/* fetch the opcode; ctx->pattern now points at its first operand */
switch (*ctx->pattern++) {
case SRE_OP_MARK:
/* set mark */
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
/* an odd mark index updates lastindex to i/2 + 1 */
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
/* state->lastmark is the highest valid index in the
state->mark array. If it is increased by more than 1,
the intervening marks must be set to NULL to signal
that these marks have not been encountered. */
Py_ssize_t j = state->lastmark + 1;
while (j < i)
state->mark[j++] = NULL;
state->lastmark = i;
}
state->mark[i] = ctx->ptr;
ctx->pattern++;
break;
case SRE_OP_LITERAL:
/* match literal string */
/* <LITERAL> <code> */
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
/* in a toplevel context, reject a match that does not reach
state->end (match_all) or that is empty at state->start
(must_advance) */
if (ctx->toplevel &&
((state->match_all && ctx->ptr != state->end) ||
(state->must_advance && ctx->ptr == state->start)))
{
RETURN_FAILURE;
}
state->ptr = ctx->ptr;
RETURN_SUCCESS;
case SRE_OP_AT:
/* match at given position */
/* <AT> <code> */
TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
RETURN_FAILURE;
ctx->pattern++;
break;
case SRE_OP_CATEGORY:
/* match at given category */
/* <CATEGORY> <code> */
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_ANY:
/* match anything (except a newline) */
/* <ANY> */
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_ANY_ALL:
/* match anything */
/* <ANY_ALL> */
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end)
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_IN:
/* match set member (or non_member) */
/* <IN> <skip> <set> */
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end ||
!SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
/* The *_IGNORE, *_UNI_IGNORE and *_LOC_IGNORE opcodes below are the
case-insensitive counterparts of LITERAL, NOT_LITERAL and IN.  They
differ only in the helper used to fold or compare the current
character (sre_lower_ascii, sre_lower_unicode, char_loc_ignore /
charset_loc_ignore). */
case SRE_OP_LITERAL_IGNORE:
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_LITERAL_UNI_IGNORE:
TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_LITERAL_LOC_IGNORE:
TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end
|| !char_loc_ignore(*ctx->pattern, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end
|| char_loc_ignore(*ctx->pattern, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_IN_IGNORE:
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset)(state, ctx->pattern+1,
(SRE_CODE)sre_lower_ascii(*ctx->ptr)))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_IN_UNI_IGNORE:
TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset)(state, ctx->pattern+1,
(SRE_CODE)sre_lower_unicode(*ctx->ptr)))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_IN_LOC_IGNORE:
TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_JUMP:
case SRE_OP_INFO:
/* jump forward */
/* <JUMP> <offset> */
TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_BRANCH:
/* alternation */
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
LASTMARK_SAVE();
/* marks are only pushed/popped while a repeat context is active */
ctx->u.rep = state->repeat;
if (ctx->u.rep)
MARK_PUSH(ctx->lastmark);
/* try each alternative in turn; a skip value of 0 ends the list */
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
/* cheap pre-checks: skip an alternative that starts with a
LITERAL or IN which cannot match the current character */
if (ctx->pattern[1] == SRE_OP_LITERAL &&
(ctx->ptr >= end ||
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
continue;
if (ctx->pattern[1] == SRE_OP_IN &&
(ctx->ptr >= end ||
!SRE(charset)(state, ctx->pattern + 3,
(SRE_CODE) *ctx->ptr)))
continue;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
if (ret) {
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
if (ctx->u.rep)
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
}
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_FAILURE;
case SRE_OP_REPEAT_ONE:
/* match repeated sequence (maximizing regexp) */
/* this operator only works if the repeated item is
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
use the MAX_REPEAT operator */
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
RETURN_FAILURE; /* cannot match */
state->ptr = ctx->ptr;
ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
RETURN_ON_ERROR(ret);
/* re-fetch ctx from its position after the SRE(count) call
(presumably because the underlying storage may have moved) */
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
ctx->count = ret;
ctx->ptr += ctx->count;
/* when we arrive here, count contains the number of
matches, and ctx->ptr points to the tail of the target
string. check if the rest of the pattern matches,
and backtrack if not. */
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
RETURN_FAILURE;
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
ctx->ptr == state->end &&
!(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
{
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
}
LASTMARK_SAVE();
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
/* tail starts with a literal. skip positions where
the rest of the pattern cannot possibly match */
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
for (;;) {
/* give back characters until the next one equals the
literal or we drop below the minimum count */
while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
ctx->ptr--;
ctx->count--;
}
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
break;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
LASTMARK_RESTORE();
ctx->ptr--;
ctx->count--;
}
} else {
/* general case */
/* try the tail after every repeat count from the maximum found
down to the minimum */
while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->ptr--;
ctx->count--;
LASTMARK_RESTORE();
}
}
RETURN_FAILURE;
case SRE_OP_MIN_REPEAT_ONE:
/* match repeated sequence (minimizing regexp) */
/* this operator only works if the repeated item is
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
use the MIN_REPEAT operator */
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
RETURN_FAILURE; /* cannot match */
state->ptr = ctx->ptr;
if (ctx->pattern[1] == 0)
ctx->count = 0;
else {
/* count using pattern min as the maximum */
ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
if (ret < (Py_ssize_t) ctx->pattern[1])
/* didn't match minimum number of times */
RETURN_FAILURE;
/* advance past minimum matches of repeat */
ctx->count = ret;
ctx->ptr += ctx->count;
}
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
!(ctx->toplevel &&
((state->match_all && ctx->ptr != state->end) ||
(state->must_advance && ctx->ptr == state->start))))
{
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
} else {
/* general case */
LASTMARK_SAVE();
/* temp_pattern caches the max operand as a signed value for the
SRE_MAXREPEAT comparison in the loop condition */
temp_pattern = (Py_ssize_t)ctx->pattern[2];
/* try the tail first; only if it fails consume one more item */
while (temp_pattern == SRE_MAXREPEAT
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
ctx->pattern+ctx->pattern[0]);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
state->ptr = ctx->ptr;
ret = SRE(count)(state, ctx->pattern+3, 1);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
if (ret == 0)
break;
assert(ret == 1);
ctx->ptr++;
ctx->count++;
LASTMARK_RESTORE();
}
}
RETURN_FAILURE;
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
/* install new repeat context */
ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
/* NOTE(review): on allocation failure a MemoryError is set but the
opcode reports plain failure rather than an error code */
if (!ctx->u.rep) {
PyErr_NoMemory();
RETURN_FAILURE;
}
ctx->u.rep->count = -1;
ctx->u.rep->pattern = ctx->pattern;
ctx->u.rep->prev = state->repeat;
ctx->u.rep->last_ptr = NULL;
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
/* sub-match done: uninstall and release the repeat context */
state->repeat = ctx->u.rep->prev;
PyObject_FREE(ctx->u.rep);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
RETURN_FAILURE;
case SRE_OP_MAX_UNTIL:
/* maximizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
/* FIXME: we probably need to deal with zero-width
matches in here... */
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ctx->ptr;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
ctx->ptr, ctx->count));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
/* undo the count bump before reporting failure */
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can
match another item, do so */
ctx->u.rep->count = ctx->count;
LASTMARK_SAVE();
MARK_PUSH(ctx->lastmark);
/* zero-width match protection */
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
}
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
RETURN_ON_SUCCESS(ret);
/* tail failed: reinstall this repeat context before failing */
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
RETURN_FAILURE;
case SRE_OP_MIN_UNTIL:
/* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ctx->ptr;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
ctx->ptr, ctx->count, ctx->u.rep->pattern));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
LASTMARK_SAVE();
/* see if the tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
LASTMARK_RESTORE();
/* give up if the maximum count is reached or if the previous
iteration did not advance (state->ptr == last_ptr) */
if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
state->ptr == ctx->u.rep->last_ptr)
RETURN_FAILURE;
ctx->u.rep->count = ctx->count;
/* zero-width match protection */
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
/* The four GROUPREF* opcodes below share one shape: group i occupies
state->mark[2*i] (start) and state->mark[2*i+1] (end); the op fails
if the group is not set, otherwise the captured text must reappear
at ctx->ptr.  They differ only in how characters are compared. */
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end || *ctx->ptr != *p)
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_UNI_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_LOC_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
RETURN_FAILURE;
p++;
ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_EXISTS:
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
i = ctx->pattern[0];
{
Py_ssize_t groupref = i+i;
/* group not set: skip ahead by <skip>; otherwise fall
through to the code right after the two operands */
if (groupref >= state->lastmark) {
ctx->pattern += ctx->pattern[1];
break;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p) {
ctx->pattern += ctx->pattern[1];
break;
}
}
}
ctx->pattern += 2;
break;
case SRE_OP_ASSERT:
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
/* fail if fewer than <back> characters precede ctx->ptr */
if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
RETURN_FAILURE;
state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_ASSERT_NOT:
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
/* the subpattern is only tried when at least <back> characters
precede ctx->ptr; otherwise the assertion holds trivially */
if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_FAILURE;
}
}
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_FAILURE:
/* immediate failure */
TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
RETURN_FAILURE;
default:
TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[-1]));
RETURN_ERROR(SRE_ERROR_ILLEGAL);
}
}
/* A context has finished: remember who started it and where to resume,
drop it, and return to the caller if it was the initial context. */
exit:
ctx_pos = ctx->last_ctx_pos;
jump = ctx->jump;
DATA_POP_DISCARD(ctx);
if (ctx_pos == -1)
return ret;
/* re-fetch the parent context and continue right after its DO_JUMP* */
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
switch (jump) {
case JUMP_MAX_UNTIL_2:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_max_until_2;
case JUMP_MAX_UNTIL_3:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_max_until_3;
case JUMP_MIN_UNTIL_2:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_min_until_2;
case JUMP_MIN_UNTIL_3:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_min_until_3;
case JUMP_BRANCH:
TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
goto jump_branch;
case JUMP_MAX_UNTIL_1:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_max_until_1;
case JUMP_MIN_UNTIL_1:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_min_until_1;
case JUMP_REPEAT:
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
goto jump_repeat;
case JUMP_REPEAT_ONE_1:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_1;
case JUMP_REPEAT_ONE_2:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_2;
case JUMP_MIN_REPEAT_ONE:
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
goto jump_min_repeat_one;
case JUMP_ASSERT:
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
goto jump_assert;
case JUMP_ASSERT_NOT:
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
goto jump_assert_not;
case JUMP_NONE:
TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
ctx->ptr, ret));
break;
}
return ret; /* should never get here */
}
/* Capturing-group bookkeeping has to be reset between two consecutive
   SRE(match) calls made from a loop: both lastmark and lastindex go back
   to -1.  Expects a variable named `state` in scope. */
#define RESET_CAPTURE_GROUP() \
    do { \
        state->lastindex = -1; \
        state->lastmark = -1; \
    } while (0)
/* Scan forward from state->start for the first position at which `pattern`
   matches.

   Returns 0 if no position matches; otherwise the nonzero status of
   SRE(match) is passed through (or 1 directly when the INFO block says the
   whole pattern is a literal).  On success state->start / state->ptr
   describe the match.

   If the pattern begins with an INFO block it is used to speed the scan up:
   - a minimum width rejects subjects that are too short,
   - a one-character literal prefix is located with a plain character scan,
   - a longer literal prefix is located using the overlap table stored after
     the prefix,
   - a leading charset lets positions whose character is not in the set be
     skipped.
   Without any of these, SRE(match) is tried at every position. */
LOCAL(Py_ssize_t)
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
{
SRE_CHAR* ptr = (SRE_CHAR *)state->start;
SRE_CHAR* end = (SRE_CHAR *)state->end;
Py_ssize_t status = 0;
Py_ssize_t prefix_len = 0;
Py_ssize_t prefix_skip = 0;
SRE_CODE* prefix = NULL;
SRE_CODE* charset = NULL;
SRE_CODE* overlap = NULL;
int flags = 0;
if (ptr > end)
return 0;
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
flags = pattern[2];
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
TRACE(("reject (got %u chars, need %u)\n",
(unsigned int)(end - ptr), pattern[3]));
return 0;
}
if (pattern[3] > 1) {
/* adjust end point (but make sure we leave at least one
character in there, so literal search will work) */
/* note: the prefix and charset paths below reset `end` to
state->end; only the general case scans up to this adjusted end */
end -= pattern[3] - 1;
if (end <= ptr)
end = ptr;
}
if (flags & SRE_INFO_PREFIX) {
/* pattern starts with a known prefix */
/* <length> <skip> <prefix data> <overlap data> */
prefix_len = pattern[5];
prefix_skip = pattern[6];
prefix = pattern + 7;
overlap = prefix + prefix_len - 1;
} else if (flags & SRE_INFO_CHARSET)
/* pattern starts with a character from a known set */
/* <charset> */
charset = pattern + 5;
/* step over the info block */
pattern += 1 + pattern[1];
}
TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
prefix, prefix_len, prefix_skip));
TRACE(("charset = %p\n", charset));
if (prefix_len == 1) {
/* pattern starts with a literal character */
SRE_CHAR c = (SRE_CHAR) prefix[0];
#if SIZEOF_SRE_CHAR < 4
if ((SRE_CODE) c != prefix[0])
return 0; /* literal can't match: doesn't fit in char width */
#endif
end = (SRE_CHAR *)state->end;
state->must_advance = 0;
while (ptr < end) {
/* find the next occurrence of the literal */
while (*ptr != c) {
if (++ptr >= end)
return 0;
}
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
state->start = ptr;
state->ptr = ptr + prefix_skip;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
/* match the rest, skipping the pattern code for the part of the
prefix that has already been verified */
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
if (status != 0)
return status;
++ptr;
RESET_CAPTURE_GROUP();
}
return 0;
}
if (prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
Py_ssize_t i = 0;
end = (SRE_CHAR *)state->end;
if (prefix_len > end - ptr)
return 0;
#if SIZEOF_SRE_CHAR < 4
for (i = 0; i < prefix_len; i++)
if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
return 0; /* literal can't match: doesn't fit in char width */
#endif
while (ptr < end) {
SRE_CHAR c = (SRE_CHAR) prefix[0];
/* find the first prefix character; ptr ends up one past it */
while (*ptr++ != c) {
if (ptr >= end)
return 0;
}
if (ptr >= end)
return 0;
/* i counts how many prefix characters have matched so far */
i = 1;
state->must_advance = 0;
do {
if (*ptr == (SRE_CHAR) prefix[i]) {
if (++i != prefix_len) {
if (++ptr >= end)
return 0;
continue;
}
/* found a potential match */
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
state->start = ptr - (prefix_len - 1);
state->ptr = ptr - (prefix_len - prefix_skip - 1);
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE(match)(state, pattern + 2*prefix_skip, 0);
if (status != 0)
return status;
/* close but no cigar -- try again */
if (++ptr >= end)
return 0;
RESET_CAPTURE_GROUP();
}
/* mismatch (or failed candidate): fall back to the overlap
length recorded for the current prefix position */
i = overlap[i];
} while (i != 0);
}
return 0;
}
if (charset) {
/* pattern starts with a character from a known set */
end = (SRE_CHAR *)state->end;
state->must_advance = 0;
for (;;) {
while (ptr < end && !SRE(charset)(state, charset, *ptr))
ptr++;
if (ptr >= end)
return 0;
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
state->start = ptr;
state->ptr = ptr;
status = SRE(match)(state, pattern, 0);
if (status != 0)
break;
ptr++;
RESET_CAPTURE_GROUP();
}
} else {
/* general case */
assert(ptr <= end);
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr;
/* only this first attempt runs as toplevel, so only it honours
state->must_advance, which is cleared right afterwards */
status = SRE(match)(state, pattern, 1);
state->must_advance = 0;
while (status == 0 && ptr < end) {
ptr++;
RESET_CAPTURE_GROUP();
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr;
status = SRE(match)(state, pattern, 0);
}
}
return status;
}
#undef SRE_CHAR
#undef SIZEOF_SRE_CHAR
#undef SRE
/* vim:ts=4:sw=4:et
*/
/* -------------------------------------------------------------------- */
/* factories and destructors */
/* see sre.h for object declarations */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
/*[clinic input]
module _sre
class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
class _sre.SRE_Match "MatchObject *" "&Match_Type"
class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
/* Per-module state of the _sre module: one object pointer for each of the
   three types named in the clinic declarations (Pattern, Match, Scanner). */
typedef struct {
PyObject *Pattern_Type;
PyObject *Match_Type;
PyObject *Scanner_Type;
} _srestate;
/* Module state of the module object `o`, as returned by PyModule_GetState. */
#define _srestate(o) ((_srestate *)PyModule_GetState(o))
extern struct PyModuleDef sremodule;
/* Module state obtained without a module object in hand: the module is
   looked up through PyState_FindModule(&sremodule) first. */
#define _srestate_global ((_srestate *)PyModule_GetState(PyState_FindModule(&sremodule)))
/*[clinic input]
_sre.getcodesize -> int
[clinic start generated code]*/
static int
_sre_getcodesize_impl(PyObject *module)
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
{
    /* Size in bytes of one SRE_CODE unit; `module` is not needed. */
    const int code_unit_bytes = sizeof(SRE_CODE);

    (void) module;
    return code_unit_bytes;
}
/*[clinic input]
_sre.ascii_iscased -> bool
character: int
/
[clinic start generated code]*/
static int
_sre_ascii_iscased_impl(PyObject *module, int character)
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
{
    /* A character counts as cased when sre_lower_ascii or sre_upper_ascii
       maps it to something different from itself. */
    const unsigned int code = (unsigned int)character;

    (void) module;
    if (code != sre_lower_ascii(code)) {
        return 1;
    }
    return code != sre_upper_ascii(code);
}
/*[clinic input]
_sre.unicode_iscased -> bool
character: int
/
[clinic start generated code]*/
static int
_sre_unicode_iscased_impl(PyObject *module, int character)
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
{
    /* A character counts as cased when sre_lower_unicode or
       sre_upper_unicode maps it to something different from itself. */
    const unsigned int code = (unsigned int)character;

    (void) module;
    if (code != sre_lower_unicode(code)) {
        return 1;
    }
    return code != sre_upper_unicode(code);
}
/*[clinic input]
_sre.ascii_tolower -> int
character: int
/
[clinic start generated code]*/
static int
_sre_ascii_tolower_impl(PyObject *module, int character)
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
{
    /* Thin wrapper: the result of sre_lower_ascii is returned as an int. */
    const int lowered = sre_lower_ascii(character);

    (void) module;
    return lowered;
}
/*[clinic input]
_sre.unicode_tolower -> int
character: int
/
[clinic start generated code]*/
static int
_sre_unicode_tolower_impl(PyObject *module, int character)
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
{
    /* Thin wrapper: the result of sre_lower_unicode is returned as an int. */
    const int lowered = sre_lower_unicode(character);

    (void) module;
    return lowered;
}
/* Put a state object back into its "nothing matched yet" condition:
   no repeat context, lastmark and lastindex at -1, data stack released. */
LOCAL(void)
state_reset(SRE_STATE* state)
{
    state->repeat = NULL;
    state->lastindex = -1;
    state->lastmark = -1;

    /* The mark array itself is deliberately not cleared here; entries are
       set to NULL on demand when SRE_OP_MARK raises lastmark. */

    data_stack_dealloc(state);
}
/* Given a Python object, hand back a pointer to its character data and
   report, through the out-parameters, its length, whether it is a
   bytes-like object and the size of one character (always 1 here).

   str objects are read directly (they do not support the buffer API);
   everything else goes through the buffer protocol using `view`, which the
   caller must release later when a non-NULL pointer was returned for a
   bytes-like object.

   Returns NULL with an exception set if the object is not a string or
   bytes-like object, or if its buffer pointer is NULL. */
static void*
getstring(PyObject* string, Py_ssize_t* p_length,
          int* p_isbytes, int* p_charsize,
          Py_buffer *view)
{
    if (!PyUnicode_Check(string)) {
        /* bytes-like object: get pointer to byte string buffer */
        if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
            PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
            return NULL;
        }

        *p_length = view->len;
        *p_charsize = 1;
        *p_isbytes = 1;

        if (view->buf != NULL) {
            return view->buf;
        }

        /* a buffer without storage is of no use: give it back */
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
        PyBuffer_Release(view);
        view->buf = NULL;
        return NULL;
    }

    /* str object */
    if (PyUnicode_READY(string) == -1) {
        return NULL;
    }
    *p_length = PyUnicode_GET_LENGTH(string);
    *p_charsize = 1;
    *p_isbytes = 0;
    return PyUnicode_DATA(string);
}
/* Prepare `state` for matching `pattern` against `string`.

   state   - zeroed and filled in by this function
   pattern - compiled pattern; its group count sizes the mark array and its
             isbytes flag is checked against the kind of `string`
   string  - str or bytes-like subject
   start,
   end     - requested slice, in characters; both are clamped to
             [0, length of string]

   For bytes-like subjects the buffer obtained by getstring() is used
   directly.  For str subjects the data returned by getstring() is decoded
   as UTF-8 into a private buffer owned by the state: one byte per character
   if every character is below 0x80, otherwise four bytes (Py_UCS4) per
   character.  state_fini() releases that buffer.

   Returns `string` (borrowed; the state holds its own reference) on
   success, or NULL with an exception set on failure.  On failure the mark
   array and any acquired buffer have already been released. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t length;
    int isbytes, charsize;
    void* ptr;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(void *, pattern->groups * 2);
    if (!state->mark) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    state->buffer.buf = NULL;
    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    if (!ptr)
        goto err;

    if (isbytes && pattern->isbytes == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a string pattern on a bytes-like object");
        goto err;
    }
    if (!isbytes && pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    if (!isbytes) {
        const unsigned char* utf8 = (const unsigned char*)ptr;
        Py_ssize_t codepointlen = 0;
        Py_ssize_t unit;
        size_t nbytes;
        void* decoded;

        /* Pass 1: walk `length` code points to find out whether any of them
           needs more than one byte.  Indices are Py_ssize_t so that large
           strings do not overflow an int. */
        charsize = 1;
        for (unit = 0; codepointlen < length; codepointlen++, unit++) {
            unsigned char ch = utf8[unit];
            if (ch < 0x80) {
                continue;
            }
            charsize = 4;
            if ((ch & 0xdf) == ch) {
                /* two-byte sequence */
                unit++;
                continue;
            }
            if ((ch & 0xef) == ch) {
                /* three-byte sequence */
                unit += 2;
                continue;
            }
            /* four-byte sequence */
            unit += 3;
        }

        /* Guard the size computation and the allocation itself; both were
           previously unchecked. */
        if (codepointlen > PY_SSIZE_T_MAX / charsize) {
            PyErr_NoMemory();
            goto err;
        }
        nbytes = (size_t)codepointlen * (size_t)charsize;
        /* never request 0 bytes, so a NULL result always means failure */
        decoded = PyMem_Malloc(nbytes ? nbytes : 1);
        if (decoded == NULL) {
            PyErr_NoMemory();
            goto err;
        }

        if (charsize == 1) {
            /* pure ASCII: a plain copy is enough */
            memcpy(decoded, ptr, nbytes);
        } else {
            /* Pass 2: decode every code point into a Py_UCS4 slot */
            Py_UCS4* out = (Py_UCS4*)decoded;
            Py_ssize_t cp;
            Py_ssize_t j = 0;
            for (cp = 0; cp < codepointlen; cp++) {
                unsigned char ch = utf8[j];
                unsigned char ch2, ch3, ch4;
                if (ch < 0x80) {
                    out[cp] = ch;
                    j++;
                    continue;
                }
                ch2 = utf8[j+1];
                if ((ch & 0xdf) == ch) {
                    out[cp] = ((ch & 0x1f) << 6) | (ch2 & 0x3f);
                    j += 2;
                    continue;
                }
                ch3 = utf8[j+2];
                if ((ch & 0xef) == ch) {
                    out[cp] = ((ch & 0x0f) << 12) |
                              ((ch2 & 0x3f) << 6) |
                              (ch3 & 0x3f);
                    j += 3;
                    continue;
                }
                ch4 = utf8[j+3];
                out[cp] = ((ch & 0x07) << 18) |
                          ((ch2 & 0x3f) << 12) |
                          ((ch3 & 0x3F) << 6) |
                          (ch4 & 0x3f);
                j += 4;
            }
        }
        /* from here on the state works on (and owns) the decoded copy */
        ptr = decoded;
    }

    /* adjust boundaries */
    if (start < 0)
        start = 0;
    else if (start > length)
        start = length;

    if (end < 0)
        end = 0;
    else if (end > length)
        end = length;

    state->isbytes = isbytes;
    state->charsize = charsize;
    state->match_all = 0;
    state->must_advance = 0;

    state->beginning = ptr;

    state->start = (void*) ((char*) ptr + start * state->charsize);
    state->end = (void*) ((char*) ptr + end * state->charsize);

    Py_INCREF(string);
    state->string = string;
    state->pos = start;
    state->endpos = end;

    return string;
  err:
    PyMem_Del(state->mark);
    state->mark = NULL;
    if (state->buffer.buf)
        PyBuffer_Release(&state->buffer);
    return NULL;
}
/* Release everything a state object holds: the buffer view (if one was
   acquired), the reference to the subject string, the data stack, the mark
   array and, for non-bytes subjects, the buffer that state->beginning
   points to. */
LOCAL(void)
state_fini(SRE_STATE* state)
{
    if (state->buffer.buf != NULL) {
        PyBuffer_Release(&state->buffer);
    }
    Py_XDECREF(state->string);
    data_stack_dealloc(state);

    PyMem_Del(state->mark);
    state->mark = NULL;

    if (state->isbytes == 0) {
        PyMem_Free(state->beginning);
    }
}
/* calculate offset from start of string: the byte distance between `member`
   and state->beginning, divided by state->charsize, i.e. an index counted
   in characters */
#define STATE_OFFSET(state, member)\
(((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
/* Return string[start:end] as a new reference.

   For non-bytes subjects the slice is taken from `string` itself with
   PyUnicode_Substring and `ptr` is not used.  For bytes-like subjects the
   slice is copied out of `ptr`, except that an exact bytes object sliced
   over its full length is returned as is. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    if (PyBytes_CheckExact(string) &&
        start == 0 && end == PyBytes_GET_SIZE(string)) {
        /* whole object requested: no copy needed */
        Py_INCREF(string);
        return string;
    }

    return PyBytes_FromStringAndSize(
            (const char *)ptr + start, end - start);
}
/* Return the text captured by group `index` as a new reference.

   The group's start and end are stored in state->mark at positions
   (index-1)*2 and (index-1)*2 + 1.  If `string` is None or the group did
   not take part in the match, the result is None, or an empty slice when
   `empty` is nonzero. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    const Py_ssize_t slot = (index - 1) * 2;
    Py_ssize_t from = 0;
    Py_ssize_t to = 0;

    if (string != Py_None && slot < state->lastmark &&
        state->mark[slot] && state->mark[slot+1]) {
        from = STATE_OFFSET(state, state->mark[slot]);
        to = STATE_OFFSET(state, state->mark[slot+1]);
    }
    else if (!empty) {
        Py_RETURN_NONE;
    }
    /* otherwise: want empty string, from == to == 0 */

    return getslice(state->isbytes, state->beginning, string, from, to);
}
/* Translate an engine status code into a Python exception.  Nothing is
   raised for SRE_ERROR_INTERRUPTED because an exception is already
   pending in that case. */
static void
pattern_error(Py_ssize_t status)
{
    if (status == SRE_ERROR_RECURSION_LIMIT) {
        /* This error code seems to be unused. */
        PyErr_SetString(
            PyExc_RecursionError,
            "maximum recursion limit exceeded"
            );
    }
    else if (status == SRE_ERROR_MEMORY) {
        PyErr_NoMemory();
    }
    else if (status == SRE_ERROR_INTERRUPTED) {
        /* An exception has already been raised, so let it fly */
    }
    else {
        /* other error codes indicate compiler/engine bugs */
        PyErr_SetString(
            PyExc_RuntimeError,
            "internal error in regular expression engine"
            );
    }
}
/* Deallocator for PatternObject: clears weak references, drops the owned
   references, frees the object and finally releases its type. */
static void
pattern_dealloc(PatternObject* self)
{
    PyTypeObject *type = Py_TYPE(self);

    if (self->weakreflist != NULL) {
        PyObject_ClearWeakRefs((PyObject *) self);
    }

    Py_XDECREF(self->pattern);
    Py_XDECREF(self->groupindex);
    Py_XDECREF(self->indexgroup);

    PyObject_DEL(self);
    /* The type reference is dropped only after the instance is freed. */
    Py_DECREF(type);
}
/* Dispatch a match attempt to the engine variant for the state's character
   width (1 or 4 bytes). */
LOCAL(Py_ssize_t)
sre_match(SRE_STATE* state, SRE_CODE* pattern)
{
    if (state->charsize != 1) {
        assert(state->charsize == 4);
        return sre_ucs4_match(state, pattern, 1);
    }
    return sre_ucs1_match(state, pattern, 1);
}
/* Dispatch a search to the engine variant for the state's character width
   (1 or 4 bytes). */
LOCAL(Py_ssize_t)
sre_search(SRE_STATE* state, SRE_CODE* pattern)
{
    if (state->charsize != 1) {
        assert(state->charsize == 4);
        return sre_ucs4_search(state, pattern);
    }
    return sre_ucs1_search(state, pattern);
}
/*[clinic input]
_sre.SRE_Pattern.match
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Matches zero or more characters at the beginning of the string.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
{
    /* Run one anchored match attempt at `pos` and wrap the outcome in a
       match object (or whatever pattern_new_match yields for the status). */
    SRE_STATE state;
    PyObject *match = NULL;
    Py_ssize_t status;

    if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) {
        return NULL;
    }

    state.ptr = state.start;

    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
    status = sre_match(&state, PatternObject_GetCode(self));
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* A pending exception wins over the status code. */
    if (!PyErr_Occurred()) {
        match = pattern_new_match(self, &state, status);
    }
    state_fini(&state);
    return match;
}
/*[clinic input]
_sre.SRE_Pattern.fullmatch
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Matches against all of the string.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
                                Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
{
    /* Same as match(), but with state.match_all set before the engine runs. */
    SRE_STATE state;
    PyObject *match = NULL;
    Py_ssize_t status;

    if (!state_init(&state, self, string, pos, endpos)) {
        return NULL;
    }

    state.ptr = state.start;

    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
    state.match_all = 1;
    status = sre_match(&state, PatternObject_GetCode(self));
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* A pending exception wins over the status code. */
    if (!PyErr_Occurred()) {
        match = pattern_new_match(self, &state, status);
    }
    state_fini(&state);
    return match;
}
/*[clinic input]
_sre.SRE_Pattern.search
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Scan through string looking for a match, and return a corresponding match object instance.
Return None if no position in the string matches.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
                             Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
{
    /* Run one search over [pos, endpos) and wrap the outcome via
       pattern_new_match. */
    SRE_STATE state;
    PyObject *match = NULL;
    Py_ssize_t status;

    if (!state_init(&state, self, string, pos, endpos)) {
        return NULL;
    }

    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
    status = sre_search(&state, PatternObject_GetCode(self));
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* A pending exception wins over the status code. */
    if (!PyErr_Occurred()) {
        match = pattern_new_match(self, &state, status);
    }
    state_fini(&state);
    return match;
}
/* Import `module`, look up `function` in it and call it with `args`.

   `args` is an argument tuple whose reference is consumed by this function
   on every path (callers pass a freshly built tuple inline). A NULL `args`
   is accepted and simply propagates the failure.

   Returns a new reference to the call result, or NULL with an exception
   set. */
static PyObject*
call(const char* module, const char* function, PyObject* args)
{
    PyObject* name;
    PyObject* mod;
    PyObject* func;
    PyObject* result = NULL;

    if (!args)
        return NULL;

    name = PyUnicode_FromString(module);
    if (!name)
        goto done;
    mod = PyImport_Import(name);
    Py_DECREF(name);
    if (!mod)
        goto done;
    func = PyObject_GetAttrString(mod, function);
    Py_DECREF(mod);
    if (!func)
        goto done;
    result = PyObject_CallObject(func, args);
    Py_DECREF(func);

done:
    /* Release the argument tuple on failure paths too; previously it was
       leaked whenever the import or attribute lookup failed. */
    Py_DECREF(args);
    return result;
}
/*[clinic input]
_sre.SRE_Pattern.findall
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Return a list of all non-overlapping matches of pattern in string.
[clinic start generated code]*/
/* Build the findall() list entry for the match currently recorded in
   `state`: the whole match when the pattern has no groups, the single group
   when it has one, and otherwise a tuple of all groups (unset groups become
   empty slices). Returns a new reference or NULL on error. */
static PyObject *
findall_item(PatternObject *self, SRE_STATE *state, PyObject *string)
{
    PyObject *tuple;
    Py_ssize_t g;

    if (self->groups == 0) {
        Py_ssize_t b = STATE_OFFSET(state, state->start);
        Py_ssize_t e = STATE_OFFSET(state, state->ptr);
        return getslice(state->isbytes, state->beginning, string, b, e);
    }
    if (self->groups == 1) {
        return state_getslice(state, 1, string, 1);
    }

    tuple = PyTuple_New(self->groups);
    if (!tuple) {
        return NULL;
    }
    for (g = 0; g < self->groups; g++) {
        PyObject *o = state_getslice(state, g+1, string, 1);
        if (!o) {
            Py_DECREF(tuple);
            return NULL;
        }
        PyTuple_SET_ITEM(tuple, g, o);
    }
    return tuple;
}

static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
{
    SRE_STATE state;
    PyObject* results;
    PyObject* item;
    Py_ssize_t status;

    if (!state_init(&state, self, string, pos, endpos)) {
        return NULL;
    }

    results = PyList_New(0);
    if (results == NULL) {
        state_fini(&state);
        return NULL;
    }

    for (;;) {
        if (state.start > state.end) {
            break;
        }

        state_reset(&state);
        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred()) {
            goto error;
        }
        if (status == 0) {
            /* no further match */
            break;
        }
        if (status < 0) {
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        item = findall_item(self, &state, string);
        if (item == NULL) {
            goto error;
        }

        status = PyList_Append(results, item);
        Py_DECREF(item);
        if (status < 0) {
            goto error;
        }

        /* An empty match must not be found again at the same position. */
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    state_fini(&state);
    return results;

error:
    Py_DECREF(results);
    state_fini(&state);
    return NULL;
}
/*[clinic input]
_sre.SRE_Pattern.finditer
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Return an iterator over all non-overlapping matches for the RE pattern in string.
For each match, the iterator returns a match object.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
                               Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
{
    /* Create a scanner and return a callable-iterator over its bound
       `search` method, using None as the sentinel. */
    PyObject *bound_search;
    PyObject *result;
    PyObject *scanner_obj = pattern_scanner(self, string, pos, endpos);

    if (scanner_obj == NULL) {
        return NULL;
    }

    bound_search = PyObject_GetAttrString(scanner_obj, "search");
    Py_DECREF(scanner_obj);
    if (bound_search == NULL) {
        return NULL;
    }

    result = PyCallIter_New(bound_search, Py_None);
    Py_DECREF(bound_search);
    return result;
}
/*[clinic input]
_sre.SRE_Pattern.scanner
string: object
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
{
    /* Thin wrapper: all work is done by pattern_scanner(). */
    PyObject *scanner_obj = pattern_scanner(self, string, pos, endpos);
    return scanner_obj;
}
/*[clinic input]
_sre.SRE_Pattern.split
string: object
maxsplit: Py_ssize_t = 0
Split string by the occurrences of pattern.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Py_ssize_t maxsplit)
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
{
SRE_STATE state;
PyObject* list;
PyObject* item;
Py_ssize_t status;
Py_ssize_t n;
Py_ssize_t i;
void* last;
assert(self->codesize != 0);
if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
return NULL;
list = PyList_New(0);
if (!list) {
state_fini(&state);
return NULL;
}
n = 0;
last = state.start;
while (!maxsplit || n < maxsplit) {
state_reset(&state);
state.ptr = state.start;
status = sre_search(&state, PatternObject_GetCode(self));
if (PyErr_Occurred())
goto error;
if (status <= 0) {
if (status == 0)
break;
pattern_error(status);
goto error;
}
/* get segment before this match */
item = getslice(state.isbytes, state.beginning,
string, STATE_OFFSET(&state, last),
STATE_OFFSET(&state, state.start)
);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
/* add groups (if any) */
for (i = 0; i < self->groups; i++) {
item = state_getslice(&state, i+1, string, 0);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
}
n = n + 1;
state.must_advance = (state.ptr == state.start);
last = state.start = state.ptr;
}
/* get segment following last match (even if empty) */
item = getslice(state.isbytes, state.beginning,
string, STATE_OFFSET(&state, last), state.endpos
);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
state_fini(&state);
return list;
error:
Py_DECREF(list);
state_fini(&state);
return NULL;
}
/* Shared implementation of sub() and subn().

   ptemplate: replacement; either a callable (called with each match object)
              or a template. A template without a backslash is used as a
              literal; anything else is handed to the Python-level `_subx`
              helper, whose result is used as a callable filter if it is
              callable and as a literal otherwise.
   string:    subject to search.
   count:     maximum number of replacements; 0 means unlimited.
   subn:      when non-zero, return (new_string, number_of_subs) instead of
              just new_string.

   Returns a new reference, or NULL with an exception set. */
static PyObject*
pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
             Py_ssize_t count, Py_ssize_t subn)
{
    SRE_STATE state;
    PyObject* list;
    PyObject* joiner;
    PyObject* item;
    PyObject* filter;
    PyObject* match;
    void* ptr;
    Py_ssize_t status;
    Py_ssize_t n;
    Py_ssize_t i, b, e;
    int isbytes, charsize;
    int filter_is_callable;
    Py_buffer view;

    if (PyCallable_Check(ptemplate)) {
        /* sub/subn takes either a function or a template */
        filter = ptemplate;
        Py_INCREF(filter);
        filter_is_callable = 1;
    } else {
        /* if not callable, check if it's a literal string */
        int literal;
        view.buf = NULL;
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
        /* charsize is only meaningful when getstring succeeded, so it is
           not read outside the `ptr` branch. */
        if (ptr) {
            if (charsize == 1)
                literal = memchr(ptr, '\\', n) == NULL;
            else
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
        } else {
            PyErr_Clear();
            literal = 0;
        }
        if (view.buf)
            PyBuffer_Release(&view);
        if (literal) {
            filter = ptemplate;
            Py_INCREF(filter);
            filter_is_callable = 0;
        } else {
            /* not a literal; hand it over to the template compiler */
            filter = call(
                SRE_PY_MODULE, "_subx",
                PyTuple_Pack(2, self, ptemplate)
                );
            if (!filter)
                return NULL;
            filter_is_callable = PyCallable_Check(filter);
        }
    }

    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
        Py_DECREF(filter);
        return NULL;
    }

    list = PyList_New(0);
    if (!list) {
        Py_DECREF(filter);
        state_fini(&state);
        return NULL;
    }

    /* n counts substitutions; i is the subject offset already copied */
    n = i = 0;

    while (!count || n < count) {

        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        b = STATE_OFFSET(&state, state.start);
        e = STATE_OFFSET(&state, state.ptr);

        if (i < b) {
            /* get segment before this match */
            item = getslice(state.isbytes, state.beginning,
                            string, i, b);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        if (filter_is_callable) {
            /* pass match object through filter */
            match = pattern_new_match(self, &state, 1);
            if (!match)
                goto error;
            item = PyObject_CallFunctionObjArgs(filter, match, NULL);
            Py_DECREF(match);
            if (!item)
                goto error;
        } else {
            /* filter is literal string */
            item = filter;
            Py_INCREF(item);
        }

        /* add to list; a None result contributes nothing, but the reference
           we own must still be released */
        status = (item != Py_None) ? PyList_Append(list, item) : 0;
        Py_DECREF(item);
        if (status < 0)
            goto error;

        i = e;
        n = n + 1;
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    /* get segment following last match */
    if (i < state.endpos) {
        item = getslice(state.isbytes, state.beginning,
                        string, i, state.endpos);
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;
    }

    /* Build the empty joiner while `state` is still valid, so no state
       fields are read after state_fini() has released them. */
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
    if (!joiner)
        goto error;
    isbytes = state.isbytes;

    state_fini(&state);

    Py_DECREF(filter);

    /* convert list to single string (also removes list) */
    if (PyList_GET_SIZE(list) == 0) {
        Py_DECREF(list);
        item = joiner;
    }
    else {
        if (isbytes)
            item = _PyBytes_Join(joiner, list);
        else
            item = PyUnicode_Join(joiner, list);
        Py_DECREF(joiner);
        Py_DECREF(list);
        if (!item)
            return NULL;
    }

    if (subn)
        return Py_BuildValue("Nn", item, n);

    return item;

error:
    Py_DECREF(list);
    state_fini(&state);
    Py_DECREF(filter);
    return NULL;
}
/*[clinic input]
_sre.SRE_Pattern.sub
repl: object
string: object
count: Py_ssize_t = 0
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
                          PyObject *string, Py_ssize_t count)
/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
{
    /* sub(): delegate to pattern_subx with the subn flag cleared. */
    const Py_ssize_t want_count = 0;
    return pattern_subx(self, repl, string, count, want_count);
}
/*[clinic input]
_sre.SRE_Pattern.subn
repl: object
string: object
count: Py_ssize_t = 0
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
                           PyObject *string, Py_ssize_t count)
/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
{
    /* subn(): delegate to pattern_subx with the subn flag set. */
    const Py_ssize_t want_count = 1;
    return pattern_subx(self, repl, string, count, want_count);
}
/*[clinic input]
_sre.SRE_Pattern.__copy__
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject *self)
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
{
    /* A copy of a pattern is the pattern itself, with a new reference. */
    PyObject *same = (PyObject *)self;
    Py_INCREF(same);
    return same;
}
/*[clinic input]
_sre.SRE_Pattern.__deepcopy__
memo: object
/
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
{
    /* A deep copy of a pattern is the pattern itself; `memo` is unused. */
    PyObject *same = (PyObject *)self;
    (void) memo;
    Py_INCREF(same);
    return same;
}
/* repr() for pattern objects: "re.compile(<pattern>)" or
   "re.compile(<pattern>, <FLAG|FLAG|...>)". Known flag bits are printed by
   name; any remaining bits are printed as one hexadecimal literal. */
static PyObject *
pattern_repr(PatternObject *obj)
{
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };
    const int charset_flags =
        SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII;
    PyObject *result = NULL;
    PyObject *flag_items;
    PyObject *item;
    PyObject *sep;
    PyObject *flags_result;
    size_t i;
    int rc;
    int flags = obj->flags;

    /* Omit re.UNICODE for valid string patterns. */
    if (obj->isbytes == 0 && (flags & charset_flags) == SRE_FLAG_UNICODE) {
        flags &= ~SRE_FLAG_UNICODE;
    }

    flag_items = PyList_New(0);
    if (flag_items == NULL) {
        return NULL;
    }

    /* Collect the names of all known flags that are set. */
    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
        if (!(flags & flag_names[i].value)) {
            continue;
        }
        item = PyUnicode_FromString(flag_names[i].name);
        if (item == NULL) {
            goto done;
        }
        rc = PyList_Append(flag_items, item);
        Py_DECREF(item);
        if (rc < 0) {
            goto done;
        }
        flags &= ~flag_names[i].value;
    }

    /* Whatever is left has no name; show it in hex. */
    if (flags) {
        item = PyUnicode_FromFormat("0x%x", flags);
        if (item == NULL) {
            goto done;
        }
        rc = PyList_Append(flag_items, item);
        Py_DECREF(item);
        if (rc < 0) {
            goto done;
        }
    }

    if (PyList_Size(flag_items) <= 0) {
        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
        goto done;
    }

    sep = PyUnicode_FromString("|");
    if (sep == NULL) {
        goto done;
    }
    flags_result = PyUnicode_Join(sep, flag_items);
    Py_DECREF(sep);
    if (flags_result == NULL) {
        goto done;
    }
    result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                  obj->pattern, flags_result);
    Py_DECREF(flags_result);

done:
    Py_DECREF(flag_items);
    return result;
}
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
/* Getter for PatternObject's 'groupindex' attribute: a read-only proxy of
   the stored mapping, or a fresh empty dict when none is stored. */
static PyObject *
pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
{
    PyObject *mapping = self->groupindex;

    if (mapping != NULL) {
        return PyDictProxy_New(mapping);
    }
    return PyDict_New();
}
static int _validate(PatternObject *self); /* Forward */
/*[clinic input]
_sre.compile
pattern: object
flags: int
code: object(subclass_of='&PyList_Type')
groups: Py_ssize_t
groupindex: object(subclass_of='&PyDict_Type')
indexgroup: object(subclass_of='&PyTuple_Type')
[clinic start generated code]*/
static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
                  PyObject *indexgroup)
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
{
    /* "compile" pattern descriptor to pattern object.

       Copies the integer list `code` into a newly allocated PatternObject,
       records pattern/flags/groups and the optional group mappings, and
       validates the code before returning it. Returns a new reference, or
       NULL with an exception set. */

    PatternObject* self;
    Py_ssize_t i, n;

    n = PyList_GET_SIZE(code);
    /* coverity[ampersand_in_size] */
    self = PyObject_NEW_VAR(PatternObject, (PyTypeObject *)_srestate(module)->Pattern_Type, n);
    if (!self)
        return NULL;
    /* Clear the owned pointers first so Py_DECREF(self) is safe on any
       error path below. */
    self->weakreflist = NULL;
    self->pattern = NULL;
    self->groupindex = NULL;
    self->indexgroup = NULL;

    self->codesize = n;

    for (i = 0; i < n; i++) {
        PyObject *o = PyList_GET_ITEM(code, i);
        unsigned long value = PyLong_AsUnsignedLong(o);
        /* Stop at the first conversion failure so the original exception
           is reported and no further C-API calls run with it pending. */
        if (value == (unsigned long)-1 && PyErr_Occurred()) {
            break;
        }
        self->code[i] = (SRE_CODE) value;
        /* Detect values that do not survive the narrowing to SRE_CODE. */
        if ((unsigned long) self->code[i] != value) {
            PyErr_SetString(PyExc_OverflowError,
                            "regular expression code size limit exceeded");
            break;
        }
    }

    if (PyErr_Occurred()) {
        Py_DECREF(self);
        return NULL;
    }

    if (pattern == Py_None) {
        self->isbytes = -1;
    }
    else {
        Py_ssize_t p_length;
        int charsize;
        Py_buffer view;
        view.buf = NULL;
        /* getstring is used here only to classify the pattern (isbytes). */
        if (!getstring(pattern, &p_length, &self->isbytes,
                       &charsize, &view)) {
            Py_DECREF(self);
            return NULL;
        }
        if (view.buf)
            PyBuffer_Release(&view);
    }

    Py_INCREF(pattern);
    self->pattern = pattern;

    self->flags = flags;

    self->groups = groups;

    /* The group mappings are kept only when groupindex is non-empty. */
    if (PyDict_GET_SIZE(groupindex) > 0) {
        Py_INCREF(groupindex);
        self->groupindex = groupindex;
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
            Py_INCREF(indexgroup);
            self->indexgroup = indexgroup;
        }
    }

    if (!_validate(self)) {
        Py_DECREF(self);
        return NULL;
    }

    return (PyObject*) self;
}
/* -------------------------------------------------------------------- */
/* Code validation */
/* To learn more about this code, have a look at the _compile() function in
Lib/sre_compile.py. The validation functions below check the code array
for conformance with the code patterns generated there.
The nice thing about the generated code is that it is position-independent:
all jumps are relative jumps forward. Also, jumps don't cross each other:
the target of a later jump is always earlier than the target of an earlier
jump. IOW, this is okay:
J---------J-------T--------T
\ \_____/ /
\______________________/
but this is not:
J---------J-------T--------T
\_________\_____/ /
\____________/
It also helps that SRE_CODE is always an unsigned type.
*/
/* Defining this one enables tracing of the validator */
#undef VVERBOSE
/* Trace macro for the validator: takes a parenthesized printf argument list
   and forwards it to printf when VVERBOSE is defined; otherwise it expands
   to an empty statement. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0) /* do nothing */
#endif
/* Report failure: trace the source line and return 0 from the enclosing
   function. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
/* Extract opcode, argument, or skip count from code array.
   These macros rely on locals named `code` and `end` in the enclosing
   function and store into locals named `op`, `arg` and `skip` respectively.
   Each one FAILs if `code` has already reached `end`. */
/* GET_OP: read the word at `code` into `op` and advance `code`. */
#define GET_OP \
do { \
VTRACE(("%p: ", code)); \
if (code >= end) FAIL; \
op = *code++; \
VTRACE(("%lu (op)\n", (unsigned long)op)); \
} while (0)
/* GET_ARG: read the word at `code` into `arg` and advance `code`. */
#define GET_ARG \
do { \
VTRACE(("%p= ", code)); \
if (code >= end) FAIL; \
arg = *code++; \
VTRACE(("%lu (arg)\n", (unsigned long)arg)); \
} while (0)
/* GET_SKIP_ADJ: read the word at `code` into `skip`, FAIL if skip-adj is
   larger than the number of words from the skip word to `end`, and only
   then advance `code` past the skip word. */
#define GET_SKIP_ADJ(adj) \
do { \
VTRACE(("%p= ", code)); \
if (code >= end) FAIL; \
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
if (skip-adj > (uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
/* GET_SKIP: GET_SKIP_ADJ with no adjustment. */
#define GET_SKIP GET_SKIP_ADJ(0)
/* Validate a sequence of charset items occupying [code, end).
   Accepted item opcodes are NEGATE, LITERAL, RANGE, RANGE_UNI_IGNORE,
   CHARSET, BIGCHARSET and CATEGORY; any other opcode, an unknown category,
   or an item that would run past `end` fails.
   Returns 1 if the whole range is well-formed, 0 otherwise. */
static int
_validate_charset(SRE_CODE *code, SRE_CODE *end)
{
/* Some variables are manipulated by the macros above */
SRE_CODE op;
SRE_CODE arg;
SRE_CODE offset;
int i;
while (code < end) {
GET_OP;
switch (op) {
case SRE_OP_NEGATE:
/* no operand */
break;
case SRE_OP_LITERAL:
/* one operand, not checked further */
GET_ARG;
break;
case SRE_OP_RANGE:
case SRE_OP_RANGE_UNI_IGNORE:
/* two operands, not checked further */
GET_ARG;
GET_ARG;
break;
case SRE_OP_CHARSET:
/* only the size of the bitmap is checked, not its contents */
offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
if (offset > (uintptr_t)(end - code))
FAIL;
code += offset;
break;
case SRE_OP_BIGCHARSET:
GET_ARG; /* Number of blocks */
offset = 256/sizeof(SRE_CODE); /* 256-byte table */
if (offset > (uintptr_t)(end - code))
FAIL;
/* Make sure that each byte points to a valid block */
for (i = 0; i < 256; i++) {
if (((unsigned char *)code)[i] >= arg)
FAIL;
}
code += offset;
offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
if (offset > (uintptr_t)(end - code))
FAIL;
code += offset;
break;
case SRE_OP_CATEGORY:
GET_ARG;
/* the operand must be one of the known category codes */
switch (arg) {
case SRE_CATEGORY_DIGIT:
case SRE_CATEGORY_NOT_DIGIT:
case SRE_CATEGORY_SPACE:
case SRE_CATEGORY_NOT_SPACE:
case SRE_CATEGORY_WORD:
case SRE_CATEGORY_NOT_WORD:
case SRE_CATEGORY_LINEBREAK:
case SRE_CATEGORY_NOT_LINEBREAK:
case SRE_CATEGORY_LOC_WORD:
case SRE_CATEGORY_LOC_NOT_WORD:
case SRE_CATEGORY_UNI_DIGIT:
case SRE_CATEGORY_UNI_NOT_DIGIT:
case SRE_CATEGORY_UNI_SPACE:
case SRE_CATEGORY_UNI_NOT_SPACE:
case SRE_CATEGORY_UNI_WORD:
case SRE_CATEGORY_UNI_NOT_WORD:
case SRE_CATEGORY_UNI_LINEBREAK:
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
break;
default:
FAIL;
}
break;
default:
FAIL;
}
}
return 1;
}
/* Validate the opcode sequence occupying [code, end).
   Walks the code one opcode at a time, checks each opcode's operands, and
   recurses into nested sub-patterns (branches, repeats, assertions,
   conditional groups). `groups` bounds the arguments of MARK and GROUPREF*
   opcodes. Unknown opcodes fail.
   Returns 1 if the range is well-formed, 0 otherwise (via FAIL). */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
/* Some variables are manipulated by the macros above */
SRE_CODE op;
SRE_CODE arg;
SRE_CODE skip;
VTRACE(("code=%p, end=%p\n", code, end));
if (code > end)
FAIL;
while (code < end) {
GET_OP;
switch (op) {
case SRE_OP_MARK:
/* We don't check whether marks are properly nested; the
sre_match() code is robust even if they don't, and the worst
you can get is nonsensical match results. */
GET_ARG;
if (arg > 2 * (size_t)groups + 1) {
VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
FAIL;
}
break;
case SRE_OP_LITERAL:
case SRE_OP_NOT_LITERAL:
case SRE_OP_LITERAL_IGNORE:
case SRE_OP_NOT_LITERAL_IGNORE:
case SRE_OP_LITERAL_UNI_IGNORE:
case SRE_OP_NOT_LITERAL_UNI_IGNORE:
case SRE_OP_LITERAL_LOC_IGNORE:
case SRE_OP_NOT_LITERAL_LOC_IGNORE:
GET_ARG;
/* The arg is just a character, nothing to check */
break;
case SRE_OP_SUCCESS:
case SRE_OP_FAILURE:
/* Nothing to check; these normally end the matching process */
break;
case SRE_OP_AT:
GET_ARG;
/* the operand must be one of the known position codes */
switch (arg) {
case SRE_AT_BEGINNING:
case SRE_AT_BEGINNING_STRING:
case SRE_AT_BEGINNING_LINE:
case SRE_AT_END:
case SRE_AT_END_LINE:
case SRE_AT_END_STRING:
case SRE_AT_BOUNDARY:
case SRE_AT_NON_BOUNDARY:
case SRE_AT_LOC_BOUNDARY:
case SRE_AT_LOC_NON_BOUNDARY:
case SRE_AT_UNI_BOUNDARY:
case SRE_AT_UNI_NON_BOUNDARY:
break;
default:
FAIL;
}
break;
case SRE_OP_ANY:
case SRE_OP_ANY_ALL:
/* These have no operands */
break;
case SRE_OP_IN:
case SRE_OP_IN_IGNORE:
case SRE_OP_IN_UNI_IGNORE:
case SRE_OP_IN_LOC_IGNORE:
GET_SKIP;
/* NOTE(review): GET_SKIP only bounds skip from above; a skip below 2 is
not rejected before code[skip-2] is read -- confirm the compiler never
emits one. */
/* Stop 1 before the end; we check the FAILURE below */
if (!_validate_charset(code, code+skip-2))
FAIL;
if (code[skip-2] != SRE_OP_FAILURE)
FAIL;
code += skip-1;
break;
case SRE_OP_INFO:
{
/* A minimal info field is
<INFO> <1=skip> <2=flags> <3=min> <4=max>;
If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
more follows. */
SRE_CODE flags, i;
SRE_CODE *newcode;
GET_SKIP;
/* newcode is where the INFO block must end */
newcode = code+skip-1;
GET_ARG; flags = arg;
/* min and max are read but not checked */
GET_ARG;
GET_ARG;
/* Check that only valid flags are present */
if ((flags & ~(SRE_INFO_PREFIX |
SRE_INFO_LITERAL |
SRE_INFO_CHARSET)) != 0)
FAIL;
/* PREFIX and CHARSET are mutually exclusive */
if ((flags & SRE_INFO_PREFIX) &&
(flags & SRE_INFO_CHARSET))
FAIL;
/* LITERAL implies PREFIX */
if ((flags & SRE_INFO_LITERAL) &&
!(flags & SRE_INFO_PREFIX))
FAIL;
/* Validate the prefix */
if (flags & SRE_INFO_PREFIX) {
SRE_CODE prefix_len;
GET_ARG; prefix_len = arg;
/* second prefix operand is read but not checked */
GET_ARG;
/* Here comes the prefix string */
if (prefix_len > (uintptr_t)(newcode - code))
FAIL;
code += prefix_len;
/* And here comes the overlap table */
if (prefix_len > (uintptr_t)(newcode - code))
FAIL;
/* Each overlap value should be < prefix_len */
for (i = 0; i < prefix_len; i++) {
if (code[i] >= prefix_len)
FAIL;
}
code += prefix_len;
}
/* Validate the charset */
if (flags & SRE_INFO_CHARSET) {
if (!_validate_charset(code, newcode-1))
FAIL;
if (newcode[-1] != SRE_OP_FAILURE)
FAIL;
code = newcode;
}
else if (code != newcode) {
/* without a charset, the block must end exactly at newcode */
VTRACE(("code=%p, newcode=%p\n", code, newcode));
FAIL;
}
}
break;
case SRE_OP_BRANCH:
{
SRE_CODE *target = NULL;
/* one iteration per alternative; a zero skip ends the list */
for (;;) {
GET_SKIP;
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
has the same target */
GET_OP;
if (op != SRE_OP_JUMP)
FAIL;
GET_SKIP;
if (target == NULL)
target = code+skip-1;
else if (code+skip-1 != target)
FAIL;
}
}
break;
case SRE_OP_REPEAT_ONE:
case SRE_OP_MIN_REPEAT_ONE:
{
SRE_CODE min, max;
GET_SKIP;
GET_ARG; min = arg;
GET_ARG; max = arg;
if (min > max)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
/* the repeated body must be valid and be followed by SUCCESS */
if (!_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
if (op != SRE_OP_SUCCESS)
FAIL;
}
break;
case SRE_OP_REPEAT:
{
SRE_CODE min, max;
GET_SKIP;
GET_ARG; min = arg;
GET_ARG; max = arg;
if (min > max)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
/* the repeated body must be valid and be followed by an UNTIL opcode */
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
GET_OP;
if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
FAIL;
}
break;
case SRE_OP_GROUPREF:
case SRE_OP_GROUPREF_IGNORE:
case SRE_OP_GROUPREF_UNI_IGNORE:
case SRE_OP_GROUPREF_LOC_IGNORE:
GET_ARG;
/* the referenced group index must be below `groups` */
if (arg >= (size_t)groups)
FAIL;
break;
case SRE_OP_GROUPREF_EXISTS:
/* The regex syntax for this is: '(?(group)then|else)', where
'group' is either an integer group number or a group name,
'then' and 'else' are sub-regexes, and 'else' is optional. */
GET_ARG;
if (arg >= (size_t)groups)
FAIL;
GET_SKIP_ADJ(1);
code--; /* The skip is relative to the first arg! */
/* There are two possibilities here: if there is both a 'then'
part and an 'else' part, the generated code looks like:
GROUPREF_EXISTS
<group>
<skipyes>
...then part...
JUMP
<skipno>
(<skipyes> jumps here)
...else part...
(<skipno> jumps here)
If there is only a 'then' part, it looks like:
GROUPREF_EXISTS
<group>
<skip>
...then part...
(<skip> jumps here)
There is no direct way to decide which it is, and we don't want
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
if (!_validate_inner(code+1, code+skip-3, groups))
FAIL;
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
if (!_validate_inner(code, code+skip-1, groups))
FAIL;
code += skip-1;
}
else {
VTRACE(("only a then part present\n"));
if (!_validate_inner(code+1, code+skip-1, groups))
FAIL;
code += skip-1;
}
break;
case SRE_OP_ASSERT:
case SRE_OP_ASSERT_NOT:
GET_SKIP;
GET_ARG; /* 0 for lookahead, width for lookbehind */
code--; /* Back up over arg to simplify math below */
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
if (!_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
if (op != SRE_OP_SUCCESS)
FAIL;
break;
default:
/* unknown opcode */
FAIL;
}
}
VTRACE(("okay\n"));
return 1;
}
/* Top-level validation: `groups` must lie in [0, SRE_MAXGROUPS], the code
   must be non-empty and must end with SRE_OP_SUCCESS; everything before
   that final opcode is then checked by _validate_inner. */
static int
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    int well_formed = groups >= 0 &&
                      (size_t)groups <= SRE_MAXGROUPS &&
                      code < end &&
                      end[-1] == SRE_OP_SUCCESS;

    if (!well_formed)
        FAIL;
    return _validate_inner(code, end-1, groups);
}
/* Validate the compiled code stored in `self`. Returns 1 when it is
   well-formed; otherwise sets RuntimeError and returns 0. */
static int
_validate(PatternObject *self)
{
    SRE_CODE *first = self->code;
    SRE_CODE *limit = self->code+self->codesize;

    if (_validate_outer(first, limit, self->groups)) {
        VTRACE(("Success!\n"));
        return 1;
    }
    PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
    return 0;
}
/* -------------------------------------------------------------------- */
/* match methods */
/* Deallocator for MatchObject: drops the owned references, frees the
   object and finally releases its type. */
static void
match_dealloc(MatchObject* self)
{
    PyTypeObject *type = Py_TYPE(self);

    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);

    PyObject_DEL(self);
    /* The type reference is dropped only after the instance is freed. */
    Py_DECREF(type);
}
/* Return the text of group `index` (0 is the whole match) as a new
   reference.

   Raises IndexError and returns NULL when `index` is out of range.
   Returns a new reference to `def` when the match has no subject string or
   the group did not participate in the match. Offsets are clamped to the
   current length of the subject before slicing. */
static PyObject*
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
{
    Py_ssize_t length;
    int isbytes, charsize;
    Py_buffer view;
    PyObject *result;
    void* ptr;
    Py_ssize_t i, j;

    if (index < 0 || index >= self->groups) {
        /* raise IndexError if we were given a bad group number */
        PyErr_SetString(
            PyExc_IndexError,
            "no such group"
            );
        return NULL;
    }

    /* each group owns two consecutive mark slots: start and end */
    index *= 2;

    if (self->string == Py_None || self->mark[index] < 0) {
        /* return default value if the string or group is undefined */
        Py_INCREF(def);
        return def;
    }

    /* Start from a known-empty view, as the other getstring() callers do,
       so the release check below never reads an indeterminate value. */
    view.buf = NULL;
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
    if (ptr == NULL)
        return NULL;

    i = self->mark[index];
    j = self->mark[index+1];
    i = Py_MIN(i, length);
    j = Py_MIN(j, length);
    result = getslice(isbytes, ptr, self->string, i, j);
    if (isbytes && view.buf != NULL)
        PyBuffer_Release(&view);
    return result;
}
/* Resolve a group designator to a group number.
   NULL means group 0; objects accepted by PyIndex_Check are converted
   directly; anything else is looked up in the pattern's groupindex mapping.
   Returns -1 when the designator cannot be resolved. */
static Py_ssize_t
match_getindex(MatchObject* self, PyObject* index)
{
    PyObject *groupindex;
    PyObject *mapped;

    if (index == NULL) {
        /* Default value */
        return 0;
    }

    if (PyIndex_Check(index)) {
        return PyNumber_AsSsize_t(index, NULL);
    }

    groupindex = self->pattern->groupindex;
    if (!groupindex) {
        return -1;
    }

    /* named group: the mapping value must be an int */
    mapped = PyDict_GetItem(groupindex, index);
    if (mapped && PyLong_Check(mapped)) {
        return PyLong_AsSsize_t(mapped);
    }
    return -1;
}
/* Resolve the group designator `index` and return that group's text,
   or `def` when the group is undefined. */
static PyObject*
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
{
    Py_ssize_t group = match_getindex(self, index);
    return match_getslice_by_index(self, group, def);
}
/*[clinic input]
_sre.SRE_Match.expand
template_obj: object
Return the string obtained by doing backslash substitution on the string template_obj, as done by the sub() method.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template_obj)
/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
{
    /* delegate to Python code; call() consumes the argument tuple and
       tolerates a NULL one */
    PyObject *args = PyTuple_Pack(3, self->pattern, self, template_obj);
    return call(SRE_PY_MODULE, "_expand", args);
}
/* Implementation of Match.group(): with no arguments return group 0, with
   one argument return that group, otherwise return a tuple with one entry
   per argument. Undefined groups yield None. */
static PyObject*
match_group(MatchObject* self, PyObject* args)
{
    PyObject* tuple;
    Py_ssize_t k;
    Py_ssize_t size = PyTuple_GET_SIZE(args);

    if (size == 0) {
        return match_getslice(self, _PyLong_Zero, Py_None);
    }
    if (size == 1) {
        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    }

    /* fetch multiple items */
    tuple = PyTuple_New(size);
    if (tuple == NULL) {
        return NULL;
    }
    for (k = 0; k < size; k++) {
        PyObject* item = match_getslice(
            self, PyTuple_GET_ITEM(args, k), Py_None
            );
        if (item == NULL) {
            Py_DECREF(tuple);
            return NULL;
        }
        PyTuple_SET_ITEM(tuple, k, item);
    }
    return tuple;
}
/* Subscript support: m[name] behaves like m.group(name) with None as the
   default for undefined groups. */
static PyObject*
match_getitem(MatchObject* self, PyObject* name)
{
    PyObject *slice = match_getslice(self, name, Py_None);
    return slice;
}
/*[clinic input]
_sre.SRE_Match.groups
default: object = None
Is used for groups that did not participate in the match.
Return a tuple containing all the subgroups of the match, from 1.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
{
    /* Collect groups 1..groups-1 into a tuple; group 0 is not included. */
    Py_ssize_t group;
    PyObject* tuple = PyTuple_New(self->groups-1);

    if (tuple == NULL) {
        return NULL;
    }

    for (group = 1; group < self->groups; group++) {
        PyObject* text = match_getslice_by_index(self, group, default_value);
        if (text == NULL) {
            Py_DECREF(tuple);
            return NULL;
        }
        /* tuple slot k holds group k+1 */
        PyTuple_SET_ITEM(tuple, group-1, text);
    }
    return tuple;
}
/*[clinic input]
_sre.SRE_Match.groupdict
default: object = None
Is used for groups that did not participate in the match.
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
{
    /* Build {group name: matched text} from the pattern's groupindex
       mapping; an absent mapping yields an empty dict. */
    PyObject *name;
    PyObject *entry;
    Py_ssize_t cursor = 0;
    Py_hash_t hash;
    PyObject *result = PyDict_New();

    if (result == NULL) {
        return NULL;
    }
    if (self->pattern->groupindex == NULL) {
        return result;
    }

    while (_PyDict_Next(self->pattern->groupindex, &cursor, &name, &entry, &hash)) {
        int status;

        /* keep the key alive across the calls below */
        Py_INCREF(name);
        entry = match_getslice(self, name, default_value);
        if (entry == NULL) {
            Py_DECREF(name);
            Py_DECREF(result);
            return NULL;
        }
        status = _PyDict_SetItem_KnownHash(result, name, entry, hash);
        Py_DECREF(entry);
        Py_DECREF(name);
        if (status < 0) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
/*[clinic input]
_sre.SRE_Match.start -> Py_ssize_t
group: object(c_default="NULL") = 0
/
Return index of the start of the substring matched by group.
[clinic start generated code]*/
static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
{
    /* Resolve `group` to a group number and return its start mark.
       Sets IndexError and returns -1 when the number is out of range. */
    const Py_ssize_t group_no = match_getindex(self, group);
    const int in_range = (group_no >= 0 && group_no < self->groups);

    if (!in_range) {
        PyErr_SetString(PyExc_IndexError, "no such group");
        return -1;
    }

    /* Start marks sit at even slots; the mark is -1 if the group is undefined. */
    return self->mark[2 * group_no];
}
/*[clinic input]
_sre.SRE_Match.end -> Py_ssize_t
group: object(c_default="NULL") = 0
/
Return index of the end of the substring matched by group.
[clinic start generated code]*/
static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
{
    /* Resolve `group` to a group number and return its end mark.
       Sets IndexError and returns -1 when the number is out of range. */
    const Py_ssize_t group_no = match_getindex(self, group);
    const int in_range = (group_no >= 0 && group_no < self->groups);

    if (!in_range) {
        PyErr_SetString(PyExc_IndexError, "no such group");
        return -1;
    }

    /* End marks sit at odd slots; the mark is -1 if the group is undefined. */
    return self->mark[2 * group_no + 1];
}
/* Return a new 2-tuple (i1, i2) of Python ints, or NULL on failure. */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    PyObject* tuple = PyTuple_New(2);
    if (tuple == NULL) {
        return NULL;
    }

    const Py_ssize_t values[2] = {i1, i2};
    for (Py_ssize_t slot = 0; slot < 2; slot++) {
        PyObject* boxed = PyLong_FromSsize_t(values[slot]);
        if (boxed == NULL) {
            /* Releasing the tuple also releases any slot already filled. */
            Py_DECREF(tuple);
            return NULL;
        }
        PyTuple_SET_ITEM(tuple, slot, boxed);
    }
    return tuple;
}
/*[clinic input]
_sre.SRE_Match.span
group: object(c_default="NULL") = 0
/
For match object m, return the 2-tuple (m.start(group), m.end(group)).
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
{
    /* Resolve `group` to a group number and return (start, end) as a tuple.
       Sets IndexError and returns NULL when the number is out of range. */
    const Py_ssize_t group_no = match_getindex(self, group);
    const int in_range = (group_no >= 0 && group_no < self->groups);

    if (!in_range) {
        PyErr_SetString(PyExc_IndexError, "no such group");
        return NULL;
    }

    /* Both marks are -1 if the group is undefined. */
    const Py_ssize_t first = 2 * group_no;
    return _pair(self->mark[first], self->mark[first + 1]);
}
/* Build the tuple of (start, end) pairs for every group (group 0 included),
   store it in self->regs and return it.  Returns NULL on failure, in which
   case self->regs is left untouched. */
static PyObject*
match_regs(MatchObject* self)
{
    PyObject* spans = PyTuple_New(self->groups);
    if (spans == NULL) {
        return NULL;
    }

    for (Py_ssize_t g = 0; g < self->groups; g++) {
        PyObject* span = _pair(self->mark[2 * g], self->mark[2 * g + 1]);
        if (span == NULL) {
            Py_DECREF(spans);
            return NULL;
        }
        PyTuple_SET_ITEM(spans, g, span);
    }

    /* Two references are needed: one stored on the match object and one
       returned to the caller. */
    Py_INCREF(spans);
    self->regs = spans;
    return spans;
}
/*[clinic input]
_sre.SRE_Match.__copy__
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match___copy___impl(MatchObject *self)
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
{
    /* No new object is created: the same match object is returned with an
       extra reference. */
    PyObject *same = (PyObject *)self;
    Py_INCREF(same);
    return same;
}
/*[clinic input]
_sre.SRE_Match.__deepcopy__
memo: object
/
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
{
    /* `memo` is accepted but not consulted; the same match object is
       returned with an extra reference. */
    (void) memo;
    PyObject *same = (PyObject *)self;
    Py_INCREF(same);
    return same;
}
/* Documentation string describing match objects.  The literal uses a
   backslash-newline continuation, so the second line must stay at column 0
   to avoid embedding extra whitespace in the text. */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");
/* Documentation string for the "group" entry of match_methods.  The literal
   uses backslash-newline continuations, so the continuation lines must stay
   at column 0 to avoid embedding extra whitespace in the text. */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
Return subgroup(s) of the match by indices or names.\n\
For 0 returns the entire match.");
/* Getter for "lastindex": the stored lastindex as an int, or None when it
   is negative. */
static PyObject *
match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
{
    if (self->lastindex < 0) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(self->lastindex);
}
/* Getter for "lastgroup": the entry of the pattern's indexgroup tuple at
   position lastindex, or None when there is no tuple or lastindex does not
   fall inside it. */
static PyObject *
match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
{
    if (self->pattern->indexgroup == NULL ||
        self->lastindex < 0 ||
        self->lastindex >= PyTuple_GET_SIZE(self->pattern->indexgroup))
    {
        Py_RETURN_NONE;
    }

    /* PyTuple_GET_ITEM yields a borrowed reference; take one for the caller. */
    PyObject *group_name = PyTuple_GET_ITEM(self->pattern->indexgroup,
                                            self->lastindex);
    Py_INCREF(group_name);
    return group_name;
}
/* Getter for "regs": returns the tuple stored in self->regs when present,
   otherwise has match_regs() build (and store) it. */
static PyObject *
match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
{
    if (self->regs == NULL) {
        return match_regs(self);
    }
    Py_INCREF(self->regs);
    return self->regs;
}
/* repr() of a match object: type name, the span of group 0 and the repr of
   group 0 (limited by the %.50R precision).  Returns NULL on failure. */
static PyObject *
match_repr(MatchObject *self)
{
    PyObject *whole_match = match_getslice_by_index(self, 0, Py_None);
    if (!whole_match) {
        return NULL;
    }

    PyObject *text = PyUnicode_FromFormat(
        "<%s object; span=(%zd, %zd), match=%.50R>",
        _PyType_Name(Py_TYPE(self)),
        self->mark[0], self->mark[1], whole_match);

    /* The slice is only needed for formatting, whether or not it succeeded. */
    Py_DECREF(whole_match);
    return text;
}
/* Turn the outcome of a matching run into a Python object:
     status > 0   a new match object filled in from `state`
     status == 0  None (no match)
     status < 0   pattern_error(status) is called and NULL is returned */
static PyObject*
pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
{
    if (status == 0) {
        /* no match */
        Py_RETURN_NONE;
    }
    if (status < 0) {
        /* internal error */
        pattern_error(status);
        return NULL;
    }

    /* create match object (with room for extra group marks) */
    /* coverity[ampersand_in_size] */
    MatchObject* match = PyObject_NEW_VAR(MatchObject,
                                          (PyTypeObject *)_srestate_global->Match_Type,
                                          2*(pattern->groups+1));
    if (match == NULL) {
        return NULL;
    }

    Py_INCREF(pattern);
    match->pattern = pattern;

    Py_INCREF(state->string);
    match->string = state->string;

    match->regs = NULL;
    match->groups = pattern->groups + 1;

    /* Marks are stored as offsets from the beginning of the buffer, with
       pointer differences divided by the character size. */
    char* origin = (char*) state->beginning;
    int charsize = state->charsize;

    /* Slots 0 and 1 describe the whole match. */
    match->mark[0] = ((char*) state->start - origin) / charsize;
    match->mark[1] = ((char*) state->ptr - origin) / charsize;

    /* Pattern group g uses state marks 2g/2g+1 and match marks 2g+2/2g+3. */
    for (Py_ssize_t g = 0; g < pattern->groups; g++) {
        const Py_ssize_t src = 2 * g;
        const Py_ssize_t dst = src + 2;
        if (src + 1 <= state->lastmark && state->mark[src] && state->mark[src + 1]) {
            match->mark[dst] = ((char*) state->mark[src] - origin) / charsize;
            match->mark[dst + 1] = ((char*) state->mark[src + 1] - origin) / charsize;
        } else {
            /* undefined */
            match->mark[dst] = match->mark[dst + 1] = -1;
        }
    }

    match->pos = state->pos;
    match->endpos = state->endpos;
    match->lastindex = state->lastindex;

    return (PyObject*) match;
}
/* -------------------------------------------------------------------- */
/* scanner methods (experimental) */
/* Deallocator for scanner objects: tears down the embedded state, drops the
   pattern reference, frees the instance and finally releases the type. */
static void
scanner_dealloc(ScannerObject* self)
{
    /* Fetch the type before the instance is freed; it is released last. */
    PyTypeObject *scanner_type = Py_TYPE(self);

    state_fini(&self->state);
    /* pattern is still NULL when pattern_scanner() failed in state_init(). */
    Py_XDECREF(self->pattern);
    PyObject_DEL(self);

    Py_DECREF(scanner_type);
}
/*[clinic input]
_sre.SRE_Scanner.match
[clinic start generated code]*/
static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject *self)
/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
{
    /* Run sre_match() from the scanner's current position and return the
       resulting match object, None, or NULL with an exception set. */
    SRE_STATE* st = &self->state;

    /* A NULL start is set below once a run finds nothing; from then on the
       scanner only returns None. */
    if (st->start == NULL) {
        Py_RETURN_NONE;
    }

    state_reset(st);
    st->ptr = st->start;

    const Py_ssize_t status = sre_match(st, PatternObject_GetCode(self->pattern));
    if (PyErr_Occurred()) {
        return NULL;
    }

    PyObject* result = pattern_new_match((PatternObject*) self->pattern,
                                         st, status);

    if (status != 0) {
        /* Record whether the run ended where it began, then continue from
           where it stopped. */
        st->must_advance = (st->ptr == st->start);
        st->start = st->ptr;
    } else {
        st->start = NULL;
    }
    return result;
}
/*[clinic input]
_sre.SRE_Scanner.search
[clinic start generated code]*/
static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject *self)
/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
{
    /* Run sre_search() from the scanner's current position and return the
       resulting match object, None, or NULL with an exception set. */
    SRE_STATE* st = &self->state;

    /* A NULL start is set below once a run finds nothing; from then on the
       scanner only returns None. */
    if (st->start == NULL) {
        Py_RETURN_NONE;
    }

    state_reset(st);
    st->ptr = st->start;

    const Py_ssize_t status = sre_search(st, PatternObject_GetCode(self->pattern));
    if (PyErr_Occurred()) {
        return NULL;
    }

    PyObject* result = pattern_new_match((PatternObject*) self->pattern,
                                         st, status);

    if (status != 0) {
        /* Record whether the run ended where it began, then continue from
           where it stopped. */
        st->must_advance = (st->ptr == st->start);
        st->start = st->ptr;
    } else {
        st->start = NULL;
    }
    return result;
}
/* Create a scanner object for `string` bound to this pattern, with its
   search state initialised from pos/endpos.  Returns NULL on failure. */
static PyObject *
pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
{
    /* create scanner object */
    ScannerObject* scanner = PyObject_NEW(ScannerObject,
                                          (PyTypeObject *)_srestate_global->Scanner_Type);
    if (scanner == NULL) {
        return NULL;
    }
    /* Must be NULL before any failure path: scanner_dealloc() runs
       Py_XDECREF on this field. */
    scanner->pattern = NULL;

    /* create search state object */
    if (state_init(&scanner->state, self, string, pos, endpos)) {
        Py_INCREF(self);
        scanner->pattern = (PyObject*) self;
        return (PyObject*) scanner;
    }

    Py_DECREF(scanner);
    return NULL;
}
/* Hash of a pattern object: the hash of the pattern source XOR-ed with the
   hash of the code bytes, the flags, isbytes and codesize.  Returns -1 with
   an exception set when the pattern source is unhashable. */
static Py_hash_t
pattern_hash(PatternObject *self)
{
    Py_hash_t combined = PyObject_Hash(self->pattern);
    if (combined == -1) {
        return -1;
    }

    const Py_hash_t code_hash =
        _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);

    combined ^= code_hash;
    combined ^= self->flags;
    combined ^= self->isbytes;
    combined ^= self->codesize;

    /* -1 is the error return value, so a computed -1 is mapped to -2. */
    return (combined == -1) ? -2 : combined;
}
/* Rich comparison for pattern objects.  Only == and != are supported, and
   only between two objects whose type is exactly Pattern_Type; everything
   else yields NotImplemented.  Returns NULL if comparing the pattern
   sources raises. */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyTypeObject *pattern_type = (PyTypeObject *)_srestate_global->Pattern_Type;
    if (Py_TYPE(lefto) != pattern_type || Py_TYPE(righto) != pattern_type) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(op == Py_EQ);
    }

    PatternObject *left = (PatternObject *)lefto;
    PatternObject *right = (PatternObject *)righto;

    /* Compare the code as well as the pattern source: the same source can
       compile to different code depending on the locale in effect when the
       re.LOCALE flag is used.  groups, indexgroup and groupindex are not
       compared because they are derived from the pattern. */
    int equal = (left->flags == right->flags
                 && left->isbytes == right->isbytes
                 && left->codesize == right->codesize
                 && memcmp(left->code, right->code,
                           sizeof(left->code[0]) * left->codesize) == 0);

    if (equal) {
        equal = PyObject_RichCompareBool(left->pattern, right->pattern, Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(op == Py_NE ? !equal : equal);
}
// clang-format off
/*[clinic input]
preserve
[clinic start generated code]*/
PyDoc_STRVAR(_sre_getcodesize__doc__,
    "getcodesize($module, /)\n"
    "--\n"
    "\n");
#define _SRE_GETCODESIZE_METHODDEF \
    {"getcodesize", (PyCFunction)((void*)_sre_getcodesize), METH_NOARGS, _sre_getcodesize__doc__},
static int
_sre_getcodesize_impl(PyObject *module);
/* METH_NOARGS entry point: converts the int returned by the implementation
   into a Python int, or returns NULL if it reported an error. */
static PyObject *
_sre_getcodesize(PyObject *module, PyObject *Py_UNUSED(ignored))
{
    const int size = _sre_getcodesize_impl(module);
    /* -1 is only an error when an exception is actually pending. */
    if (size == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyLong_FromLong((long)size);
}
PyDoc_STRVAR(_sre_ascii_iscased__doc__,
    "ascii_iscased($module, character, /)\n"
    "--\n"
    "\n");
#define _SRE_ASCII_ISCASED_METHODDEF \
    {"ascii_iscased", (PyCFunction)((void*)_sre_ascii_iscased), METH_O, _sre_ascii_iscased__doc__},
static int
_sre_ascii_iscased_impl(PyObject *module, int character);
/* METH_O entry point: parses the argument as a C int, calls the
   implementation and returns its result as a Python bool. */
static PyObject *
_sre_ascii_iscased(PyObject *module, PyObject *arg)
{
    int character;
    if (!PyArg_Parse(arg, "i:ascii_iscased", &character)) {
        return NULL;
    }

    const int answer = _sre_ascii_iscased_impl(module, character);
    /* -1 is only an error when an exception is actually pending. */
    if (answer == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyBool_FromLong((long)answer);
}
PyDoc_STRVAR(_sre_unicode_iscased__doc__,
    "unicode_iscased($module, character, /)\n"
    "--\n"
    "\n");
#define _SRE_UNICODE_ISCASED_METHODDEF \
    {"unicode_iscased", (PyCFunction)((void*)_sre_unicode_iscased), METH_O, _sre_unicode_iscased__doc__},
static int
_sre_unicode_iscased_impl(PyObject *module, int character);
/* METH_O entry point: parses the argument as a C int, calls the
   implementation and returns its result as a Python bool. */
static PyObject *
_sre_unicode_iscased(PyObject *module, PyObject *arg)
{
    int character;
    if (!PyArg_Parse(arg, "i:unicode_iscased", &character)) {
        return NULL;
    }

    const int answer = _sre_unicode_iscased_impl(module, character);
    /* -1 is only an error when an exception is actually pending. */
    if (answer == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyBool_FromLong((long)answer);
}
PyDoc_STRVAR(_sre_ascii_tolower__doc__,
    "ascii_tolower($module, character, /)\n"
    "--\n"
    "\n");
#define _SRE_ASCII_TOLOWER_METHODDEF \
    {"ascii_tolower", (PyCFunction)((void*)_sre_ascii_tolower), METH_O, _sre_ascii_tolower__doc__},
static int
_sre_ascii_tolower_impl(PyObject *module, int character);
/* METH_O entry point: parses the argument as a C int, calls the
   implementation and returns its result as a Python int. */
static PyObject *
_sre_ascii_tolower(PyObject *module, PyObject *arg)
{
    int character;
    if (!PyArg_Parse(arg, "i:ascii_tolower", &character)) {
        return NULL;
    }

    const int lowered = _sre_ascii_tolower_impl(module, character);
    /* -1 is only an error when an exception is actually pending. */
    if (lowered == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyLong_FromLong((long)lowered);
}
PyDoc_STRVAR(_sre_unicode_tolower__doc__,
    "unicode_tolower($module, character, /)\n"
    "--\n"
    "\n");
#define _SRE_UNICODE_TOLOWER_METHODDEF \
    {"unicode_tolower", (PyCFunction)((void*)_sre_unicode_tolower), METH_O, _sre_unicode_tolower__doc__},
static int
_sre_unicode_tolower_impl(PyObject *module, int character);
/* METH_O entry point: parses the argument as a C int, calls the
   implementation and returns its result as a Python int. */
static PyObject *
_sre_unicode_tolower(PyObject *module, PyObject *arg)
{
    int character;
    if (!PyArg_Parse(arg, "i:unicode_tolower", &character)) {
        return NULL;
    }

    const int lowered = _sre_unicode_tolower_impl(module, character);
    /* -1 is only an error when an exception is actually pending. */
    if (lowered == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyLong_FromLong((long)lowered);
}
PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__,
    "match($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n"
    "Matches zero or more characters at the beginning of the string.");
#define _SRE_SRE_PATTERN_MATCH_METHODDEF \
    {"match", (PyCFunction)((void*)_sre_SRE_Pattern_match), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_match__doc__},
static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_match(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:match", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_match_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__,
    "fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n"
    "Matches against all of the string.");
#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \
    {"fullmatch", (PyCFunction)((void*)_sre_SRE_Pattern_fullmatch), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__},
static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
                                Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_fullmatch(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:fullmatch", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_fullmatch_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_search__doc__,
    "search($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n"
    "Scan through string looking for a match, and return a corresponding match object instance.\n"
    "\n"
    "Return None if no position in the string matches.");
#define _SRE_SRE_PATTERN_SEARCH_METHODDEF \
    {"search", (PyCFunction)((void*)_sre_SRE_Pattern_search), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_search__doc__},
static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
                             Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_search(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:search", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_search_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_findall__doc__,
    "findall($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n"
    "Return a list of all non-overlapping matches of pattern in string.");
#define _SRE_SRE_PATTERN_FINDALL_METHODDEF \
    {"findall", (PyCFunction)((void*)_sre_SRE_Pattern_findall), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_findall__doc__},
static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_findall(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:findall", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_findall_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_finditer__doc__,
    "finditer($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n"
    "Return an iterator over all non-overlapping matches for the RE pattern in string.\n"
    "\n"
    "For each match, the iterator returns a match object.");
#define _SRE_SRE_PATTERN_FINDITER_METHODDEF \
    {"finditer", (PyCFunction)((void*)_sre_SRE_Pattern_finditer), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_finditer__doc__},
static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
                               Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_finditer(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:finditer", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_finditer_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_scanner__doc__,
    "scanner($self, /, string, pos=0, endpos=sys.maxsize)\n"
    "--\n"
    "\n");
#define _SRE_SRE_PATTERN_SCANNER_METHODDEF \
    {"scanner", (PyCFunction)((void*)_sre_SRE_Pattern_scanner), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_scanner__doc__},
static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos);
/* Fastcall entry point: parses (string, pos=0, endpos=PY_SSIZE_T_MAX) and
   forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_scanner(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "pos", "endpos", NULL};
    static _PyArg_Parser arg_parser = {"O|nn:scanner", keyword_names, 0};
    PyObject *string;
    Py_ssize_t pos = 0;
    Py_ssize_t endpos = PY_SSIZE_T_MAX;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &pos, &endpos);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_scanner_impl(self, string, pos, endpos);
}
PyDoc_STRVAR(_sre_SRE_Pattern_split__doc__,
    "split($self, /, string, maxsplit=0)\n"
    "--\n"
    "\n"
    "Split string by the occurrences of pattern.");
#define _SRE_SRE_PATTERN_SPLIT_METHODDEF \
    {"split", (PyCFunction)((void*)_sre_SRE_Pattern_split), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_split__doc__},
static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t maxsplit);
/* Fastcall entry point: parses (string, maxsplit=0) and forwards to the
   implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_split(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"string", "maxsplit", NULL};
    static _PyArg_Parser arg_parser = {"O|n:split", keyword_names, 0};
    PyObject *string;
    Py_ssize_t maxsplit = 0;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &string, &maxsplit);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_split_impl(self, string, maxsplit);
}
PyDoc_STRVAR(_sre_SRE_Pattern_sub__doc__,
    "sub($self, /, repl, string, count=0)\n"
    "--\n"
    "\n"
    "Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.");
#define _SRE_SRE_PATTERN_SUB_METHODDEF \
    {"sub", (PyCFunction)((void*)_sre_SRE_Pattern_sub), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_sub__doc__},
static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
                          PyObject *string, Py_ssize_t count);
/* Fastcall entry point: parses (repl, string, count=0) and forwards to the
   implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_sub(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"repl", "string", "count", NULL};
    static _PyArg_Parser arg_parser = {"OO|n:sub", keyword_names, 0};
    PyObject *repl;
    PyObject *string;
    Py_ssize_t count = 0;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &repl, &string, &count);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_sub_impl(self, repl, string, count);
}
PyDoc_STRVAR(_sre_SRE_Pattern_subn__doc__,
    "subn($self, /, repl, string, count=0)\n"
    "--\n"
    "\n"
    "Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.");
#define _SRE_SRE_PATTERN_SUBN_METHODDEF \
    {"subn", (PyCFunction)((void*)_sre_SRE_Pattern_subn), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Pattern_subn__doc__},
static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
                           PyObject *string, Py_ssize_t count);
/* Fastcall entry point: parses (repl, string, count=0) and forwards to the
   implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Pattern_subn(PatternObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"repl", "string", "count", NULL};
    static _PyArg_Parser arg_parser = {"OO|n:subn", keyword_names, 0};
    PyObject *repl;
    PyObject *string;
    Py_ssize_t count = 0;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &repl, &string, &count);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Pattern_subn_impl(self, repl, string, count);
}
PyDoc_STRVAR(_sre_SRE_Pattern___copy____doc__,
    "__copy__($self, /)\n"
    "--\n"
    "\n");
#define _SRE_SRE_PATTERN___COPY___METHODDEF \
    {"__copy__", (PyCFunction)((void*)_sre_SRE_Pattern___copy__), METH_NOARGS, _sre_SRE_Pattern___copy____doc__},
static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject *self);
/* METH_NOARGS entry point: hands the call straight to the implementation. */
static PyObject *
_sre_SRE_Pattern___copy__(PatternObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *copied = _sre_SRE_Pattern___copy___impl(self);
    return copied;
}
/* Docstring and method-table entry for Pattern.__deepcopy__.  The entry is
   METH_O, so it names _sre_SRE_Pattern___deepcopy__ directly and there is no
   separate parsing wrapper in this section. */
PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__,
"__deepcopy__($self, memo, /)\n"
"--\n"
"\n");
#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \
{"__deepcopy__", (PyCFunction)((void*)_sre_SRE_Pattern___deepcopy__), METH_O, _sre_SRE_Pattern___deepcopy____doc__},
PyDoc_STRVAR(_sre_compile__doc__,
    "compile($module, /, pattern, flags, code, groups, groupindex,\n"
    " indexgroup)\n"
    "--\n"
    "\n");
#define _SRE_COMPILE_METHODDEF \
    {"compile", (PyCFunction)((void*)_sre_compile), METH_FASTCALL | METH_KEYWORDS, _sre_compile__doc__},
static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
                  PyObject *indexgroup);
/* Fastcall entry point: parses the six required arguments and forwards them
   to the implementation.  The "O!" converters require `code` to be a list,
   `groupindex` a dict and `indexgroup` a tuple.  Returns NULL if parsing
   fails. */
static PyObject *
_sre_compile(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", NULL};
    static _PyArg_Parser arg_parser = {"OiO!nO!O!:compile", keyword_names, 0};
    PyObject *pattern;
    int flags;
    PyObject *code;
    Py_ssize_t groups;
    PyObject *groupindex;
    PyObject *indexgroup;

    const int parsed = _PyArg_ParseStackAndKeywords(
        args, nargs, kwnames, &arg_parser,
        &pattern, &flags,
        &PyList_Type, &code,
        &groups,
        &PyDict_Type, &groupindex,
        &PyTuple_Type, &indexgroup);
    if (!parsed) {
        return NULL;
    }
    return _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup);
}
PyDoc_STRVAR(_sre_SRE_Match_expand__doc__,
    "expand($self, /, template_obj)\n"
    "--\n"
    "\n"
    "Return the string obtained by doing backslash substitution on the string template_obj, as done by the sub() method.");
#define _SRE_SRE_MATCH_EXPAND_METHODDEF \
    {"expand", (PyCFunction)((void*)_sre_SRE_Match_expand), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Match_expand__doc__},
static PyObject *
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template_obj);
/* Fastcall entry point: parses the single required `template_obj` argument
   and forwards to the implementation.  Returns NULL if parsing fails. */
static PyObject *
_sre_SRE_Match_expand(MatchObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"template_obj", NULL};
    static _PyArg_Parser arg_parser = {"O:expand", keyword_names, 0};
    PyObject *template_obj;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &template_obj);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Match_expand_impl(self, template_obj);
}
PyDoc_STRVAR(_sre_SRE_Match_groups__doc__,
    "groups($self, /, default=None)\n"
    "--\n"
    "\n"
    "Return a tuple containing all the subgroups of the match, from 1.\n"
    "\n"
    " default\n"
    " Is used for groups that did not participate in the match.");
#define _SRE_SRE_MATCH_GROUPS_METHODDEF \
    {"groups", (PyCFunction)((void*)_sre_SRE_Match_groups), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Match_groups__doc__},
static PyObject *
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value);
/* Fastcall entry point: parses the optional `default` argument (Py_None
   when omitted) and forwards to the implementation.  Returns NULL if parsing
   fails. */
static PyObject *
_sre_SRE_Match_groups(MatchObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"default", NULL};
    static _PyArg_Parser arg_parser = {"|O:groups", keyword_names, 0};
    PyObject *default_value = Py_None;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &default_value);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Match_groups_impl(self, default_value);
}
PyDoc_STRVAR(_sre_SRE_Match_groupdict__doc__,
    "groupdict($self, /, default=None)\n"
    "--\n"
    "\n"
    "Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.\n"
    "\n"
    " default\n"
    " Is used for groups that did not participate in the match.");
#define _SRE_SRE_MATCH_GROUPDICT_METHODDEF \
    {"groupdict", (PyCFunction)((void*)_sre_SRE_Match_groupdict), METH_FASTCALL | METH_KEYWORDS, _sre_SRE_Match_groupdict__doc__},
static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value);
/* Fastcall entry point: parses the optional `default` argument (Py_None
   when omitted) and forwards to the implementation.  Returns NULL if parsing
   fails. */
static PyObject *
_sre_SRE_Match_groupdict(MatchObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const keyword_names[] = {"default", NULL};
    static _PyArg_Parser arg_parser = {"|O:groupdict", keyword_names, 0};
    PyObject *default_value = Py_None;

    const int parsed = _PyArg_ParseStackAndKeywords(args, nargs, kwnames, &arg_parser,
                                                    &default_value);
    if (!parsed) {
        return NULL;
    }
    return _sre_SRE_Match_groupdict_impl(self, default_value);
}
PyDoc_STRVAR(_sre_SRE_Match_start__doc__,
    "start($self, group=0, /)\n"
    "--\n"
    "\n"
    "Return index of the start of the substring matched by group.");
#define _SRE_SRE_MATCH_START_METHODDEF \
    {"start", (PyCFunction)((void*)_sre_SRE_Match_start), METH_VARARGS, _sre_SRE_Match_start__doc__},
static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group);
/* METH_VARARGS entry point: accepts zero or one positional argument (`group`
   stays NULL when omitted), calls the implementation and returns its result
   as a Python int. */
static PyObject *
_sre_SRE_Match_start(MatchObject *self, PyObject *args)
{
    PyObject *group = NULL;
    if (!PyArg_UnpackTuple(args, "start", 0, 1, &group)) {
        return NULL;
    }

    const Py_ssize_t offset = _sre_SRE_Match_start_impl(self, group);
    /* -1 is also a legitimate result, so it is an error only when an
       exception is pending. */
    if (offset == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyLong_FromSsize_t(offset);
}
PyDoc_STRVAR(_sre_SRE_Match_end__doc__,
    "end($self, group=0, /)\n"
    "--\n"
    "\n"
    "Return index of the end of the substring matched by group.");
#define _SRE_SRE_MATCH_END_METHODDEF \
    {"end", (PyCFunction)((void*)_sre_SRE_Match_end), METH_VARARGS, _sre_SRE_Match_end__doc__},
static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group);
/* METH_VARARGS entry point: accepts zero or one positional argument (`group`
   stays NULL when omitted), calls the implementation and returns its result
   as a Python int. */
static PyObject *
_sre_SRE_Match_end(MatchObject *self, PyObject *args)
{
    PyObject *group = NULL;
    if (!PyArg_UnpackTuple(args, "end", 0, 1, &group)) {
        return NULL;
    }

    const Py_ssize_t offset = _sre_SRE_Match_end_impl(self, group);
    /* -1 is also a legitimate result, so it is an error only when an
       exception is pending. */
    if (offset == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PyLong_FromSsize_t(offset);
}
PyDoc_STRVAR(_sre_SRE_Match_span__doc__,
    "span($self, group=0, /)\n"
    "--\n"
    "\n"
    "For match object m, return the 2-tuple (m.start(group), m.end(group)).");
#define _SRE_SRE_MATCH_SPAN_METHODDEF \
    {"span", (PyCFunction)((void*)_sre_SRE_Match_span), METH_VARARGS, _sre_SRE_Match_span__doc__},
static PyObject *
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group);
/* METH_VARARGS entry point: accepts zero or one positional argument (`group`
   stays NULL when omitted) and forwards to the implementation. */
static PyObject *
_sre_SRE_Match_span(MatchObject *self, PyObject *args)
{
    PyObject *group = NULL;
    if (!PyArg_UnpackTuple(args, "span", 0, 1, &group)) {
        return NULL;
    }
    return _sre_SRE_Match_span_impl(self, group);
}
PyDoc_STRVAR(_sre_SRE_Match___copy____doc__,
    "__copy__($self, /)\n"
    "--\n"
    "\n");
#define _SRE_SRE_MATCH___COPY___METHODDEF \
    {"__copy__", (PyCFunction)((void*)_sre_SRE_Match___copy__), METH_NOARGS, _sre_SRE_Match___copy____doc__},
static PyObject *
_sre_SRE_Match___copy___impl(MatchObject *self);
/* METH_NOARGS entry point: hands the call straight to the implementation. */
static PyObject *
_sre_SRE_Match___copy__(MatchObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *copied = _sre_SRE_Match___copy___impl(self);
    return copied;
}
/* Docstring and method-table entry for Match.__deepcopy__.  The entry is
   METH_O, so it names _sre_SRE_Match___deepcopy__ directly and there is no
   separate parsing wrapper. */
PyDoc_STRVAR(_sre_SRE_Match___deepcopy____doc__,
"__deepcopy__($self, memo, /)\n"
"--\n"
"\n");
#define _SRE_SRE_MATCH___DEEPCOPY___METHODDEF \
{"__deepcopy__", (PyCFunction)((void*)_sre_SRE_Match___deepcopy__), METH_O, _sre_SRE_Match___deepcopy____doc__},
PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__,
    "match($self, /)\n"
    "--\n"
    "\n");
#define _SRE_SRE_SCANNER_MATCH_METHODDEF \
    {"match", (PyCFunction)((void*)_sre_SRE_Scanner_match), METH_NOARGS, _sre_SRE_Scanner_match__doc__},
static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject *self);
/* METH_NOARGS entry point: hands the call straight to the implementation. */
static PyObject *
_sre_SRE_Scanner_match(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *outcome = _sre_SRE_Scanner_match_impl(self);
    return outcome;
}
PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__,
    "search($self, /)\n"
    "--\n"
    "\n");
#define _SRE_SRE_SCANNER_SEARCH_METHODDEF \
    {"search", (PyCFunction)((void*)_sre_SRE_Scanner_search), METH_NOARGS, _sre_SRE_Scanner_search__doc__},
static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject *self);
/* METH_NOARGS entry point: hands the call straight to the implementation. */
static PyObject *
_sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *outcome = _sre_SRE_Scanner_search_impl(self);
    return outcome;
}
/*[clinic end generated code: output=cbafe6a83364b0b0 input=a9049054013a1b77]*/
/* Method table for pattern objects, installed through the Py_tp_methods
   slot of Pattern_Type_slots.  Each *_METHODDEF macro expands to one
   complete entry including its trailing comma, so the macros are listed
   without separators.  The {NULL, NULL} entry terminates the table. */
static PyMethodDef pattern_methods[] = {
_SRE_SRE_PATTERN_MATCH_METHODDEF
_SRE_SRE_PATTERN_FULLMATCH_METHODDEF
_SRE_SRE_PATTERN_SEARCH_METHODDEF
_SRE_SRE_PATTERN_SUB_METHODDEF
_SRE_SRE_PATTERN_SUBN_METHODDEF
_SRE_SRE_PATTERN_FINDALL_METHODDEF
_SRE_SRE_PATTERN_SPLIT_METHODDEF
_SRE_SRE_PATTERN_FINDITER_METHODDEF
_SRE_SRE_PATTERN_SCANNER_METHODDEF
_SRE_SRE_PATTERN___COPY___METHODDEF
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
{NULL, NULL}
};
/* Computed attributes of pattern objects, installed through the
   Py_tp_getset slot of Pattern_Type_slots.  "groupindex" has a getter only
   (the setter is NULL). */
static PyGetSetDef pattern_getset[] = {
{"groupindex", (getter)pattern_groupindex, (setter)NULL,
"A dictionary mapping group names to group numbers."},
{NULL} /* Sentinel */
};
/* PAT_OFF(x): byte offset of field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
/* Struct-backed attributes of pattern objects, installed through the
   Py_tp_members slot of Pattern_Type_slots.  All entries are READONLY. */
static PyMemberDef pattern_members[] = {
{"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
"The pattern string from which the RE object was compiled."},
{"flags", T_INT, PAT_OFF(flags), READONLY,
"The regex matching flags."},
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
"The number of capturing groups in the pattern."},
/* Special member name: reports the offset of the weakreflist field to the
   type machinery — see the PyType_FromSpec documentation. */
{"__weaklistoffset__", T_PYSSIZET, PAT_OFF(weakreflist), READONLY},
{NULL} /* Sentinel */
};
/* Slot list used to build the pattern type from Pattern_Type_spec:
   lifecycle, repr/hash/compare behaviour, docstring, and the method,
   member and getset tables. */
static PyType_Slot Pattern_Type_slots[] = {
    {Py_tp_dealloc,     reinterpret_cast<void*>(pattern_dealloc)},
    {Py_tp_repr,        reinterpret_cast<void*>(pattern_repr)},
    {Py_tp_hash,        reinterpret_cast<void*>(pattern_hash)},
    {Py_tp_doc,         pattern_doc},
    {Py_tp_richcompare, reinterpret_cast<void*>(pattern_richcompare)},
    {Py_tp_methods,     pattern_methods},
    {Py_tp_members,     pattern_members},
    {Py_tp_getset,      pattern_getset},
    {0, 0},  /* Sentinel */
};
/* Specification from which the "re.Pattern" type is created. Instances have
   a PatternObject header followed by variable-size items of SRE_CODE. */
static PyType_Spec Pattern_Type_spec = {
    "re.Pattern",            /* name */
    sizeof(PatternObject),   /* basicsize */
    sizeof(SRE_CODE),        /* itemsize */
    Py_TPFLAGS_DEFAULT,      /* flags */
    Pattern_Type_slots       /* slots */
};
/* Method table for match objects. Match objects do not support length or
   assignment, but they do support __getitem__.

   "group" is spelled out by hand as a METH_VARARGS entry; the remaining
   entries come from *_METHODDEF macros, each of which already ends with a
   comma. */
static PyMethodDef match_methods[] = {
    {
        "group",
        (PyCFunction)((void*) match_group),
        METH_VARARGS,
        match_group_doc
    },

    /* Positions of a group. */
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF

    /* Group collections and template expansion. */
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF

    /* Copy protocol. */
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF

    {NULL, NULL, 0, NULL}  /* Sentinel */
};
/* Computed attributes of match objects. Each one has a getter only;
   "regs" is registered without a docstring. */
static PyGetSetDef match_getset[] = {
    {
        "lastindex",
        (getter)match_lastindex_get,
        (setter)NULL,
        "The integer index of the last matched capturing group."
    },
    {
        "lastgroup",
        (getter)match_lastgroup_get,
        (setter)NULL,
        "The name of the last matched capturing group."
    },
    {
        "regs",
        (getter)match_regs_get,
        (setter)NULL
    },
    {NULL}  /* Sentinel */
};
/* Byte offset of field `x` inside MatchObject. */
#define MATCH_OFF(x) offsetof(MatchObject, x)

/* Attributes of match objects that are read straight out of MatchObject
   fields. All of them are READONLY. Note that the Python-level name "re"
   maps to the `pattern` field. */
static PyMemberDef match_members[] = {
    {
        "string", T_OBJECT, MATCH_OFF(string), READONLY,
        "The string passed to match() or search()."
    },
    {
        "re", T_OBJECT, MATCH_OFF(pattern), READONLY,
        "The regular expression object."
    },
    {
        "pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
        "The index into the string at which the RE engine started looking for a match."
    },
    {
        "endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
        "The index into the string beyond which the RE engine will not go."
    },
    {NULL}  /* Sentinel */
};
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any). */

/* Slot list used to build the match type from Match_Type_spec: lifecycle,
   repr, docstring, the method/member/getset tables, and subscripting via
   match_getitem. */
static PyType_Slot Match_Type_slots[] = {
    {Py_tp_dealloc,   reinterpret_cast<void*>(match_dealloc)},
    {Py_tp_repr,      reinterpret_cast<void*>(match_repr)},
    {Py_tp_doc,       match_doc},
    {Py_tp_methods,   match_methods},
    {Py_tp_members,   match_members},
    {Py_tp_getset,    match_getset},
    {Py_mp_subscript, reinterpret_cast<void*>(match_getitem)},
    {0, 0},  /* Sentinel */
};
/* Specification from which the "re.Match" type is created. Instances have a
   MatchObject header followed by variable-size items of Py_ssize_t. */
static PyType_Spec Match_Type_spec = {
    "re.Match",            /* name */
    sizeof(MatchObject),   /* basicsize */
    sizeof(Py_ssize_t),    /* itemsize */
    Py_TPFLAGS_DEFAULT,    /* flags */
    Match_Type_slots       /* slots */
};
/* Method table for scanner objects: match() and search(). Each macro
   expands to a full entry followed by a comma. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL, 0, NULL}  /* Sentinel */
};
/* Byte offset of field `x` inside ScannerObject. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)

/* Attributes of scanner objects read straight out of ScannerObject fields:
   only the read-only "pattern", registered without a docstring. */
static PyMemberDef scanner_members[] = {
    {
        "pattern", T_OBJECT, SCAN_OFF(pattern), READONLY
    },
    {NULL}  /* Sentinel */
};
/* Slot list used to build the scanner type from Scanner_Type_spec:
   deallocation plus the method and member tables. */
static PyType_Slot Scanner_Type_slots[] = {
    {Py_tp_dealloc, reinterpret_cast<void*>(scanner_dealloc)},
    {Py_tp_methods, scanner_methods},
    {Py_tp_members, scanner_members},
    {0, 0},  /* Sentinel */
};
/* Specification from which the scanner type is created. Its name is built
   from SRE_MODULE at compile time; instances are fixed-size (itemsize 0). */
static PyType_Spec Scanner_Type_spec = {
    "_" SRE_MODULE ".SRE_Scanner",   /* name */
    sizeof(ScannerObject),           /* basicsize */
    0,                               /* itemsize */
    Py_TPFLAGS_DEFAULT,              /* flags */
    Scanner_Type_slots               /* slots */
};
/* Module-level function table, installed through sremodule. Each macro
   expands to a full entry followed by a comma. */
static PyMethodDef _functions[] = {
    /* Pattern construction and code-unit size. */
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF

    /* Case helpers, in ASCII and Unicode flavours. */
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF

    {NULL, NULL, 0, NULL}  /* Sentinel */
};
/* m_clear hook: drops the module state's references to the three type
   objects and resets those fields to NULL. Always returns 0. */
static int
_sre_clear(PyObject *module)
{
    /* Look the per-module state up once instead of once per field. */
    _srestate *state = _srestate(module);

    Py_CLEAR(state->Pattern_Type);
    Py_CLEAR(state->Match_Type);
    Py_CLEAR(state->Scanner_Type);
    return 0;
}
/* m_traverse hook: reports the three type objects held in the module state
   to the visitor. Py_VISIT returns early with the visitor's result if it is
   non-zero; otherwise 0 is returned. The parameter names `visit` and `arg`
   are required by Py_VISIT. */
static int
_sre_traverse(PyObject *module, visitproc visit, void *arg)
{
    /* Look the per-module state up once instead of once per field. */
    _srestate *state = _srestate(module);

    Py_VISIT(state->Pattern_Type);
    Py_VISIT(state->Match_Type);
    Py_VISIT(state->Scanner_Type);
    return 0;
}
static void
_sre_free(void *module)
{
_sre_clear((PyObject *)module);
}
/* Module definition for the SRE extension module. The module carries
   per-module state of type _srestate and wires up the GC/teardown hooks
   defined above. Field labels follow the CPython PyModuleDef layout. */
struct PyModuleDef sremodule = {
    PyModuleDef_HEAD_INIT,   /* m_base */
    "_" SRE_MODULE,          /* m_name */
    NULL,                    /* m_doc: no module docstring */
    sizeof(_srestate),       /* m_size: bytes of per-module state */
    _functions,              /* m_methods */
    NULL,                    /* m_slots: not used */
    _sre_traverse,           /* m_traverse */
    _sre_clear,              /* m_clear */
    _sre_free,               /* m_free */
};
PyMODINIT_FUNC PyInit__sre(void)
{
PyObject* m;
PyObject* x;
m = PyState_FindModule(&sremodule);
if (m != NULL) {
Py_INCREF(m);
return m;
}
m = PyModule_Create(&sremodule);
if (m == NULL)
return NULL;
/* Patch object types */
PyTypeObject *Pattern_Type = (PyTypeObject *)PyType_FromSpec(&Pattern_Type_spec);
if (Pattern_Type == NULL) {
return NULL;
}
_srestate(m)->Pattern_Type = (PyObject *)Pattern_Type;
PyTypeObject *Match_Type = (PyTypeObject *)PyType_FromSpec(&Match_Type_spec);
if (Match_Type == NULL) {
return NULL;
}
_srestate(m)->Match_Type = (PyObject *)Match_Type;
PyTypeObject *Scanner_Type = (PyTypeObject *)PyType_FromSpec(&Scanner_Type_spec);
if (Scanner_Type == NULL) {
return NULL;
}
_srestate(m)->Scanner_Type = (PyObject *)Scanner_Type;
x = PyLong_FromLong(SRE_MAGIC);
if (x) {
PyModule_AddObject(m, "MAGIC", x);
}
x = PyLong_FromLong(sizeof(SRE_CODE));
if (x) {
PyModule_AddObject(m, "CODESIZE", x);
}
x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
if (x) {
PyModule_AddObject(m, "MAXREPEAT", x);
}
x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
if (x) {
PyModule_AddObject(m, "MAXGROUPS", x);
}
x = PyUnicode_FromString(copyright);
if (x) {
PyModule_AddObject(m, "copyright", x);
}
PyState_AddModule(m, &sremodule);
return m;
}
/* vim:ts=4:sw=4:et
*-
*/
}