in pcre/pcrecpp_unittest.cc [208:364]
static void TestReplace() {
printf("Testing Replace\n");
struct ReplaceTest {
const char *regexp;
const char *rewrite;
const char *original;
const char *single;
const char *global;
int global_count; // the expected return value from ReplaceAll
};
static const ReplaceTest tests[] = {
{ "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
"\\2\\1ay",
"the quick brown fox jumps over the lazy dogs.",
"ethay quick brown fox jumps over the lazy dogs.",
"ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
9 },
{ "\\w+",
"\\0-NOSPAM",
"paul.haahr@google.com",
"paul-NOSPAM.haahr@google.com",
"paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
4 },
{ "^",
"(START)",
"foo",
"(START)foo",
"(START)foo",
1 },
{ "^",
"(START)",
"",
"(START)",
"(START)",
1 },
{ "$",
"(END)",
"",
"(END)",
"(END)",
1 },
{ "b",
"bb",
"ababababab",
"abbabababab",
"abbabbabbabbabb",
5 },
{ "b",
"bb",
"bbbbbb",
"bbbbbbb",
"bbbbbbbbbbbb",
6 },
{ "b+",
"bb",
"bbbbbb",
"bb",
"bb",
1 },
{ "b*",
"bb",
"bbbbbb",
"bb",
"bbbb",
2 },
{ "b*",
"bb",
"aaaaa",
"bbaaaaa",
"bbabbabbabbabbabb",
6 },
{ "b*",
"bb",
"aa\naa\n",
"bbaa\naa\n",
"bbabbabb\nbbabbabb\nbb",
7 },
{ "b*",
"bb",
"aa\raa\r",
"bbaa\raa\r",
"bbabbabb\rbbabbabb\rbb",
7 },
{ "b*",
"bb",
"aa\r\naa\r\n",
"bbaa\r\naa\r\n",
"bbabbabb\r\nbbabbabb\r\nbb",
7 },
// Check empty-string matching (it's tricky!)
{ "aa|b*",
"@",
"aa",
"@",
"@@",
2 },
{ "b*|aa",
"@",
"aa",
"@aa",
"@@@",
3 },
#ifdef SUPPORT_UTF8
{ "b*",
"bb",
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
5 },
{ "b*",
"bb",
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
"bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
9 },
#endif
{ "", NULL, NULL, NULL, NULL, 0 }
};
#ifdef SUPPORT_UTF8
const bool support_utf8 = true;
#else
const bool support_utf8 = false;
#endif
for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
assert(re.error().empty());
string one(t->original);
CHECK(re.Replace(t->rewrite, &one));
CHECK_EQ(one, t->single);
string all(t->original);
const int replace_count = re.GlobalReplace(t->rewrite, &all);
CHECK_EQ(all, t->global);
CHECK_EQ(replace_count, t->global_count);
}
// One final test: test \r\n replacement when we're not in CRLF mode
{
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
assert(re.error().empty());
string all("aa\r\naa\r\n");
CHECK_EQ(re.GlobalReplace("bb", &all), 9);
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
}
{
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
assert(re.error().empty());
string all("aa\r\naa\r\n");
CHECK_EQ(re.GlobalReplace("bb", &all), 9);
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
}
// TODO: test what happens when no PCRE_NEWLINE_* flag is set.
// Alas, the answer depends on how pcre was compiled.
}