static void TestReplace()

in pcre/pcrecpp_unittest.cc [208:364]


static void TestReplace() {
  printf("Testing Replace\n");

  struct ReplaceTest {
    const char *regexp;
    const char *rewrite;
    const char *original;
    const char *single;
    const char *global;
    int global_count;         // the expected return value from ReplaceAll
  };
  static const ReplaceTest tests[] = {
    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
      "\\2\\1ay",
      "the quick brown fox jumps over the lazy dogs.",
      "ethay quick brown fox jumps over the lazy dogs.",
      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
      9 },
    { "\\w+",
      "\\0-NOSPAM",
      "paul.haahr@google.com",
      "paul-NOSPAM.haahr@google.com",
      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
      4 },
    { "^",
      "(START)",
      "foo",
      "(START)foo",
      "(START)foo",
      1 },
    { "^",
      "(START)",
      "",
      "(START)",
      "(START)",
      1 },
    { "$",
      "(END)",
      "",
      "(END)",
      "(END)",
      1 },
    { "b",
      "bb",
      "ababababab",
      "abbabababab",
      "abbabbabbabbabb",
       5 },
    { "b",
      "bb",
      "bbbbbb",
      "bbbbbbb",
      "bbbbbbbbbbbb",
      6 },
    { "b+",
      "bb",
      "bbbbbb",
      "bb",
      "bb",
      1 },
    { "b*",
      "bb",
      "bbbbbb",
      "bb",
      "bbbb",
      2 },
    { "b*",
      "bb",
      "aaaaa",
      "bbaaaaa",
      "bbabbabbabbabbabb",
      6 },
    { "b*",
      "bb",
      "aa\naa\n",
      "bbaa\naa\n",
      "bbabbabb\nbbabbabb\nbb",
      7 },
    { "b*",
      "bb",
      "aa\raa\r",
      "bbaa\raa\r",
      "bbabbabb\rbbabbabb\rbb",
      7 },
    { "b*",
      "bb",
      "aa\r\naa\r\n",
      "bbaa\r\naa\r\n",
      "bbabbabb\r\nbbabbabb\r\nbb",
      7 },
    // Check empty-string matching (it's tricky!)
    { "aa|b*",
      "@",
      "aa",
      "@",
      "@@",
      2 },
    { "b*|aa",
      "@",
      "aa",
      "@aa",
      "@@@",
      3 },
#ifdef SUPPORT_UTF8
    { "b*",
      "bb",
      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
      5 },
    { "b*",
      "bb",
      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
       "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
      9 },
#endif
    { "", NULL, NULL, NULL, NULL, 0 }
  };

#ifdef SUPPORT_UTF8
  const bool support_utf8 = true;
#else
  const bool support_utf8 = false;
#endif

  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
    RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
    assert(re.error().empty());
    string one(t->original);
    CHECK(re.Replace(t->rewrite, &one));
    CHECK_EQ(one, t->single);
    string all(t->original);
    const int replace_count = re.GlobalReplace(t->rewrite, &all);
    CHECK_EQ(all, t->global);
    CHECK_EQ(replace_count, t->global_count);
  }

  // One final test: test \r\n replacement when we're not in CRLF mode
  {
    RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
    assert(re.error().empty());
    string all("aa\r\naa\r\n");
    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  }
  {
    RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
    assert(re.error().empty());
    string all("aa\r\naa\r\n");
    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  }
  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
  //       Alas, the answer depends on how pcre was compiled.
}