int main()

in pcre/pcrecpp_unittest.cc [810:1291]


// Entry point of the PCRE C++ wrapper unit test.
//
// Usage:
//   <prog>                          run the default correctness tests
//   <prog> timingN <num-iters>      run timing test N (1..3) for num-iters
//   <prog> -<anything>              print usage and exit
//
// Returns 0 on success (and after printing usage or an unknown-argument
// message), 1 when timing mode is requested without a usable iteration count.
int main(int argc, char** argv) {
  // Treat any flag as --help
  if (argc > 1 && argv[1][0] == '-') {
    printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
           "       If 'timingX ###' is specified, run the given timing test\n"
           "       with the given number of iterations, rather than running\n"
           "       the default corectness test.\n", argv[0]);
    return 0;
  }

  if (argc > 1) {
    // Parse the iteration count once.  atoi() yields 0 for non-numeric
    // input, which is rejected the same way as a missing argument.
    const int num_iters = (argc > 2) ? atoi(argv[2]) : 0;
    if (num_iters == 0) {
      printf("timing mode needs a num-iters argument\n");
      return 1;
    }
    if (!strcmp(argv[1], "timing1"))
      Timing1(num_iters);
    else if (!strcmp(argv[1], "timing2"))
      Timing2(num_iters);
    else if (!strcmp(argv[1], "timing3"))
      Timing3(num_iters);
    else
      printf("Unknown argument '%s'\n", argv[1]);
    return 0;
  }

  printf("PCRE C++ wrapper tests\n");
  printf("Testing FullMatch\n");

  int i;
  string s;

  /***** FullMatch with no args *****/

  CHECK(RE("h.*o").FullMatch("hello"));
  CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
  CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
  CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
  CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
  CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops

  /***** FullMatch with args *****/

  // Zero-arg
  CHECK(RE("\\d+").FullMatch("1001"));

  // Single-arg
  CHECK(RE("(\\d+)").FullMatch("1001",   &i));
  CHECK_EQ(i, 1001);
  CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
  CHECK_EQ(i, -123);
  CHECK(!RE("()\\d+").FullMatch("10", &i));
  CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
                                &i));

  // Digits surrounding integer-arg
  CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
  CHECK_EQ(i, 23);
  CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
  CHECK_EQ(i, 1);
  CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
  CHECK_EQ(i, -1);
  CHECK(RE("(\\d)").PartialMatch("1234", &i));
  CHECK_EQ(i, 1);
  CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
  CHECK_EQ(i, -1);

  // String-arg
  CHECK(RE("h(.*)o").FullMatch("hello", &s));
  CHECK_EQ(s, string("ell"));

  // StringPiece-arg
  StringPiece sp;
  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
  CHECK_EQ(sp.size(), 4);
  CHECK(memcmp(sp.data(), "ruby", 4) == 0);
  CHECK_EQ(i, 1234);

  // Multi-arg
  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
  CHECK_EQ(s, string("ruby"));
  CHECK_EQ(i, 1234);

  // Ignore non-void* NULL arg
  CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
  CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
  CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
  CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
#ifdef HAVE_LONG_LONG
  CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
#endif
  CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
  CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));

  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
  CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
  CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
  CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
  CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));

  // Ignored arg
  CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
  CHECK_EQ(s, string("ruby"));
  CHECK_EQ(i, 1234);

  // Type tests: each block checks parsing into one destination type,
  // including the boundary values and the first out-of-range values.
  {
    char c;
    CHECK(RE("(H)ello").FullMatch("Hello", &c));
    CHECK_EQ(c, 'H');
  }
  {
    unsigned char c;
    CHECK(RE("(H)ello").FullMatch("Hello", &c));
    CHECK_EQ(c, static_cast<unsigned char>('H'));
  }
  {
    short v;
    CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
    CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
    CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
    CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
  }
  {
    unsigned short v;
    CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
    CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
  }
  {
    int v;
    static const int max_value = 0x7fffffff;
    static const int min_value = -max_value - 1;
    CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
    CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
    CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
    CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
    CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
  }
  {
    unsigned int v;
    static const unsigned int max_value = 0xfffffffful;
    CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
    CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
  }
#ifdef HAVE_LONG_LONG
# if defined(__MINGW__) || defined(__MINGW32__)
#   define LLD "%I64d"
#   define LLU "%I64u"
# else
#   define LLD "%lld"
#   define LLU "%llu"
# endif
  {
    long long v;
    static const long long max_value = 0x7fffffffffffffffLL;
    static const long long min_value = -max_value - 1;
    char buf[32];  // definitely big enough for a long long

    CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
    CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);

    sprintf(buf, LLD, max_value);
    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);

    sprintf(buf, LLD, min_value);
    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);

    // Bump the last decimal digit to get a value one past the limit; the
    // assert guards against the digit wrapping past '9'.
    sprintf(buf, LLD, max_value);
    assert(buf[strlen(buf)-1] != '9');
    buf[strlen(buf)-1]++;
    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));

    sprintf(buf, LLD, min_value);
    assert(buf[strlen(buf)-1] != '9');
    buf[strlen(buf)-1]++;
    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  }
#endif
#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
  {
    unsigned long long v;
    long long v2;
    static const unsigned long long max_value = 0xffffffffffffffffULL;
    char buf[32];  // definitely big enough for a unsigned long long

    CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
    CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);

    sprintf(buf, LLU, max_value);
    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);

    // Same last-digit bump as above to produce max_value + 1.
    assert(buf[strlen(buf)-1] != '9');
    buf[strlen(buf)-1]++;
    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  }
#endif
  {
    float v;
    CHECK(RE("(.*)").FullMatch("100", &v));
    CHECK(RE("(.*)").FullMatch("-100.", &v));
    CHECK(RE("(.*)").FullMatch("1e23", &v));
  }
  {
    double v;
    CHECK(RE("(.*)").FullMatch("100", &v));
    CHECK(RE("(.*)").FullMatch("-100.", &v));
    CHECK(RE("(.*)").FullMatch("1e23", &v));
  }

  // Check that matching is fully anchored
  CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
  CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
  CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
  CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);

  // Braces
  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
  CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));

  // Complicated RE
  CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
  CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
  CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
  CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));

  // Check full-match handling (needs '$' tacked on internally)
  CHECK(RE("fo|foo").FullMatch("fo"));
  CHECK(RE("fo|foo").FullMatch("foo"));
  CHECK(RE("fo|foo$").FullMatch("fo"));
  CHECK(RE("fo|foo$").FullMatch("foo"));
  CHECK(RE("foo$").FullMatch("foo"));
  CHECK(!RE("foo\\$").FullMatch("foo$bar"));
  CHECK(!RE("fo|bar").FullMatch("fox"));

  // Uncomment the following if we change the handling of '$' to
  // prevent it from matching a trailing newline
  if (false) {
    // Check that we don't get bitten by pcre's special handling of a
    // '\n' at the end of the string matching '$'
    CHECK(!RE("foo$").PartialMatch("foo\n"));
  }

  // Number of args
  //
  // The whole array is cleared before each sub-test so that values left
  // over from the previous sub-test cannot satisfy a CHECK_EQ for a
  // capture that was never actually written.
  int a[16];
  CHECK(RE("").FullMatch(""));

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d){1}").FullMatch("1",
                                 &a[0]));
  CHECK_EQ(a[0], 1);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)").FullMatch("12",
                                   &a[0],  &a[1]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
                                        &a[0],  &a[1],  &a[2]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
                                             &a[0],  &a[1],  &a[2],  &a[3]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);
  CHECK_EQ(a[3], 4);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
                                                  &a[0],  &a[1],  &a[2],
                                                  &a[3],  &a[4]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);
  CHECK_EQ(a[3], 4);
  CHECK_EQ(a[4], 5);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
                                                       &a[0],  &a[1],  &a[2],
                                                       &a[3],  &a[4],  &a[5]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);
  CHECK_EQ(a[3], 4);
  CHECK_EQ(a[4], 5);
  CHECK_EQ(a[5], 6);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
                                                            &a[0],  &a[1],  &a[2],  &a[3],
                                                            &a[4],  &a[5],  &a[6]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);
  CHECK_EQ(a[3], 4);
  CHECK_EQ(a[4], 5);
  CHECK_EQ(a[5], 6);
  CHECK_EQ(a[6], 7);

  memset(a, 0, sizeof(a));
  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
           "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
               "1234567890123456",
               &a[0],  &a[1],  &a[2],  &a[3],
               &a[4],  &a[5],  &a[6],  &a[7],
               &a[8],  &a[9],  &a[10], &a[11],
               &a[12], &a[13], &a[14], &a[15]));
  CHECK_EQ(a[0], 1);
  CHECK_EQ(a[1], 2);
  CHECK_EQ(a[2], 3);
  CHECK_EQ(a[3], 4);
  CHECK_EQ(a[4], 5);
  CHECK_EQ(a[5], 6);
  CHECK_EQ(a[6], 7);
  CHECK_EQ(a[7], 8);
  CHECK_EQ(a[8], 9);
  CHECK_EQ(a[9], 0);
  CHECK_EQ(a[10], 1);
  CHECK_EQ(a[11], 2);
  CHECK_EQ(a[12], 3);
  CHECK_EQ(a[13], 4);
  CHECK_EQ(a[14], 5);
  CHECK_EQ(a[15], 6);

  /***** PartialMatch *****/

  printf("Testing PartialMatch\n");

  CHECK(RE("h.*o").PartialMatch("hello"));
  CHECK(RE("h.*o").PartialMatch("othello"));
  CHECK(RE("h.*o").PartialMatch("hello!"));
  CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));

  /***** other tests *****/

  RadixTests();
  TestReplace();
  TestExtract();
  TestConsume();
  TestFindAndConsume();
  TestQuoteMetaAll();
  TestMatchNumberPeculiarity();

  // Check the pattern() accessor
  {
    const string kPattern = "http://([^/]+)/.*";
    const RE re(kPattern);
    CHECK_EQ(kPattern, re.pattern());
  }

  // Check RE error field.
  {
    RE re("foo");
    CHECK(re.error().empty());  // Must have no error
  }

#ifdef SUPPORT_UTF8
  // Check UTF-8 handling
  {
    printf("Testing UTF-8 handling\n");

    // Three Japanese characters (nihongo)
    const unsigned char utf8_string[] = {
         0xe6, 0x97, 0xa5, // 65e5
         0xe6, 0x9c, 0xac, // 672c
         0xe8, 0xaa, 0x9e, // 8a9e
         0
    };
    const unsigned char utf8_pattern[] = {
         '.',
         0xe6, 0x9c, 0xac, // 672c
         '.',
         0
    };

    // Both should match in either mode, bytes or UTF-8
    RE re_test1(".........");
    CHECK(re_test1.FullMatch(utf8_string));
    RE re_test2("...", pcrecpp::UTF8());
    CHECK(re_test2.FullMatch(utf8_string));

    // Check that '.' matches one byte or UTF-8 character
    // according to the mode.
    string ss;
    RE re_test3("(.)");
    CHECK(re_test3.PartialMatch(utf8_string, &ss));
    CHECK_EQ(ss, string("\xe6"));
    RE re_test4("(.)", pcrecpp::UTF8());
    CHECK(re_test4.PartialMatch(utf8_string, &ss));
    CHECK_EQ(ss, string("\xe6\x97\xa5"));

    // Check that string matches itself in either mode
    RE re_test5(utf8_string);
    CHECK(re_test5.FullMatch(utf8_string));
    RE re_test6(utf8_string, pcrecpp::UTF8());
    CHECK(re_test6.FullMatch(utf8_string));

    // Check that pattern matches string only in UTF8 mode
    RE re_test7(utf8_pattern);
    CHECK(!re_test7.FullMatch(utf8_string));
    RE re_test8(utf8_pattern, pcrecpp::UTF8());
    CHECK(re_test8.FullMatch(utf8_string));
  }

  // Check that ungreedy, UTF8 regular expressions don't match when they
  // oughtn't -- see bug 82246.
  {
    // This code always worked.
    const char* pattern = "\\w+X";
    const string target = "a aX";
    RE match_sentence(pattern);
    RE match_sentence_re(pattern, pcrecpp::UTF8());

    CHECK(!match_sentence.FullMatch(target));
    CHECK(!match_sentence_re.FullMatch(target));
  }

  {
    const char* pattern = "(?U)\\w+X";
    const string target = "a aX";
    RE match_sentence(pattern);
    RE match_sentence_re(pattern, pcrecpp::UTF8());

    CHECK(!match_sentence.FullMatch(target));
    CHECK(!match_sentence_re.FullMatch(target));
  }
#endif  /* def SUPPORT_UTF8 */

  printf("Testing error reporting\n");

  // Each malformed pattern below must leave a non-empty error() string.
  { RE re("a\\1"); CHECK(!re.error().empty()); }
  {
    RE re("a[x");
    CHECK(!re.error().empty());
  }
  {
    RE re("a[z-a]");
    CHECK(!re.error().empty());
  }
  {
    RE re("a[[:foobar:]]");
    CHECK(!re.error().empty());
  }
  {
    RE re("a(b");
    CHECK(!re.error().empty());
  }
  {
    RE re("a\\");
    CHECK(!re.error().empty());
  }

  // Test that recursion is stopped
  TestRecursion();

  // Test Options
  if (getenv("VERBOSE_TEST") != NULL)
    VERBOSE_TEST  = true;
  TestOptions();

  // Test the constructors
  TestConstructors();

  // Done
  printf("OK\n");

  return 0;
}