# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'spec_helper'

describe TwitterCldr::Shared::UnicodeRegex do
  def compile(str, symbol_table = nil)
    described_class.compile(str, "", symbol_table)
  end

  let(:symbol_table) do
    tokenizer = TwitterCldr::Tokenizers::UnicodeRegexTokenizer.new
    table = TwitterCldr::Parsers::SymbolTable.new({
      "$FOO" => tokenizer.tokenize("[g-k]"),
      "$BAR" => tokenizer.tokenize("[p-s]")
    })
  end

  context "basic operations" do
    let(:regex) { compile("[abc]") }

    describe "#compile" do
      it "should return a UnicodeRegex, parsed and ready to go" do
        expect(regex).to be_a(described_class)
      end
    end

    describe "#to_regexp_str" do
      it "should return the string representation of this regex" do
        expect(regex.to_regexp_str).to eq("(?:[\\u{0061}-\\u{0063}])")
      end
    end

    describe "#to_regexp" do
      it "should return a ruby Regexp" do
        expect(regex.to_regexp).to be_a(Regexp)
      end

      it "should properly turn various basic regexes into strings" do
        expect(compile("^abc$").to_regexp_str).to eq("^(?:\\u{0061})(?:\\u{0062})(?:\\u{0063})$")
        expect(compile("a(b)c").to_regexp_str).to eq("(?:\\u{0061})((?:\\u{0062}))(?:\\u{0063})")
        expect(compile("a(?:b)c").to_regexp_str).to eq("(?:\\u{0061})(?:(?:\\u{0062}))(?:\\u{0063})")
        expect(compile("a{1,3}").to_regexp_str).to eq("(?:\\u{0061}){1,3}")
        expect(compile("[abc]").to_regexp_str).to eq("(?:[\\u{0061}-\\u{0063}])")
      end

      it "should properly turn various complex regexes into strings" do
        expect(compile("[a-z0-9]").to_regexp_str).to eq(
          "(?:[\\u{0030}-\\u{0039}]|[\\u{0061}-\\u{007a}])"
        )
        expect(compile("[\\u0067-\\u0071]").to_regexp_str).to eq("(?:[\\u{0067}-\\u{0071}])")
      end

      it "should properly substitute variables" do
        expect(compile("$FOO$BAR", symbol_table).to_regexp_str).to eq(
          "(?:[\\u{0067}-\\u{006b}])(?:[\\u{0070}-\\u{0073}])"
        )
      end

      it "supports modifiers" do
        regex = described_class.compile('abc', 'm').to_regexp
        expect(regex.options).to eq(Regexp::MULTILINE)
      end

      it "supports multiple modifiers at once" do
        regex = described_class.compile('abc', 'mi').to_regexp
        expect(regex.options).to eq(
          Regexp::MULTILINE | Regexp::IGNORECASE
        )
      end
    end
  end

  context "with a few variables" do
    describe "#match" do
      it "should substitute variables from the symbol table" do
        regex = compile("$FOO $BAR", symbol_table)
        expect(regex).to exactly_match("h r")
        expect(regex).to exactly_match("j q")
        expect(regex).not_to exactly_match("h t")
        expect(regex).not_to exactly_match("c s")
      end
    end
  end

  context "matching basics" do
    describe "#match" do
      it "should match a regex with no char class" do
        regex = compile("^abc$")
        expect(regex).to exactly_match("abc")
        expect(regex).not_to exactly_match("cba")
      end

      it "should match a regex with a capturing group" do
        regex = compile("a(b)c")
        match = regex.match("abc")
        expect(match).not_to be_nil
        expect(match.captures[0]).to eq("b")
      end

      it "should match a regex with a non-capturing group" do
        regex = compile("a(?:b)c")
        match = regex.match("abc")
        expect(match).not_to be_nil
        expect(match.captures).to eq([])
      end

      it "should match a regex with a quantifier" do
        regex = compile("a{1,3}")
        expect(regex).to exactly_match("a")
        expect(regex).to exactly_match("aa")
        expect(regex).to exactly_match("aaa")
        expect(regex).not_to exactly_match("aaaa")
        expect(regex).not_to exactly_match("b")
      end

      it "should match a regex with a basic char class" do
        regex = compile("[abc]")
        expect(regex).to exactly_match("a")
        expect(regex).to exactly_match("b")
        expect(regex).to exactly_match("c")
        expect(regex).not_to exactly_match("ab")
        expect(regex).not_to exactly_match("d")
      end
    end
  end

  context "matching complex character classes" do
    describe "#match" do
      it "should match a regex with a char class containing a range" do
        regex = compile("[a-z0-9]")
        expect(regex).to exactly_match("a")
        expect(regex).to exactly_match("m")
        expect(regex).to exactly_match("z")
        expect(regex).to exactly_match("0")
        expect(regex).to exactly_match("3")
        expect(regex).to exactly_match("9")
        expect(regex).not_to exactly_match("a0")
        expect(regex).not_to exactly_match("m4")
      end

      it "should match a regex with a char class containing a unicode range" do
        regex = compile("[\\u0067-\\u0071]")  # g-q
        expect(regex).to exactly_match("g")
        expect(regex).to exactly_match("q")
        expect(regex).to exactly_match("h")
        expect(regex).not_to exactly_match("z")
      end

      it "should match a regex containing a character set" do
        regex = compile("[\\p{Zs}]")
        expect(regex).to exactly_match([160].pack("U*"))  # non-breaking space
        expect(regex).to exactly_match([5760].pack("U*"))  # ogham space mark
        expect(regex).not_to exactly_match("a")
      end

      it "should match a regex containing a negated character set" do
        regex = compile("[\\P{Zs}]")
        expect(regex).to exactly_match("a")
        expect(regex).not_to exactly_match([160].pack("U*"))
        expect(regex).not_to exactly_match([5760].pack("U*"))
      end

      it "should match a regex containing a character set (alternate syntax)" do
        regex = compile("[[:Zs:]]")
        expect(regex).to exactly_match([160].pack("U*"))  # non-breaking space
        expect(regex).to exactly_match([5760].pack("U*"))  # ogham space mark
        expect(regex).not_to exactly_match("a")
      end

      it "should match a regex containing a unioned character set" do
        regex = compile("[[:L:][:White_Space:]]*")
        expect(regex).to exactly_match("abc")
        expect(regex).to exactly_match("くøß")
        expect("a b c _ d".gsub(regex.to_regexp, "")).to eq("_")
      end

      it "should match a regex containing a negated unioned character set" do
        regex = compile("[^[:L:][:White_Space:]]*")
        expect(regex).to exactly_match(".,/")
        expect(regex).to_not exactly_match("a b c")
        expect("a b c _ d".gsub(regex.to_regexp, "")).to eq("a b c  d")
      end

      it "should match a regex containing a negated character set (alternate syntax)" do
        regex = compile("[[:^Zs:]]")
        expect(regex).to exactly_match("a")
        expect(regex).not_to exactly_match([160].pack("U*"))
        expect(regex).not_to exactly_match([5760].pack("U*"))
      end

      it "should match a regex with a character set and some quantifiers" do
        regex = compile("[\\u0067-\\u0071]+")
        expect(regex).to exactly_match("gg")
        expect(regex).to exactly_match("gh")
        expect(regex).to exactly_match("qjk")
        expect(regex).not_to exactly_match("")
      end

      it "should match a regex that uses special switches inside the char class" do
        regex = compile("[\\w]+")
        expect(regex).to exactly_match("a")
        expect(regex).to exactly_match("abc")
        expect(regex).to exactly_match("a0b_1c2")
        expect(regex).not_to exactly_match("$@#")
      end

      it "should match a regex that uses negated special switches inside the char class" do
        regex = compile("[\\W]+")
        expect(regex).not_to exactly_match("a")
        expect(regex).not_to exactly_match("abc")
        expect(regex).not_to exactly_match("a0b_1c2")
        expect(regex).to exactly_match("$@#")
      end

      it "should match a regex with a complicated expression inside the char class" do
        # [separators U space-tilde] diff [letters diff numbers]  (diff is commutative)
        regex = compile("[[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]")
        expect(regex).to exactly_match(" ")
        expect(regex).to exactly_match(",")
        expect(regex).not_to exactly_match("a")
      end

      it "should treat a dash that is the first character of a character class as a literal dash instead of a range" do
        regex = compile("[-abc]*")
        expect(regex).to exactly_match("a-b-c")
        expect(regex).to exactly_match("--a")
        expect(regex).not_to exactly_match("def")
      end
    end
  end
end
