validate_selector

in lib/crawler/data/extraction/rule.rb [104:124]


        # Ensures the rule's selector is usable for the rule's source.
        #
        # * A blank selector is always rejected.
        # * When the source is SOURCES_HTML, the selector is run through both
        #   validate_css and validate_xpath against a throwaway fragment; it is
        #   rejected only when both report a (truthy) error.
        # * For any other source, the selector must compile as a regular expression.
        #
        # @raise [ArgumentError] if the selector is blank, fails both HTML checks,
        #   or cannot be compiled by Regexp.new
        # @return [Regexp, nil] the compiled pattern for non-HTML sources, nil when
        #   an HTML selector is accepted
        def validate_selector
          raise ArgumentError, "Extraction rule selector can't be blank" if @selector.blank?

          unless @source == SOURCES_HTML
            # Non-HTML sources: compiling the selector is the whole check.
            begin
              return Regexp.new(@selector)
            rescue RegexpError => err
              raise ArgumentError,
                    "Extraction rule selector `#{@selector}` is not a valid regular expression: #{err.message}"
            end
          end

          # HTML sources: both checks always run, CSS first, on the same fragment.
          fragment = Nokogiri::HTML::DocumentFragment.parse('<a></a>')
          css_issue = validate_css(fragment)
          xpath_issue = validate_xpath(fragment)

          # Passing either check is enough for the selector to be accepted.
          return unless css_issue && xpath_issue

          raise ArgumentError,
                "Extraction rule selector '#{@selector}' is an invalid HTML selector: #{css_issue} & #{xpath_issue}"
        end