path # lines of code # active days days since first update days since last update # commits # contributors first updated last updated first contributor last contributor spec/integration/url_fragments_spec.rb 22 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/robots_txt_spec.rb 169 6 407 47 6 2 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/integration/sitemap_spec.rb 36 4 407 373 4 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/content_extraction_spec.rb 44 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/response_content_type_spec.rb 28 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/legacy_sitemaps_spec.rb 69 4 407 282 4 1 2024-04-02 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/timeouts/request_timeout_spec.rb 65 5 407 320 5 2 2024-04-02 2024-06-28 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/timeouts/socket_timeout_spec.rb 23 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/sitemap_xxe_spec.rb 69 5 407 282 5 1 2024-04-02 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/charset_spec.rb 44 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/nofollow_spec.rb 30 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/response_limits_spec.rb 34 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/seed_spec.rb 15 4 407 373 4 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/redirects_spec.rb 40 5 407 282 5 1 2024-04-02 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/integration/headers_spec.rb 75 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/factories/crawl_results.rb 104 6 400 266 6 1 2024-04-09 2024-08-21 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/support/mock_response.rb 11 3 407 366 3 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/support/cli_helpers.rb 32 1 364 364 1 1 2024-05-15 2024-05-15 vidokx@gmail.com vidokx@gmail.com spec/support/faux/results_collection.rb 17 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/support/faux/faux_crawl.rb 153 6 407 258 6 1 2024-04-02 2024-08-29 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/support/crawl_response_matchers.rb 46 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/support/fixtures.rb 17 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler_spec.rb 16 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/http_executor_spec.rb 366 7 407 47 7 2 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/url_validator_spec.rb 132 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/coordinator_spec.rb 708 12 407 97 13 3 2024-04-02 2025-02-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/document_mapper_spec.rb 219 3 266 28 3 2 2024-08-21 2025-04-16 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/stats_spec.rb 85 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/logging/crawllogger_spec.rb 68 1 47 47 1 1 2025-03-28 2025-03-28 matt.nowzari@elastic.co matt.nowzari@elastic.co spec/lib/crawler/http_client_spec.rb 318 5 407 47 5 2 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/content_engine/transformer_spec.rb 213 1 302 302 1 1 2024-07-16 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/content_engine/extractor_spec.rb 188 2 302 295 2 1 2024-07-16 2024-07-23 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/content_engine/utils_spec.rb 35 1 302 302 1 1 2024-07-16 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/rule_spec.rb 22 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/link_spec.rb 119 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/url_queue_spec.rb 14 5 407 320 5 1 2024-04-02 2024-06-28 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/crawl_result/sitemap_spec.rb 194 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/crawl_result/html_spec.rb 365 5 407 28 5 2 2024-04-02 2025-04-16 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/data/crawl_task_spec.rb 10 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/extraction/ruleset_spec.rb 61 2 306 302 2 1 2024-07-12 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/extraction/rule_spec.rb 162 3 306 37 3 2 2024-07-12 2025-04-07 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/data/extraction/url_filter_spec.rb 58 1 306 306 1 1 2024-07-12 2024-07-12 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/url_queue/memory_only_spec.rb 54 5 407 320 5 1 2024-04-02 2024-06-28 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/crawl_result_spec.rb 72 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/url_spec.rb 55 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/data/domain_spec.rb 31 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/rule_engine/base_spec.rb 182 6 407 294 6 1 2024-04-02 2024-07-24 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/cli/urltest_spec.rb 49 1 33 33 1 1 2025-04-11 2025-04-11 matt.nowzari@elastic.co matt.nowzari@elastic.co spec/lib/crawler/cli/validate_spec.rb 53 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com spec/lib/crawler/cli/crawl_spec.rb 63 2 364 56 2 2 2024-05-15 2025-03-19 vidokx@gmail.com matt.nowzari@elastic.co spec/lib/crawler/cli/helpers_spec.rb 196 1 23 23 1 1 2025-04-21 2025-04-21 williamseaston@gmail.com williamseaston@gmail.com spec/lib/crawler/cli/version_spec.rb 9 1 366 366 1 1 2024-05-13 2024-05-13 vidokx@gmail.com vidokx@gmail.com spec/lib/crawler/cli/schedule_spec.rb 50 1 259 259 1 1 2024-08-28 2024-08-28 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/output_sink_spec.rb 36 9 407 97 9 3 2024-04-02 2025-02-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/robots_txt_parser_spec.rb 133 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/url_validator/dns_check_spec.rb 62 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/tcp_check_spec.rb 86 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/domain_access_check_spec.rb 33 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/crawl_rules_check_spec.rb 49 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/url_content_check_spec.rb 148 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/robots_txt_check_spec.rb 104 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/url_check_spec.rb 74 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/url_request_check_spec.rb 165 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/url_validator/domain_uniqueness_check_spec.rb 33 1 132 132 1 1 2025-01-02 2025-01-02 837854+bsantanna@users.noreply.github.com 837854+bsantanna@users.noreply.github.com spec/lib/crawler/http_utils/response_spec.rb 42 3 394 366 3 1 2024-04-15 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/http_utils/filtering_dns_resolver_spec.rb 55 3 394 366 3 1 2024-04-15 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/http_utils/bad_ssl_spec.rb 109 4 394 366 4 1 2024-04-15 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/http_utils/config_spec.rb 18 2 394 373 2 1 2024-04-15 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/crawler/event_generator_spec.rb 150 7 407 47 7 2 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/api/crawl_spec.rb 183 12 407 7 12 3 2024-04-02 2025-05-07 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/api/config_spec.rb 218 9 407 6 9 4 2024-04-02 2025-05-08 13634519+navarone-feekery@users.noreply.github.com williamseaston@gmail.com spec/lib/crawler/output_sink/file_spec.rb 26 6 407 33 6 2 2024-04-02 2025-04-11 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co spec/lib/crawler/output_sink/elasticsearch_spec.rb 594 19 400 69 21 3 2024-04-09 2025-03-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/es/client_spec.rb 480 6 289 9 6 3 2024-07-29 2025-05-05 13634519+navarone-feekery@users.noreply.github.com jedrazb@gmail.com spec/lib/es/bulk_queue_spec.rb 124 1 289 289 1 1 2024-07-29 2024-07-29 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/lib/environment_spec.rb 5 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/spec_helper.rb 38 6 407 366 8 2 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com spec/fixtures/sitemap/sitemap_index.xml 11 1 407 407 1 1 2024-04-02 2024-04-02 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/fixtures/sitemap/sitemap_no_urls.xml 3 1 407 407 1 1 2024-04-02 2024-04-02 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com spec/fixtures/sitemap/sitemap_urlset.xml 27 1 407 407 1 1 2024-04-02 2024-04-02 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com script/support/string_colors.rb 14 2 401 394 3 1 2024-04-08 2024-04-15 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com script/licenses/lib/third_party.rb 79 1 358 358 1 1 2024-05-21 2024-05-21 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com script/licenses/generate_notice.rb 43 1 358 358 1 1 2024-05-21 2024-05-21 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com catalog-info.yaml 59 8 401 229 8 3 2024-04-08 2024-09-27 13634519+navarone-feekery@users.noreply.github.com klim.markelov@gmail.com lib/constants.rb 29 3 306 28 3 2 2024-07-12 2025-04-16 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/environment.rb 9 4 401 373 5 1 2024-04-08 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler.rb 12 3 407 366 3 2 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com lib/crawler/stats.rb 67 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/http_header_service.rb 73 5 407 356 5 2 2024-04-02 2024-05-23 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com lib/crawler/coordinator.rb 563 14 407 5 15 2 2024-04-02 2025-05-09 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/http_client.rb 276 9 407 41 9 2 2024-04-02 2025-04-03 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/utils.rb 22 1 294 294 1 1 2024-07-24 2024-07-24 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/robots_txt_parser.rb 54 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/logging/logger.rb 44 1 47 47 1 1 2025-03-28 2025-03-28 matt.nowzari@elastic.co matt.nowzari@elastic.co lib/crawler/logging/handler/file.rb 50 1 47 47 1 1 2025-03-28 2025-03-28 matt.nowzari@elastic.co matt.nowzari@elastic.co lib/crawler/logging/handler/base.rb 19 1 47 47 1 1 2025-03-28 2025-03-28 matt.nowzari@elastic.co matt.nowzari@elastic.co lib/crawler/logging/handler/stdout.rb 49 1 47 47 1 1 2025-03-28 2025-03-28 matt.nowzari@elastic.co matt.nowzari@elastic.co lib/crawler/executor.rb 10 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/content_engine/extractor.rb 49 2 302 295 2 1 2024-07-16 2024-07-23 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/content_engine/transformer.rb 38 1 302 302 1 1 2024-07-16 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/content_engine/utils.rb 67 1 302 302 1 1 2024-07-16 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/seen_urls.rb 28 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/rule.rb 30 3 407 294 3 1 2024-04-02 2024-07-24 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/url.rb 46 4 407 295 4 1 2024-04-02 2024-07-23 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/link.rb 60 4 407 356 4 2 2024-04-02 2024-05-23 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com lib/crawler/data/url_queue.rb 19 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/sitemap.rb 50 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/html.rb 153 8 407 28 8 3 2024-04-02 2025-04-16 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/data/crawl_result/unsupported_content_type.rb 21 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/content_extractable_file.rb 26 4 407 266 4 1 2024-04-02 2024-08-21 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/robots_txt.rb 10 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/base.rb 69 4 394 282 4 1 2024-04-15 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/http_auth_disallowed_error.rb 20 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/success.rb 19 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/error.rb 23 5 407 280 5 1 2024-04-02 2024-08-07 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/redirect.rb 37 4 407 366 4 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_result/redirect_error.rb 16 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/rule_engine_outcome.rb 90 6 407 282 7 1 2024-04-02 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/extraction/rule.rb 98 4 306 37 4 2 2024-07-12 2025-04-07 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/data/extraction/ruleset.rb 52 3 306 294 3 1 2024-07-12 2024-07-24 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/extraction/url_filter.rb 37 2 306 302 2 1 2024-07-12 2024-07-16 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/url_queue/base.rb 55 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/url_queue/memory_only.rb 52 4 407 358 4 1 2024-04-02 2024-05-21 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/domain.rb 33 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/data/crawl_task.rb 61 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/rule_engine/base.rb 68 7 407 282 7 2 2024-04-02 2024-08-05 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/robots_txt_service.rb 72 5 407 356 5 2 2024-04-02 2024-05-23 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com lib/crawler/document_mapper.rb 88 10 404 28 11 2 2024-04-05 2025-04-16 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/cli/urltest.rb 17 1 33 33 1 1 2025-04-11 2025-04-11 matt.nowzari@elastic.co matt.nowzari@elastic.co lib/crawler/cli/crawl.rb 16 2 364 356 2 1 2024-05-15 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/cli/validate.rb 31 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/cli/version.rb 12 1 366 366 1 1 2024-05-13 2024-05-13 vidokx@gmail.com vidokx@gmail.com lib/crawler/cli/helpers.rb 43 3 356 23 3 3 2024-05-23 2025-04-21 vidokx@gmail.com williamseaston@gmail.com lib/crawler/cli/schedule.rb 34 1 259 259 1 1 2024-08-28 2024-08-28 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/mock_executor.rb 23 5 407 366 5 1 2024-04-02 2024-05-13 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/http_executor.rb 286 9 407 47 10 2 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/output_sink.rb 14 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/mock_event_logger.rb 13 2 407 373 2 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/core_ext.rb 5 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/url_validator/domain_uniqueness_check_concern.rb 12 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/crawl_rules_check_concern.rb 17 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/url_check_concern.rb 18 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/domain_access_check_concern.rb 12 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/result.rb 19 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/url_content_check_concern.rb 57 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/url_request_check_concern.rb 122 2 356 350 2 2 2024-05-23 2024-05-29 vidokx@gmail.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/url_validator/dns_check_concern.rb 30 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/tcp_check_concern.rb 24 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator/robots_txt_check_concern.rb 50 1 356 356 1 1 2024-05-23 2024-05-23 vidokx@gmail.com vidokx@gmail.com lib/crawler/url_validator.rb 124 2 356 350 2 2 2024-05-23 2024-05-29 vidokx@gmail.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/http_utils/config.rb 104 3 394 265 3 1 2024-04-15 2024-08-22 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/http_utils/response.rb 136 7 394 71 7 3 2024-04-15 2025-03-04 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/http_utils/all_trusting_trust_manager.rb 17 2 394 373 2 1 2024-04-15 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/http_utils/filtering_dns_resolver.rb 51 3 394 356 3 2 2024-04-15 2024-05-23 13634519+navarone-feekery@users.noreply.github.com vidokx@gmail.com lib/crawler/http_utils/exceptions.rb 150 2 394 373 2 1 2024-04-15 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/cli.rb 11 5 366 33 6 3 2024-05-13 2025-04-11 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/api/config.rb 341 27 407 6 27 5 2024-04-02 2025-05-08 13634519+navarone-feekery@users.noreply.github.com williamseaston@gmail.com lib/crawler/api/crawl.rb 185 12 407 5 12 3 2024-04-02 2025-05-09 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/output_sink/mock.rb 17 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/output_sink/elasticsearch.rb 222 21 401 7 24 4 2024-04-08 2025-05-07 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/crawler/output_sink/console.rb 17 4 407 373 4 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/output_sink/file.rb 20 5 407 250 5 1 2024-04-02 2024-09-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/output_sink/base.rb 41 12 407 265 13 1 2024-04-02 2024-08-22 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/output_sink/null.rb 9 3 407 373 3 1 2024-04-02 2024-05-06 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/crawler/event_generator.rb 279 11 407 47 11 3 2024-04-02 2025-03-28 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co lib/es/bulk_queue.rb 70 1 289 289 1 1 2024-07-29 2024-07-29 13634519+navarone-feekery@users.noreply.github.com 13634519+navarone-feekery@users.noreply.github.com lib/es/client.rb 198 6 289 9 6 3 2024-07-29 2025-05-05 13634519+navarone-feekery@users.noreply.github.com jedrazb@gmail.com lib/errors.rb 6 3 331 98 3 2 2024-06-17 2025-02-05 13634519+navarone-feekery@users.noreply.github.com matt.nowzari@elastic.co