path # lines of code # active days days since first update days since last update # commits # contributors first updated last updated first contributor last contributor archetype/src/main/resources/archetype-resources/crawler-conf.yaml 62 52 3436 352 60 7 2015-12-11 2024-05-21 julien@digitalpebble.com rzo1@apache.org archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml 3 2 3436 3430 2 1 2015-12-11 2015-12-17 julien@digitalpebble.com julien@digitalpebble.com archetype/src/main/resources/archetype-resources/crawler.flux 115 13 3248 406 13 2 2016-06-16 2024-03-28 julien@digitalpebble.com julien@digitalpebble.com archetype/src/main/resources/META-INF/maven/archetype-metadata.xml 37 11 3436 176 11 3 2015-12-11 2024-11-13 julien@digitalpebble.com mvolikas@gmail.com core/src/main/java/org/apache/stormcrawler/indexing/StdOutIndexer.java 54 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/indexing/AbstractIndexerBolt.java 208 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/indexing/DummyIndexer.java 27 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java 134 3 406 240 4 3 2024-03-28 2024-09-10 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/parse/filter/MD5SignatureParseFilter.java 57 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/XPathFilter.java 175 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/DebugParseFilter.java 39 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/LDJsonParseFilter.java 87 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/CollectionTagger.java 125 2 406 210 4 3 2024-03-28 2024-10-10 13417392+rzo1@users.noreply.github.com psxjoy@outlook.com core/src/main/java/org/apache/stormcrawler/parse/filter/DomainParseFilter.java 36 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/MimeTypeNormalization.java 36 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/parse/filter/LinkParseFilter.java 76 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/filter/CommaSeparatedToMultivaluedMetadata.java 42 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/JSoupFilter.java 10 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/JSoupFilters.java 112 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/parse/DocumentFragmentBuilder.java 69 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java 13 3 406 210 4 3 2024-03-28 2024-10-10 13417392+rzo1@users.noreply.github.com psxjoy@outlook.com core/src/main/java/org/apache/stormcrawler/parse/TextExtractor.java 155 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/parse/ParseData.java 45 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/ParseResult.java 79 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/parse/Outlink.java 35 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/ConfigurableTopology.java 66 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/jsoup/XPathFilter.java 87 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/jsoup/LDJsonParseFilter.java 79 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/jsoup/LinkParseFilter.java 74 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/Constants.java 19 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/DefaultScheduler.java 138 4 406 157 5 3 2024-03-28 2024-12-02 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/persistence/Scheduler.java 23 2 406 352 3 2 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/persistence/MemoryStatusUpdater.java 17 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/EmptyQueueListener.java 5 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/urlbuffer/URLBuffer.java 42 2 406 352 3 2 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/persistence/urlbuffer/PriorityURLBuffer.java 73 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/urlbuffer/SimpleURLBuffer.java 43 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/urlbuffer/SchedulingURLBuffer.java 112 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/urlbuffer/AbstractURLBuffer.java 64 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/AbstractStatusUpdaterBolt.java 166 2 406 167 3 2 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/persistence/AdaptiveScheduler.java 124 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/persistence/Status.java 14 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/StdOutStatusUpdater.java 19 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/persistence/AbstractQueryingSpout.java 166 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/Metadata.java 172 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/InitialisationUtil.java 125 2 406 352 3 2 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/util/ConfUtils.java 138 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/RobotsTags.java 101 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/URLStreamGrouping.java 54 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/CookieConverter.java 88 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java 71 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/StringTabScheme.java 36 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/AbstractConfigurable.java 17 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/PerSecondReducer.java 35 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/util/MetadataTransfer.java 84 2 406 352 3 2 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/util/Configurable.java 41 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java 157 3 406 167 3 2 2024-03-28 2024-11-22 julien@digitalpebble.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/util/URLUtil.java 100 3 406 210 4 3 2024-03-28 2024-10-10 13417392+rzo1@users.noreply.github.com psxjoy@outlook.com core/src/main/java/org/apache/stormcrawler/util/ConfigurableHelper.java 58 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/RefreshTag.java 31 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/util/CollectionMetric.java 20 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/proxy/MultiProxyManager.java 137 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/proxy/SCProxy.java 104 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/proxy/SingleProxyManager.java 35 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/proxy/ProxyManager.java 7 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java 368 6 406 167 7 3 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java 287 4 406 240 5 4 2024-03-28 2024-09-10 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/bolt/URLFilterBolt.java 78 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/bolt/StatusEmitterBolt.java 73 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/bolt/URLPartitionerBolt.java 115 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java 714 3 406 167 4 2 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java 184 3 406 240 4 3 2024-03-28 2024-09-10 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java 397 3 406 167 4 2 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/protocol/ProtocolFactory.java 71 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/DelegatorProtocol.java 192 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/HttpHeaders.java 33 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/file/FileProtocol.java 32 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/file/FileResponse.java 107 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java 475 3 406 352 4 3 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/protocol/okhttp/DNSResolutionListener.java 26 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/HttpRobotRulesParser.java 151 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/httpclient/HttpProtocol.java 283 3 406 352 4 3 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/protocol/selenium/NavigationFilter.java 11 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/selenium/RemoteDriverProtocol.java 90 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/selenium/NavigationFilters.java 64 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/selenium/SeleniumProtocol.java 68 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/protocol/RobotRules.java 60 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/AbstractHttpProtocol.java 127 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/Protocol.java 105 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java 111 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/protocol/ProtocolResponse.java 37 2 406 167 3 2 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/spout/FileSpout.java 169 3 406 352 5 3 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/spout/MemorySpout.java 122 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/regex/RegexURLFilterBase.java 107 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/regex/RegexRule.java 11 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/regex/RegexURLFilter.java 22 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java 245 4 406 157 5 3 2024-03-28 2024-12-02 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/filtering/regex/RegexURLNormalizer.java 186 2 406 370 3 3 2024-03-28 2024-05-03 13417392+rzo1@users.noreply.github.com tallison314159@gmail.com core/src/main/java/org/apache/stormcrawler/filtering/URLFilter.java 13 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java 144 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/metadata/MetadataFilter.java 49 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/basic/SelfURLFilter.java 23 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLFilter.java 58 2 406 352 3 2 2024-03-28 2024-05-21 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java 297 2 406 167 3 2 2024-03-28 2024-11-22 13417392+rzo1@users.noreply.github.com 13417392+rzo1@users.noreply.github.com core/src/main/java/org/apache/stormcrawler/filtering/robots/RobotsFilter.java 51 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/depth/MaxDepthFilter.java 49 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/host/HostURLFilter.java 80 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/java/org/apache/stormcrawler/filtering/sitemap/SitemapFilter.java 49 3 406 69 4 3 2024-03-28 2025-02-28 13417392+rzo1@users.noreply.github.com tallison@apache.org core/src/main/java/org/apache/stormcrawler/JSONResource.java 16 1 406 406 2 2 2024-03-28 2024-03-28 13417392+rzo1@users.noreply.github.com julien@digitalpebble.com core/src/main/resources/crawler-default.yaml 85 83 3493 352 98 11 2015-10-15 2024-05-21 julien@digitalpebble.com rzo1@apache.org