spec/integration/redirects_spec.rb (40 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
# frozen_string_literal: true
RSpec.describe 'Redirect handling' do
let(:results) do
num_redirects = 20
FauxCrawl.crawl_site do
page '/' do
body do
link_to '/simple-redirect'
link_to '/circular-redirect'
link_to '/infinite-redirect'
link_to '/redirect-0'
end
end
# Should not be indexed because it is a redirect
page '/simple-redirect' do
redirect '/hello'
end
# Should be discovered via the /simple-redirect page
page '/hello'
# Should not be indexed because it causes an circular redirect
page '/circular-redirect' do
redirect '/circular-redirect'
end
# Should not be indexed because it causes an infinite redirect
page '/infinite-redirect' do
redirect '/infinite-redirect-step2'
end
page '/infinite-redirect-step2' do
redirect '/infinite-redirect'
end
# Create a chain of redirects that are longer than max_redirects
num_redirects.times do |i|
page "/redirect-#{i}" do
redirect "/redirect-#{i + 1}"
end
end
# Should not be indexed since it is referenced via a redirect chain that is too long
page "/redirect-#{num_redirects}"
end
end
it 'crawls all pages following redirects as needed' do
# The following redirect chains don't show up in results:
#
# circular-redirect (single self-redirect, not followed because we have already seen the link)
# infinite-redirect (infinite redirect chain broken by de-duplication)
# redirect-(n) (way too many redirects)
expect(results).to have_only_these_results [
# Home page (no redirects)
mock_response(url: 'http://127.0.0.1:9393/', status_code: 200),
# First redirect chain: followed a redirect and indexed a page
mock_response(url: 'http://127.0.0.1:9393/hello', status_code: 200)
]
end
end