spec/integration/nofollow_spec.rb (30 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
# frozen_string_literal: true
# Integration spec covering robots meta directives and rel=nofollow links.
#
# Layout of the fixture site declared below:
#   /            links to /noindex and /nofollow, plus a rel=nofollow link to /unreachable
#   /noindex     carries robots 'noindex' and links to /foo
#   /nofollow    carries robots 'nofollow' and links to /unreachable
#   /foo         linked only from /noindex
#   /unreachable linked only through the nofollow link and the nofollow page
RSpec.describe 'Robots meta support' do
  # Crawl output for the fixture site; evaluated lazily on first use in an example.
  let(:results) do
    FauxCrawl.crawl_site do
      page '/' do
        body do
          link_to('/noindex')
          link_to('/nofollow')

          # Marked rel=nofollow, so the crawler is expected to skip this link
          link_to('/unreachable', rel: :nofollow)
        end
      end

      # Expected to be left out of the index while its links are still followed
      page '/noindex' do
        head do
          robots 'noindex'
        end

        body do
          link_to('/foo')
        end
      end

      # Expected to be indexed while its links are ignored
      page '/nofollow' do
        head do
          robots 'nofollow'
        end

        body do
          link_to('/unreachable')
        end
      end

      # The only route to this page goes through /noindex
      page '/foo'

      # Linked exclusively via nofollow links/pages, so the crawler should never discover it
      page '/unreachable'
    end
  end

  # Responses the crawl is expected to produce: the root page, the nofollow
  # page itself, and /foo (discovered through the noindex page).
  let(:expected_responses) do
    [
      mock_response(url: 'http://127.0.0.1:9393/', status_code: 200),
      mock_response(url: 'http://127.0.0.1:9393/nofollow', status_code: 200),
      mock_response(url: 'http://127.0.0.1:9393/foo', status_code: 200)
    ]
  end

  it 'crawls all pages given the constraints specified by robots meta tags' do
    expect(results).to have_only_these_results(expected_responses)
  end
end