spec/integration/charset_spec.rb (44 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
# frozen_string_literal: true
RSpec.describe 'Content charset' do
  # Fixture site: a root page linking to two pages that emit the same text.
  # One page declares `charset=UTF-8` in its Content-Type header; the other
  # sends a bare `text/html` and so leaves the charset unspecified.
  let(:site) do
    Faux.site do
      page '/' do
        body do
          link_to '/utf8-without-charset'
          link_to '/utf8-with-charset'
        end
      end

      page '/utf8-with-charset' do
        headers 'Content-Type' => 'text/html; charset=UTF-8'
        body { text { "ma\u00F1ana ol\u00E9" } }
      end

      page '/utf8-without-charset' do
        headers 'Content-Type' => 'text/html'
        body { text { "ma\u00F1ana ol\u00E9" } }
      end
    end
  end

  # Origin shared by every URL in the expected crawl results.
  let(:crawl_origin) { 'http://127.0.0.1:9393' }

  # Markup expected when the page bytes are interpreted as UTF-8.
  let(:utf8_markup) { "<html><body>ma\u00F1ana ol\u00E9</body></html>" }

  it 'defaults to UTF-8' do
    # With no explicit fallback, both pages are expected to yield the same
    # correctly decoded content, whether or not the charset was declared.
    expected = [
      mock_response(url: "#{crawl_origin}/", status_code: 200),
      mock_response(url: "#{crawl_origin}/utf8-with-charset", status_code: 200, content: utf8_markup),
      mock_response(url: "#{crawl_origin}/utf8-without-charset", status_code: 200, content: utf8_markup)
    ]

    expect(FauxCrawl.run(site)).to have_only_these_results(expected)
  end

  it 'can override fallback encoding' do
    # Same raw bytes as the UTF-8 markup, but tagged as ISO-8859-1 without any
    # transcoding: this is what the charset-less page is expected to produce
    # once the fallback encoding is overridden.
    latin1_tagged_markup =
      "<html><body>ma\xC3\xB1ana ol\xC3\xA9</body></html>".dup.force_encoding('ISO-8859-1')

    expected = [
      mock_response(url: "#{crawl_origin}/", status_code: 200),
      # The declared charset still wins over the configured fallback.
      mock_response(url: "#{crawl_origin}/utf8-with-charset", status_code: 200, content: utf8_markup),
      mock_response(url: "#{crawl_origin}/utf8-without-charset", status_code: 200, content: latin1_tagged_markup)
    ]

    crawl_results = FauxCrawl.run(site, default_encoding: 'ISO-8859-1')
    expect(crawl_results).to have_only_these_results(expected)
  end
end