# elasticsearch/spec/integration/helpers/bulk_helper_spec.rb (174 lines of code) (raw):
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
require_relative File.expand_path('../../spec_helper', __dir__)
require 'elasticsearch/helpers/bulk_helper'
require 'tempfile'
context 'Elasticsearch client helpers' do
context 'Bulk helper' do
# Names of the two indices the examples write to.
let(:index) do
  'bulk_animals'
end
let(:index_slice) do
  'bulk_animals_slice'
end
# Parameters handed to the helper's constructor.
let(:params) do
  { refresh: 'wait_for' }
end
# Helper under test, bound to the main index.
let(:bulk_helper) do
  Elasticsearch::Helpers::BulkHelper.new(CLIENT, index, params)
end
# Ten sample documents, built from [scientific_name, name] pairs.
let(:docs) do
  [
    ['Lama guanicoe', 'Guanaco'],
    ['Tayassu pecari', 'White-lipped peccary'],
    ['Snycerus caffer', 'Buffalo, african'],
    ['Coluber constrictor', 'Snake, racer'],
    ['Thalasseus maximus', 'Royal tern'],
    ['Centrocercus urophasianus', 'Hen, sage'],
    ['Sitta canadensis', 'Nuthatch, red-breasted'],
    ['Aegypius tracheliotus', 'Vulture, lappet-faced'],
    ['Bucephala clangula', 'Common goldeneye'],
    ['Felis pardalis', 'Ocelot']
  ].map { |scientific_name, name| { scientific_name: scientific_name, name: name } }
end
# Drop both indices after every example. `ignore: 404` is passed, presumably
# so that deleting an index an example never created does not fail.
after do
  [index, index_slice].each do |index_name|
    CLIENT.indices.delete(index: index_name, ignore: 404)
  end
end
# Ingesting the sample documents returns a 200 bulk response in which every
# item reports status 201.
it 'Ingests documents' do
  response = bulk_helper.ingest(docs)
  expect(response).to be_an_instance_of Elasticsearch::API::Response
  expect(response.status).to eq(200)
  # Compare the whole set of distinct statuses: looking only at `.uniq.first`
  # would still pass if a later item carried a different status.
  item_statuses = response['items'].map { |item| item['index']['status'] }
  expect(item_statuses.uniq).to eq([201])
end
# Ingests two documents, modifies them and sends them back through
# `update`, expecting every item to report the result 'updated'.
it 'Updates documents' do
  # Named `new_docs` so the shared `docs` fixture is not shadowed.
  new_docs = [
    { scientific_name: 'Otocyon megalotos', name: 'Bat-eared fox' },
    { scientific_name: 'Herpestes javanicus', name: 'Small Indian mongoose' }
  ]
  bulk_helper.ingest(new_docs)
  # Get the ingested documents and add each hit's _id under the 'id' key:
  animals = CLIENT.search(index: index)['hits']['hits']
  updates = animals.map { |animal| animal['_source'].merge('id' => animal['_id']) }
  # Modify every document in place; `each` because only the side effect matters.
  updates.each { |doc| doc['scientific_name'].upcase! }
  response = bulk_helper.update(updates)
  expect(response.status).to eq(200)
  # Every item must be 'updated', not just the first distinct result.
  results = response['items'].map { |item| item['update']['result'] }
  expect(results.uniq).to eq(['updated'])
end
# Ingests the sample documents, deletes them by the ids returned from the
# ingest response, and checks that the index ends up empty.
it 'Deletes documents' do
  ingest_response = bulk_helper.ingest(docs)
  ids = ingest_response.body['items'].map { |item| item['index']['_id'] }
  response = bulk_helper.delete(ids)
  expect(response.status).to eq 200
  # Every item must be 'deleted', not just the first distinct result.
  results = response['items'].map { |item| item['delete']['result'] }
  expect(results.uniq).to eq(['deleted'])
  expect(CLIENT.count(index: index)['count']).to eq(0)
end
# Ingests the sample documents in slices of two into the slice index and
# checks both what the block receives and what ends up stored.
it 'Ingests documents and yields response and docs' do
  slice = 2
  yields = 0
  # Distinct local names so the `bulk_helper` and `docs` fixtures are not shadowed.
  slice_helper = Elasticsearch::Helpers::BulkHelper.new(CLIENT, index_slice, params)
  slice_helper.ingest(docs, { slice: slice }) do |response, sliced_docs|
    yields += 1
    expect(response).to be_an_instance_of Elasticsearch::API::Response
    expect(sliced_docs.count).to eq slice
  end
  # Guard against a vacuous pass: the expectations above only run if the
  # helper actually yields.
  expect(yields).to be_positive
  response = CLIENT.search(index: index_slice, size: 200)
  stored = response['hits']['hits'].map { |hit| hit['_source'].transform_keys(&:to_sym) }
  # The search is unsorted, so compare contents without depending on hit order.
  expect(stored).to match_array(docs)
end
context 'JSON File helper' do
# Temporary file that receives the JSON fixture before each example.
let(:file) { Tempfile.new('test-data.json') }
# JSON fixture: a top-level array of five documents. The heredoc is the
# block's return value; no local assignment is needed.
let(:json) do
  <<~JSON
    [
    {
    "character_name": "Anallese Lonie",
    "species": "mouse",
    "catchphrase": "Seamless regional definition",
    "favorite_food": "pizza"
    },
    {
    "character_name": "Janey Davidovsky",
    "species": "cat",
    "catchphrase": "Down-sized responsive pricing structure",
    "favorite_food": "pizza"
    },
    {
    "character_name": "Morse Mountford",
    "species": "cat",
    "catchphrase": "Ameliorated modular data-warehouse",
    "favorite_food": "carrots"
    },
    {
    "character_name": "Saundra Kauble",
    "species": "dog",
    "catchphrase": "Synchronised 24/7 support",
    "favorite_food": "carrots"
    },
    {
    "character_name": "Kain Viggars",
    "species": "cat",
    "catchphrase": "Open-architected asymmetric circuit",
    "favorite_food": "carrots"
    }
    ]
  JSON
end
# Write the fixture into the temp file and rewind it before each example.
before do
  file.write(json)
  file.rewind
end
# Close and remove the temp file after each example.
after do
  file.close
  file.unlink
end
# Ingesting the JSON fixture file returns a 200 bulk response in which every
# item reports status 201.
it 'Ingests a JSON file' do
  response = bulk_helper.ingest_json(file)
  expect(response).to be_an_instance_of Elasticsearch::API::Response
  expect(response.status).to eq(200)
  # Same per-item check as the other ingest examples in this file: a 200 on
  # the response alone does not show what each item reported.
  item_statuses = response['items'].map { |item| item['index']['status'] }
  expect(item_statuses.uniq).to eq([201])
end
context 'with data not in root of JSON file' do
# JSON fixture in which the five documents sit under "data" -> "items"
# instead of at the root. The heredoc is the block's return value; no local
# assignment is needed.
let(:json) do
  <<~JSON
    {
    "field": "value",
    "status": 200,
    "data": {
    "items": [
    {
    "character_name": "Anallese Lonie",
    "species": "mouse",
    "catchphrase": "Seamless regional definition",
    "favorite_food": "pizza"
    },
    {
    "character_name": "Janey Davidovsky",
    "species": "cat",
    "catchphrase": "Down-sized responsive pricing structure",
    "favorite_food": "pizza"
    },
    {
    "character_name": "Morse Mountford",
    "species": "cat",
    "catchphrase": "Ameliorated modular data-warehouse",
    "favorite_food": "carrots"
    },
    {
    "character_name": "Saundra Kauble",
    "species": "dog",
    "catchphrase": "Synchronised 24/7 support",
    "favorite_food": "carrots"
    },
    {
    "character_name": "Kain Viggars",
    "species": "cat",
    "catchphrase": "Open-architected asymmetric circuit",
    "favorite_food": "carrots"
    }
    ]
    }
    }
  JSON
end
# Passes the path to the documents as an Array of keys and expects every
# item to report status 201.
it 'Ingests a JSON file passing keys as Array' do
  response = bulk_helper.ingest_json(file, { keys: %w[data items] })
  expect(response).to be_an_instance_of Elasticsearch::API::Response
  expect(response.status).to eq(200)
  # Compare the whole set of distinct statuses, not just the first one.
  item_statuses = response['items'].map { |item| item['index']['status'] }
  expect(item_statuses.uniq).to eq([201])
end
# Passes the path to the documents as a comma-separated String of keys and
# expects every item to report status 201.
it 'Ingests a JSON file passing keys as String' do
  response = bulk_helper.ingest_json(file, { keys: 'data,items' })
  expect(response).to be_an_instance_of Elasticsearch::API::Response
  expect(response.status).to eq(200)
  # Compare the whole set of distinct statuses, not just the first one.
  item_statuses = response['items'].map { |item| item['index']['status'] }
  expect(item_statuses.uniq).to eq([201])
end
end
end
end
end