lib/connectors/base/adapter.rb (93 lines of code) (raw):

#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.
#

require 'active_support/core_ext/array/wrap'
require 'active_support/core_ext/numeric/time'
require 'active_support/core_ext/object/deep_dup'
require 'active_support/core_ext/object/json'
require 'utility'
require 'utility/extension_mapping_util'
require 'date'
require 'json'
require 'mime-types'
require 'uri'

module Connectors
  module Base
    # Class-level helpers for turning remote objects into Elasticsearch-ready
    # documents: id conversion, MIME type / extension lookup, and normalization
    # of enums, dates and paths.
    class Adapter
      # Returns the memoized, frozen list of field names: 'body' followed by
      # Utility::Constants::THUMBNAIL_FIELDS and
      # Utility::Constants::SUBEXTRACTOR_RESERVED_FIELDS. Each entry is frozen.
      def self.fields_to_preserve
        @fields_to_preserve ||= ['body']
          .concat(Utility::Constants::THUMBNAIL_FIELDS)
          .concat(Utility::Constants::SUBEXTRACTOR_RESERVED_FIELDS)
          .map(&:freeze)
          .freeze
      end

      # Defines three singleton methods on the receiver for converting between
      # remote ids and Elasticsearch ids of the form "<id_prefix>_<id>":
      #
      #   <method_prefix>_id_to_es_id(id)     -> "<id_prefix>_<id>"
      #   es_id_is_<method_prefix>_id?(es_id) -> true/false
      #   es_id_to_<method_prefix>_id(es_id)  -> the id part, or raises ArgumentError
      #
      # @param method_prefix [String, Symbol] used to build the generated method names
      # @param id_prefix [String] prefix placed before the id, separated by '_'
      def self.generate_id_helpers(method_prefix, id_prefix)
        # Built once; both generated readers share it.
        # NOTE(review): the pattern is not anchored at the start and id_prefix is
        # interpolated unescaped, so "x<id_prefix>_1" also matches and regex
        # metacharacters in the prefix are interpreted. Left as-is because callers
        # may depend on it -- confirm before tightening to /\A#{Regexp.escape(id_prefix)}_(.+)\z/.
        id_pattern = /#{id_prefix}_(.+)$/

        define_singleton_method("#{method_prefix}_id_to_es_id") do |id|
          "#{id_prefix}_#{id}"
        end

        define_singleton_method("es_id_is_#{method_prefix}_id?") do |es_id|
          # A successful match always carries exactly one capture, so checking
          # for a match is sufficient.
          id_pattern.match?(es_id)
        end

        define_singleton_method("es_id_to_#{method_prefix}_id") do |es_id|
          match = id_pattern.match(es_id)
          raise ArgumentError, "Invalid id #{es_id} for source with method prefix #{method_prefix}." if match.nil?

          match[1]
        end
      end

      # Returns a MIME type for the given file name.
      #
      # MIME::Types is consulted first; when it finds nothing, the file
      # extension is looked up via Utility::ExtensionMappingUtil and the first
      # result (if any) is returned.
      #
      # @param file_name [String]
      # @return [String, nil]
      def self.mime_type_for_file(file_name)
        ruby_detected_type = MIME::Types.type_for(file_name)
        return ruby_detected_type.first.simplified if ruby_detected_type.present?

        extension = extension_for_file(file_name)
        Utility::ExtensionMappingUtil.get_mime_types(extension)&.first
      end

      # Returns the lower-cased extension of file_name without the leading dot.
      #
      # Returns nil (not an empty string) when the name has no extension,
      # because String#delete_prefix! returns nil when nothing was removed.
      #
      # @param file_name [String]
      # @return [String, nil]
      def self.extension_for_file(file_name)
        File.extname(file_name.downcase).delete_prefix!('.')
      end

      # Returns the base name of file_name with its extension removed.
      #
      # @param file_name [String]
      # @return [String]
      def self.strip_file_extension(file_name)
        File.basename(file_name, File.extname(file_name))
      end

      # Returns the lower-cased string form of enum, or nil when enum is nil.
      def self.normalize_enum(enum)
        enum&.to_s&.downcase
      end

      # Converts a date-like value to an RFC 3339 string.
      #
      # Date, Time, DateTime and ActiveSupport::TimeWithZone values are
      # converted directly; anything else is parsed with Time.zone.parse.
      #
      # @param date [Date, Time, DateTime, ActiveSupport::TimeWithZone, String, nil]
      # @return [String, nil] nil when date is blank or cannot be parsed
      def self.normalize_date(date)
        return nil if date.blank?

        case date
        when Date, Time, DateTime, ActiveSupport::TimeWithZone
          date.to_datetime.rfc3339
        else
          begin
            # Time.zone.parse returns nil (rather than raising) for strings
            # without any recognisable date parts, so navigate safely instead of
            # letting a NoMethodError escape the rescue below.
            Time.zone.parse(date)&.to_datetime&.rfc3339
          rescue ArgumentError, TypeError => e
            Utility::ExceptionTracking.capture_exception(e)
            nil
          end
        end
      end

      # Ensures path starts with '/'.
      #
      # @param path [String, nil]
      # @return [String, nil] nil when path is blank
      def self.normalize_path(path)
        return nil if path.blank?
        return path if path.start_with?('/')

        "/#{path}"
      end

      # Extracts the normalized path component of a URL.
      #
      # @param url [String, nil]
      # @return [String, nil] nil when url is blank, has no scheme, has a blank
      #   path, or cannot be parsed
      def self.url_to_path(url)
        return nil if url.blank?

        uri = URI(url)
        return nil if uri.scheme.blank?

        normalize_path(uri.path)
      rescue URI::InvalidURIError, ArgumentError
        nil
      end

      # Builds a document hash from object according to fields.
      #
      # The result always contains :type (the normalized object_type). For each
      # entry in fields, the value is read from object.as_json under the
      # :remote name; when that value is blank, the :remote name is split on
      # '.' and looked up as a nested path instead. Values that end up nil are
      # skipped; everything else is stored under the entry's :target key.
      #
      # @param object_type [#to_s] stored under :type after normalize_enum
      # @param object [#as_json] source object
      # @param fields [Array<Hash>] entries with :remote and :target keys
      # @return [Hash] document with symbolized keys
      def self.es_document_from_configured_object_base(object_type:, object:, fields:)
        object_as_json = object.as_json
        adapted_object = {
          :type => normalize_enum(object_type)
        }

        fields.each do |field_data|
          remote_field_name = field_data.fetch(:remote)

          value = object_as_json[remote_field_name]
          # Fall back to treating the remote name as a dotted path into nested data.
          value = object_as_json.dig(*remote_field_name.split('.')) if value.blank?
          next if value.nil?

          adapted_object[field_data.fetch(:target)] = value
        end

        adapted_object.symbolize_keys
      end

      # Instance-level access to the class-level normalizers.
      delegate :normalize_enum, :normalize_date, :normalize_path, :to => :class
    end
  end
end