transcoder/output/avro/BaseAvroOutputManager.py (38 lines of code) (raw):
#
# Copyright 2022 Google LLC
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
from transcoder.message import DatacastField, DatacastSchema
from transcoder.output import OutputManager
from transcoder.output.exception import OutputFunctionNotDefinedError
class BaseAvroOutputManager(OutputManager):
"""Base avro output manager implementation. Used by both avro.io and fastavro implementations."""
@staticmethod
def supports_zero_field_schemas():
"""Returns flag indicating if the output manager support schemas with zero fields"""
return True
def __init__(self, prefix: str, output_path: str, lazy_create_resources: bool = False):
super().__init__(lazy_create_resources=lazy_create_resources)
self.prefix = prefix
self.schemas = {}
self.writers = {}
self.output_path = self.create_output_path(output_path, 'avroOut')
def _create_field(self, field: DatacastField):
return field.create_avro_field()
def _add_schema(self, schema: DatacastSchema):
# pylint: disable=duplicate-code
_fields = self._get_field_list(schema.fields)
if schema.name in self.schemas:
del self.schemas[schema.name]
if schema.name in self.writers:
self.writers[schema.name].close()
del self.writers[schema.name]
schema_dict = {'type': 'record', 'namespace': 'sbeMessage', 'name': schema.name, 'fields': _fields}
schema_json = json.dumps(schema_dict)
self._save_schema(schema.name, schema_json)
self.schemas[schema.name] = self._parse_schema(schema_dict)
def _parse_schema(self, schema_dict):
raise OutputFunctionNotDefinedError
def _save_schema(self, name, schema_json):
with open(self._get_file_name(name, 'avsc'), mode='wt', encoding='utf-8') as file:
file.write(schema_json)
def _get_file_name(self, name, extension):
return self.output_path + '/' + self.prefix + '-' + name + '.' + extension
def wait_for_completion(self):
super().wait_for_completion()
for _, writer in self.writers.items():
writer.close()