transcoder/output/json/JsonOutputManager.py (54 lines of code) (raw):

#
# Copyright 2022 Google LLC
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime
import json

from transcoder.message import DatacastSchema, DatacastField
from transcoder.output import OutputManager


class JsonOutputManager(OutputManager):
    """Transcode messages to JSON encoding persisted to a file.

    One JSON Lines data file (``<prefix>-<schema name>.jsonl``) and one JSON
    schema file (``<prefix>-<schema name>.schema.json``) are written per
    schema into ``self.output_path``.
    """

    def __init__(self, prefix: str, output_path: str, lazy_create_resources: bool = False):
        """Initialise the manager.

        Args:
            prefix: Prefix prepended to every generated file name.
            output_path: Requested output location; passed through
                ``create_output_path`` together with the ``'jsonOut'`` label
                (presumably inherited from ``OutputManager`` -- confirm there).
            lazy_create_resources: Forwarded unchanged to ``OutputManager``.
        """
        super().__init__(lazy_create_resources=lazy_create_resources)
        self.prefix = prefix
        # Serialized JSON schema text, keyed by schema name.
        self.schemas = {}
        # Open text file handles for the .jsonl data files, keyed by schema name.
        self.writers = {}
        self.output_path = self.create_output_path(output_path, 'jsonOut')

    @staticmethod
    def output_type_identifier():
        """Return the identifier string for this output type (``'jsonl'``)."""
        return 'jsonl'

    def _create_field(self, field: DatacastField):
        """Return the JSON field definition produced by ``field`` itself."""
        return field.create_json_field(field)

    def _add_schema(self, schema: DatacastSchema):  # pylint: disable=duplicate-code
        """Register ``schema``: persist its JSON schema and open its data file.

        A previously registered schema of the same name is dropped and its
        writer closed before the new one is created. The data file is opened
        in ``'w'`` mode, so existing content is truncated.

        Raises:
            OSError: If the schema file or the data file cannot be opened.
        """
        self.schemas.pop(schema.name, None)
        previous_writer = self.writers.pop(schema.name, None)
        if previous_writer is not None:
            previous_writer.close()

        schema_json = {
            '$schema': 'https://json-schema.org/draft/2019-09/schema',
            'type': 'object',
            'name': schema.name,
            'properties': {},
        }
        for field in schema.fields:
            schema_json['properties'][field.name] = field.create_json_field(field)

        obj = json.dumps(schema_json)
        self._save_schema(schema.name, obj)

        # Open the data file only after every other fallible step succeeded,
        # so a failure while building or saving the schema can neither leak an
        # open handle nor truncate an existing data file.
        output_file = open(  # pylint: disable=consider-using-with
            self._get_file_name(schema.name, 'jsonl'), 'w', encoding='utf-8')

        self.schemas[schema.name] = obj
        self.writers[schema.name] = output_file

    def _write_record(self, record_type_name, record):
        """Append ``record`` as a single JSON line to its schema's data file.

        Raises:
            KeyError: If no writer is registered for ``record_type_name``.
        """
        self.writers[record_type_name].write(
            json.dumps(record, default=JsonOutputManager.default_formatter) + '\n')

    @staticmethod
    def default_formatter(obj):
        """Custom encoding to serialize additional types as needed.

        ``datetime.date`` and ``datetime.datetime`` values are rendered with
        ``isoformat()``; any other type the JSON encoder cannot handle is
        serialized as an empty string.
        """
        if isinstance(obj, (datetime.date, datetime.datetime)):
            return obj.isoformat()
        return ''

    def _save_schema(self, name, schema_json):
        """Write the serialized schema text to the schema file for ``name``."""
        with open(self.get_schema_file_name(name, 'json'), mode='wt', encoding='utf-8') as file:
            file.write(schema_json)

    def get_schema_file_name(self, name, extension):
        """Returns a file name for the schema file"""
        return self.output_path + '/' + self.prefix + '-' + name + '.schema.' + extension

    def _get_file_name(self, name, extension):
        """Return the data file path for ``name`` with the given extension."""
        return self.output_path + '/' + self.prefix + '-' + name + '.' + extension

    def wait_for_completion(self):
        """Run the parent's completion step, then close every open writer.

        Writers are closed even when the parent call raises, so file handles
        are not left open on the error path.
        """
        try:
            super().wait_for_completion()
        finally:
            for writer in self.writers.values():
                writer.close()