transcoder/output/OutputManager.py (68 lines of code) (raw):
#
# Copyright 2022 Google LLC
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import concurrent
import os
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
from transcoder.message import DatacastField, DatacastSchema
from transcoder.output.exception import OutputFunctionNotDefinedError, OutputManagerSchemaError
class OutputManager:
    """Abstract base class for output managers.

    Keeps track of the schemas it has been told about and, depending on
    ``lazy_create_resources``, either creates their backing resources up front
    on a thread pool or on demand the first time a record of that type is
    written. Subclasses provide the actual behavior by overriding
    ``output_type_identifier``, ``_create_field``, ``_add_schema`` and
    ``_write_record``.
    """

    @staticmethod
    def output_type_identifier():
        """Returns identifier of output type. Must be overridden by subclasses."""
        raise OutputFunctionNotDefinedError

    @staticmethod
    def supports_data_writing():
        """Returns flag indicating if data writes are supported"""
        return True

    @staticmethod
    def supports_zero_field_schemas():
        """Returns flag indicating if the output manager supports schemas with zero fields"""
        return False

    def __init__(self, schema_max_workers: int = 5, lazy_create_resources: bool = False):
        """Initializes the schema bookkeeping and the schema creation thread pool.

        Args:
            schema_max_workers: Maximum number of worker threads used to create schemas.
            lazy_create_resources: When truthy, schemas are not created when enqueued but
                the first time a record of the corresponding type is written.
        """
        # Pool on which add_schema runs for eagerly created schemas
        self.schema_thread_pool_executor: ThreadPoolExecutor = ThreadPoolExecutor(
            max_workers=schema_max_workers)
        self.lazy_create_resources = lazy_create_resources
        # Schema name -> True for every schema that add_schema has completed for
        self.existing_schemas = {}
        # Schema name -> schema for every schema passed to enqueue_schema
        self.schema_definitions = {}
        # Futures of the add_schema calls submitted by enqueue_schema
        self.schema_futures = []

    def _create_field(self, field: "DatacastField"):
        """Converts a single field to its output representation. Must be overridden by subclasses."""
        raise OutputFunctionNotDefinedError

    def _get_field_list(self, fields: "list[DatacastField]") -> list:
        """Returns the result of _create_field for every entry of fields, in order"""
        return [self._create_field(field) for field in fields]

    def enqueue_schema(self, schema):
        """Enqueues a schema for creation. If lazy_create_resources is enabled, schemas will only be created
        on-demand when required """
        # Truthiness rather than `is False`, so a falsy non-bool flag (0, None) still creates eagerly
        if not self.lazy_create_resources:
            future = self.schema_thread_pool_executor.submit(self.add_schema, schema)
            self.schema_futures.append(future)
        # Always remember the definition; write_record looks it up for on-demand creation
        self.schema_definitions[schema.name] = schema

    def add_schema(self, schema: "DatacastSchema"):
        """Adds a DatacastSchema instance to list of known message schemas"""
        self._add_schema(schema)
        # Only reached when _add_schema did not raise, so failed schemas are never marked as existing
        if schema.name not in self.existing_schemas:
            self.existing_schemas[schema.name] = True

    def _add_schema(self, schema: "DatacastSchema"):
        """Hook for subclasses to create the resources backing schema. Does nothing by default."""

    def write_record(self, record_type_name, record):
        """Writes a record of the given type, creating the schema first when lazy creation is enabled.

        With lazy_create_resources enabled, a record type that was never passed to
        enqueue_schema raises KeyError from the schema_definitions lookup.
        """
        # Truthiness rather than `is True`, so a truthy non-bool flag (1) still creates on demand
        if self.lazy_create_resources and record_type_name not in self.existing_schemas:
            self.add_schema(self.schema_definitions[record_type_name])
        self._write_record(record_type_name, record)

    def _write_record(self, record_type_name, record):
        """Writes a single record. Must be overridden by subclasses."""
        raise OutputFunctionNotDefinedError

    def wait_for_schema_creation(self):
        """Wait for enqueued schema resources. Nothing to wait for if lazy_create_resources is enabled.

        Shuts the thread pool down, so the executor accepts no further submissions afterwards.

        Raises:
            OutputManagerSchemaError: Wraps the list of exceptions raised by the enqueued
                schema creations, if there were any.
        """
        self.schema_thread_pool_executor.shutdown(wait=True)
        futures.wait(self.schema_futures)
        # Walk the futures in submission order so the collected exceptions have a stable order
        exceptions = []
        for future in self.schema_futures:
            exception = future.exception()
            if exception:
                exceptions.append(exception)
        if exceptions:
            raise OutputManagerSchemaError(exceptions)

    def wait_for_completion(self):
        """Extend or override to wait until output manager has fully completed writing and other work"""
        self.wait_for_schema_creation()

    def create_output_path(self, output_path: str, relative_path: str):
        """Creates the output directory if it doesn't exist and returns its path.

        When output_path is None the directory is {current working directory}/{relative_path};
        otherwise output_path is used as given and relative_path is ignored.
        """
        if output_path is None:
            _output_path = os.path.join(os.getcwd(), relative_path)
        else:
            _output_path = output_path
        if not os.path.exists(_output_path):
            # exist_ok tolerates another thread or process creating the directory
            # between the existence check above and this call
            os.makedirs(_output_path, exist_ok=True)
        return _output_path