python/dataproc_templates/base_template.py (18 lines of code) (raw):

# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import annotations from typing import Dict, Sequence, Optional, Any from abc import ABC as AbstractClass, abstractmethod from logging import Logger from pyspark.sql import SparkSession __all__ = ['BaseTemplate'] class BaseTemplate(AbstractClass): """Base class for all Dataproc Templates""" def get_logger(self, spark: SparkSession) -> Logger: """ Convenience method to get the Spark logger from a SparkSession Args: spark (SparkSession): The initialized SparkSession object Returns: Logger: The Spark logger """ log_4j_logger = spark.sparkContext._jvm.org.apache.log4j # pylint: disable=protected-access return log_4j_logger.LogManager.getLogger(__name__) @classmethod def build(cls) -> BaseTemplate: """ Factory method for building an instance of this template class. """ return cls() @staticmethod @abstractmethod def parse_args(args: Optional[Sequence[str]] = None) -> Dict[str, Any]: """ Parses this template's arguments, returning them as a dictionary. Implementations of this method should ignore unknown arguments. Args: args (Optional[Sequence[str]]): The template arguments. By default, command line arguments are used. """ @abstractmethod def run(self, spark: SparkSession, args: Dict[str, Any]) -> None: """ Runs this template """