dagify/converter/rules.py (71 lines of code) (raw):

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import codecs
import random
import uuid

import pandas as pd


class Rule:
    """Dispatch named string-transformation rules.

    Every rule is a method called ``rule_<name>`` that receives one list,
    ``vals``: ``vals[0]`` is the value to transform and any further elements
    are rule-specific arguments. Rules report progress and errors with
    ``print``.
    """

    def __init__(self):
        pass

    def run(self, args):
        """Run the rule named by ``args[0]`` against the rest of ``args``.

        Args:
            args: ``[rule_name, value, *rule_args]``.

        Returns:
            Whatever the rule method returns. When the rule does not exist
            the untouched value (``args[1]``) is returned; ``None`` is
            returned when ``args`` carries no rule name or no value.
        """
        if not args:
            print("Error: No Rule passed to Rule.run")
            return None
        method_name = "rule_{0}".format(args[0])
        if self.__can_execute(method_name):
            func = getattr(self, method_name)
            return func(args[1:])
        print(f"Error: Rule not found: {args[0]}")
        # Fall back to the unmodified value; guard against a missing value
        # so an unknown rule never escalates into an IndexError.
        return args[1] if len(args) > 1 else None

    def __can_execute(self, method_name):
        """Return True when ``method_name`` is a callable attribute of self."""
        return callable(getattr(self, method_name, None))

    # Define Rule - LowerCase
    def rule_lowercase(self, vals):
        """Lower-case ``vals[0]`` in place.

        Returns:
            The whole ``vals`` list (not just the string).
        """
        # NOTE(review): returns the list while most rules return vals[0];
        # kept as-is because rule_python_variable_safe relies on it and
        # external callers may too.
        print(f"Info: Rule Lowercase: {vals[0]}")
        vals[0] = vals[0].lower()
        return vals

    # Define Rule - Replace Characters
    def rule_replace(self, vals):
        """Replace every occurrence of ``vals[1]`` in ``vals[0]`` by ``vals[2]``.

        Returns:
            The whole ``vals`` list with ``vals[0]`` updated in place.
        """
        # The message is emitted before the replacement, so the value shown
        # after "output =" is the input string.
        print(f"Info: Rule Replace Characters: {vals[1]} -> {vals[2]} output = {vals[0]}")
        vals[0] = vals[0].replace(vals[1], vals[2])
        return vals

    # Define Rule - Python Variable Safe
    def rule_python_variable_safe(self, vals):
        """Lower-case ``vals[0]`` and turn a fixed set of punctuation into ``_``.

        Returns:
            The transformed string.
        """
        print(f"Info: Rule Python Variable Safe: {vals[0]}")
        vals = self.rule_lowercase(vals)
        for char in ['-', ' ', '.', ':', ';', "$", "!", ",", "#"]:
            if char in vals[0]:
                vals = self.rule_replace([vals[0], char, "_"])
        return vals[0]

    def rule_prefix(self, vals):
        """Prepend ``vals[1]`` and an underscore to ``vals[0]``.

        Returns:
            ``"<vals[1]>_<vals[0]>"``. When the prefix argument is missing an
            error is printed and the value is returned unchanged (``None``
            if there is no value either).
        """
        if len(vals) < 2:
            print("Error: Not Enough Variables passed to Prefix Rule")
            # Hand the value back untouched rather than dropping it to None.
            return vals[0] if vals else None
        print(f"Info: Rule Prefix: {vals[0]}")
        vals[0] = vals[1] + "_" + vals[0]
        return vals[0]

    def rule_suffix(self, vals):
        """Append an underscore and ``vals[1]`` to ``vals[0]``.

        Returns:
            ``"<vals[0]>_<vals[1]>"``. When the suffix argument is missing an
            error is printed and the value is returned unchanged (``None``
            if there is no value either).
        """
        if len(vals) < 2:
            print("Error: Not Enough Variables passed to Suffix Rule")
            # Hand the value back untouched rather than dropping it to None.
            return vals[0] if vals else None
        print(f"Info: Rule Suffix: {vals[0]}")
        vals[0] = vals[0] + "_" + vals[1]
        return vals[0]

    def rule_escape_quotes(self, vals):
        """Put a backslash before each ``'``, ``"`` and backtick in ``vals[0]``.

        Returns:
            The escaped string.
        """
        print(f"Info: Rule Escape Quotes: {vals[0]}")
        for char in ["'", '"', "`"]:
            if char in vals[0]:
                vals = self.rule_replace([vals[0], char, f"\\{char}"])
        return vals[0]

    def rule_make_unique(self, vals):
        """Append a five-character pseudo-unique suffix to ``vals[0]``.

        The suffix is the first five characters of a UUID5 computed from the
        value concatenated with a random integer in ``[0, 1000000]``.

        Returns:
            ``"<vals[0]>_<suffix>"``.
        """
        print(f"Info: Rule Make Unique: {vals[0]}")
        # Draw from the OS entropy source instead of reseeding the shared
        # module-level generator, which would discard any seed the caller set.
        rnd = random.SystemRandom().randint(0, 1000000)
        uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, str(vals[0] + str(rnd))))[:5]
        vals[0] = self.rule_suffix([vals[0], uid])
        return vals[0]

    def rule_obfuscate(self, vals):
        """ROT13-encode ``vals[0]``.

        Returns:
            The encoded string.
        """
        print(f"Info: Rule Obfuscate: {vals[0]}")
        vals[0] = codecs.encode(vals[0], 'rot13')
        return vals[0]

    def rule_lookup_replace(self, vals):
        """Load the lookup CSV named by ``vals[1]`` and return ``vals[0]``.

        Returns:
            ``vals[0]`` unchanged, both on success and when fewer than three
            arguments are supplied.
        """
        print(f"Info: Rule Lookup Replace: {vals[0]}")
        # vals[0] is Lookup Value
        # vals[1] is Lookup File Path
        # vals[2] is Lookup Return Column
        if len(vals) < 3:
            print("Error: Not Enough Variables passed to Lookup Replace Rule")
            return vals[0]

        # NOTE(review): the frame is loaded and printed but never consulted,
        # and vals[2] is unused - the replacement itself looks unimplemented.
        df = pd.read_csv(vals[1], header=0)
        print(df)
        return vals[0]