scripts/translate.py (95 lines of code) (raw):
import requests
import os
import argparse
import sys
import json
import uuid
# Variables
# Keys whose values are sent to the translator when found in a checklist object
translate_keys = ('description', 'name', 'category', 'subcategory', 'waf', 'text', 'severity')
# Languages to translate the checklist into
translate_languages = ['es', 'ja', 'pt', 'ko', 'zh-Hant']
# Get environment variables
# .get() with an empty default is used on purpose: indexing os.environ directly
# raises KeyError for a missing variable, which would abort the script with a
# traceback before any emptiness check on these values could report the problem.
translator_endpoint = os.environ.get("AZURE_TRANSLATOR_ENDPOINT", "")
translator_region = os.environ.get("AZURE_TRANSLATOR_REGION", "")
translator_key = os.environ.get("AZURE_TRANSLATOR_SUBSCRIPTION_KEY", "")
# Build the URL of the translate operation; normalising the trailing slash makes
# the endpoint work whether or not it was configured with a final '/'
translator_url = translator_endpoint.rstrip('/') + '/translate'
# Get input arguments
parser = argparse.ArgumentParser(description='Translate a JSON file')
# The input file is mandatory: declaring it as required lets argparse reject a
# missing value with a usage message, instead of a None file name reaching open()
parser.add_argument('--input-file-name', dest='file_name_in', action='store',
                    required=True,
                    help='you need to supply file name where your JSON to be translated is located')
# The output file name stays optional
parser.add_argument('--output-file-name', dest='file_name_out', action='store',
                    help='you need to supply file name where the translated JSON will be saved')
parser.add_argument('--verbose', dest='verbose', action='store_true',
                    default=False,
                    help='run in verbose mode (default: False)')
# Parses sys.argv; argparse exits the process on invalid or missing arguments
args = parser.parse_args()
# Check we have all information
if translator_endpoint and translator_region and translator_key:
    if args.verbose:
        # The subscription key is a secret: print a placeholder instead of its value,
        # since debug output may be captured in logs
        print('DEBUG: environment variables retrieved successfully: {0}, {1}, {2}'.format(translator_endpoint, translator_region, '***'))
else:
    # At least one of the three values is empty: translation cannot work, so stop here
    print('ERROR: couldnt retrieve environment variables for translation')
    sys.exit(1)
# Get JSON
try:
    # Read explicitly as UTF-8 instead of relying on the platform default encoding,
    # consistently with the UTF-8 encoding used to write the translated files
    with open(args.file_name_in, encoding='utf-8') as f:
        checklist = json.load(f)
except Exception as e:
    # Any failure to open or parse the input file is fatal for the script
    print("ERROR: Error when processing JSON file", args.file_name_in, "-", str(e))
    sys.exit(1)
# Function to translate a single line of text to a single language
def translate_text(text_to_translate, languages):
    """Translate an English text by calling the translator REST endpoint.

    Args:
        text_to_translate: text to translate; it is sent with 'en' as source language.
        languages: target language code, or a list of target language codes.

    Returns:
        The translated text for the first target language as a string, or None
        when the request fails or its response cannot be processed (an error
        message is printed in that case).
    """
    if args.verbose:
        print('DEBUG: translating text "{0}" on {1}...'.format(text_to_translate, translator_url))
    # If a single language is specified, convert to list
    if not isinstance(languages, list):
        languages = [languages]
    # Azure Translator parameters
    translator_params = {
        'api-version': '3.0',
        'from': 'en',
        'to': languages
    }
    translator_headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        'Ocp-Apim-Subscription-Region': translator_region,
        'Content-type': 'application/json',
        'Accept': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }
    translator_body = [{
        'text': text_to_translate
    }]
    if args.verbose:
        # Print a copy of the headers with the subscription key masked, so that the
        # secret does not end up in the debug output
        logged_headers = dict(translator_headers)
        logged_headers['Ocp-Apim-Subscription-Key'] = '***'
        print("DEBUG: sending body", str(translator_body))
        print("DEBUG: sending HTTP headers", str(logged_headers))
        print("DEBUG: sending parameters", str(translator_params))
    try:
        # Without a timeout, requests waits forever on an unresponsive server
        http_response = requests.post(translator_url, params=translator_params,
                                      headers=translator_headers, json=translator_body,
                                      timeout=30)
        # Report HTTP errors with their status and body, rather than failing later
        # with an unhelpful message while indexing the error payload
        if not http_response.ok:
            print("ERROR: Error in translation: HTTP {0}: {1}".format(http_response.status_code, http_response.text))
            return None
        response = http_response.json()
        if args.verbose:
            print("DEBUG: translator response:")
            print(json.dumps(response, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))
        # Only the first translation of the first (and only) submitted text is returned
        return str(response[0]['translations'][0]['text'])
    except Exception as e:
        # Best effort: report the problem and let the caller deal with the missing translation
        print("ERROR: Error in translation:", str(e))
        return None
# Go over all keys and translate them if required
def translate_object(checklist_object, language):
    """Return a translated copy of a checklist object.

    Lists found as dictionary values are processed item by item, recursively.
    Non-list values are passed to translate_text() when their key is one of
    translate_keys; every other value is copied as is.

    Args:
        checklist_object: object to translate. Only dictionaries are processed;
            any other value is returned unchanged.
        language: target language, handed over to translate_text().

    Returns:
        A shallow copy of the dictionary with the translated values, or the
        argument itself when it is not a dictionary.
    """
    # Values that are not dictionaries (for example plain strings inside a list) are
    # handed back untouched: returning None for them would replace those items with
    # null in the translated output
    if not isinstance(checklist_object, dict):
        return checklist_object
    translated_object = checklist_object.copy()
    # Iterate over the source object and write into the copy
    for (k, v) in checklist_object.items():
        if isinstance(v, list):
            translated_object[k] = [translate_object(list_item, language) for list_item in v]
        elif k in translate_keys:
            translated_text = translate_text(v, language)
            # translate_text() gives None when the translation failed: keep the
            # original value in that case instead of losing the text
            translated_object[k] = v if translated_text is None else translated_text
    return translated_object
################
# Main #
################
if args.verbose:
    print("DEBUG: Starting translations for languages", str(translate_languages))
# One file is written per language. Its name is derived from the output file name if
# one was specified, and from the input file name otherwise: the text of the base
# name before the first dot is kept, and '.<language>.json' is appended to it.
# Deriving the name in both cases gives every language its own file, and avoids
# using an undefined output name when --output-file-name is supplied.
file_name_template = args.file_name_out if args.file_name_out else args.file_name_in
file_name_template_dir = os.path.dirname(file_name_template)
file_name_template_noext = os.path.basename(file_name_template).split('.')[0]
for using_language in translate_languages:
    print("INFO: Starting translation to", using_language)
    translated_checklist = translate_object(checklist, using_language)
    file_name_out = os.path.join(file_name_template_dir,
                                 file_name_template_noext + '.' + using_language + '.json')
    print("INFO: saving output file to", file_name_out)
    # ensure_ascii=False keeps the translated characters readable in the file
    translated_checklist_string = json.dumps(translated_checklist, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': '))
    # The with statement closes the file, no explicit close() is required
    with open(file_name_out, 'w', encoding='utf-8') as f:
        f.write(translated_checklist_string)