dags/map_reproducibility/a4/llama3_1_70b/nemo.py (37 lines of code) (raw):

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DAGs to run hypercomputer recipes"""

import datetime

from airflow import models
from dags import composer_env
from dags.map_reproducibility.utils.common_utils import get_scheduled_time
from dags.map_reproducibility.utils.common_utils import run_nemo_workload

# Identifiers for this recipe. MODEL_ID is used in the DAG id and passed to
# the workload runner; METRICS_MODEL_ID is passed separately as
# `metrics_model_id` (note the dotted "3.1" spelling).
MODEL_ID = "llama3-1-70b"
METRICS_MODEL_ID = "llama3.1-70b"
PRECISION = "fp8"
# NOTE(review): KUEUE_NAME is not referenced in this module; kept because
# other modules may import it -- confirm before removing.
KUEUE_NAME = "a4-high"
HYPERCOMPUTER = "a4"
FRAMEWORK = "nemo"

# Only the production Composer environment gets a schedule; everywhere else
# the DAG is created with `schedule=None`.
SCHEDULED_TIME = (
    get_scheduled_time(HYPERCOMPUTER, MODEL_ID, FRAMEWORK)
    if composer_env.is_prod_env()
    else None
)

with models.DAG(
    dag_id=f"{HYPERCOMPUTER}_recipes_{MODEL_ID}_{FRAMEWORK}",
    schedule=SCHEDULED_TIME,
    tags=[
        "reproducibility",
        "experimental",
        "xlml",
        "regressiontests",
        # Tag with the hypercomputer this DAG actually targets. The list
        # previously hard-coded "a3mega", which did not match
        # HYPERCOMPUTER ("a4") used everywhere else in this module.
        HYPERCOMPUTER,
    ],
    start_date=datetime.datetime(2025, 3, 1),
    catchup=False,
) as dag:
    run_nemo_workload(
        hypercomputer=HYPERCOMPUTER,
        model_id=MODEL_ID,
        framework=FRAMEWORK,
        precision=PRECISION,
        metrics_model_id=METRICS_MODEL_ID,
        config_model_name="llama3-1-70b-256gpus-a4-fp8.yaml",
    )