in src/autotrain/backends/nvcf.py [0:0]
def create(self):
    """
    Submit the training job to NVCF and start polling it in a background thread.

    The job is submitted through ``self._conf_nvcf`` with a payload holding the
    uvicorn launch command for ``autotrain.app.training_api:api``, a copy of
    ``self.env_vars`` and the organisation name taken from the part of the
    ``SPACE_ID`` environment variable that precedes the first ``/``. A thread
    running ``self._poll_nvcf`` against the status URL is then started.

    Returns:
        Whatever ``self._conf_nvcf`` returned for the submit call; it is also
        interpolated into the status URL (presumably a request id -- confirm
        against ``_conf_nvcf``).

    Raises:
        ValueError: If the ``SPACE_ID`` environment variable is not set.
    """
    token = self.env_vars["HF_TOKEN"]
    job_name = f"{self.username}-{self.params.project_name}"

    logger.info("Starting NVCF training")
    logger.info(f"job_name: {job_name}")
    logger.info(f"backend: {self.backend}")

    hardware_id = self.available_hardware[self.backend]["id"]
    submit_url = f"{NVCF_API}/invoke/{hardware_id}"

    space_id = os.environ.get("SPACE_ID")
    if space_id is None:
        raise ValueError("SPACE_ID environment variable is not set")
    # Only the segment before the first "/" is used as the organisation name.
    org_name = space_id.partition("/")[0]

    # Command that starts the training API server with uvicorn via conda.
    launch_cmd = [
        "conda",
        "run",
        "--no-capture-output",
        "-p",
        "/app/env",
        "python",
        "-u",
        "-m",
        "uvicorn",
        "autotrain.app.training_api:api",
        "--host",
        "0.0.0.0",
        "--port",
        "7860",
    ]
    payload = {
        "cmd": launch_cmd,
        # Shallow copy so the payload does not alias self.env_vars.
        "env": dict(self.env_vars.items()),
        "ORG_NAME": org_name,
    }

    request_ref = self._conf_nvcf(
        token=token,
        nvcf_type="job_submit",
        url=submit_url,
        job_name=job_name,
        method="POST",
        payload=payload,
    )

    status_url = f"{NVCF_API}/status/{request_ref}"
    logger.info(f"{job_name}: Polling : {status_url}")

    # NOTE(review): timeout/interval are presumably seconds (48 h / 20 s);
    # confirm against _poll_nvcf.
    poll_kwargs = {
        "url": status_url,
        "token": token,
        "job_name": job_name,
        "method": "GET",
        "timeout": 172800,
        "interval": 20,
    }
    poller = threading.Thread(target=self._poll_nvcf, kwargs=poll_kwargs)
    poller.start()

    return request_ref