in bulkprovision/bulkmonitor/__init__.py [0:0]
def HandleFailed(self):
    '''
    Handles failed status flags and retry logic. Entries will either be retried or end up FAILED.

    Every item returned by self.dynamo_query("status", self._status) is examined and
    written back through self.updateItem with its next status:

    * No "scproductdetails" (attribute missing or None): the launch never produced a
      product, so the item goes down the retry path.
    * Otherwise the provisioned product is described in Service Catalog:
        - AVAILABLE                -> "AVAILABLE"
        - TAINTED / ERROR          -> terminated and treated as failed when we were tracking
                                      it as PROVISIONING / STATUS-ERROR / PRODUCT-ERROR,
                                      otherwise recorded as "PRODUCT-ERROR"
        - anything else            -> terminated and treated as failed
        - "not found" ClientError  -> the product is gone, retry path
        - other errors             -> treated as failed
    * Retry path: launchparams["retries"] is incremented; the item becomes "RETRY" while
      the count is below self._retries, otherwise "FAILED".
    * Failed path: the item is parked as "TERMINATING-FAILURE" while retries remain and
      becomes "FAILED" once they are used up.

    Returns:
        The value of self.generate_return(count), where count is the number of items
        whose describe call completed without raising plus the items set to "RETRY".
    '''
    # get the provisioning products and poll them for updates
    prov_items = self.dynamo_query("status", self._status)
    logger.info("Found {} items to Recover with status {}".format(len(prov_items), self._status))
    count = 0
    errors = 0

    for drow in prov_items:
        # per-item flags that decide the final status further down
        failed = False
        retry = False
        ppdetails = None
        # keep the error message if we have one
        errordetails = drow.get("errordetails")

        dbstatus = drow["status"]  # status key from dynamo - used by this program to track progress
        param_dict = drow["launchparams"]
        guidkey = drow["guidkey"]

        # do we even have a product or did it fail before launch?
        # .get() so an item that never had the attribute written is retried
        # instead of aborting the whole batch with a KeyError
        scdetails = drow.get("scproductdetails")
        if scdetails is None:
            # product totally failed, mark it as retry
            retry = True
        else:
            # we have product details, so build the ppdetails object that is stored on update
            ppid = scdetails["ProvisionedProductId"]
            ppdetails = {
                'ProvisionedProductId': ppid,
                'RecordId': scdetails['RecordId'],
                'CreatedTime': str(scdetails['CreatedTime']),
                'Status': scdetails['Status'],
            }
            try:
                # get the product then record the important parts of the response
                resp = self.sc_describe_prov_product(ppid)
                detail = resp['ProvisionedProductDetail']
                prod_status = detail['Status']  # status reported by SC, drives our own dbstatus
                ppdetails['RecordId'] = detail['LastRecordId']
                ppdetails['Status'] = prod_status

                # handle the status messages
                if prod_status == 'AVAILABLE':
                    dbstatus = "AVAILABLE"
                elif prod_status in ['TAINTED', 'ERROR']:
                    if dbstatus in ["PROVISIONING", "STATUS-ERROR", "PRODUCT-ERROR"]:
                        # we marked the product as provisioning, but SC returned an error:
                        # it failed to provision, so terminate it in Service Catalog
                        self.sc_terminate_product(ppid)
                        failed = True
                    else:
                        # failed at some other point - failed termination??
                        errors += 1
                        dbstatus = "PRODUCT-ERROR"
                else:
                    # something else is wrong - lets just terminate
                    self.sc_terminate_product(ppid)
                    failed = True
            except ClientError as ce:
                msg = ce.response['Error']['Message']
                if msg.startswith("Provisioned product not found: "):
                    # the product is gone, i.e. the termination we asked for has happened.
                    # instead of marking it terminated, let the provisioner run it again
                    retry = True
                elif msg.startswith("Can't terminate provisioned product because it's still under change or its status does not allow further operation"):
                    # its in a terminating state?? just wait on this one
                    # leave the original error in place
                    failed = True
                else:
                    # something wrong from the API call. this is where you will see the Errors returned from SC
                    errordetails = ce.response['Error']
                    logger.error("ClientError: {}".format(msg))
                    failed = True
            except Exception as e:
                # Something else wrong?
                logger.error(e)
                failed = True
            else:
                # the describe/terminate calls completed without raising
                count += 1

        param_dict.setdefault('retries', 0)

        if retry:
            # mark it RETRY and let the provisioner run it again, unless the budget is spent
            param_dict['retries'] = param_dict['retries'] + 1
            if param_dict['retries'] < self._retries:
                dbstatus = "RETRY"
                count += 1
            else:
                # this is our final stop, we retried and could not proceed any further
                logger.error("Provision failed for {} after {} tries, giving up. Error Message:{}".format(guidkey, param_dict['retries'], errordetails))
                errors += 1
                dbstatus = "FAILED"

        if failed:
            errors += 1
            if param_dict['retries'] < self._retries:
                # retries remain: park the item until the termination finishes; a later
                # pass then hits "Provisioned product not found" and takes the retry path.
                # NOTE(review): this path does not increment 'retries' itself - confirm
                # that repeated API errors cannot keep an item here indefinitely.
                dbstatus = "TERMINATING-FAILURE"
            else:
                # retry budget exhausted, nothing left to try
                logger.error("Provision failed for {} after {} tries, giving up. Error Message:{}".format(guidkey, param_dict['retries'], errordetails))
                dbstatus = "FAILED"

        # update the dynamo table
        self.updateItem(guidkey, drow["status"], dbstatus, param_dict, ppdetails, errordetails)

    if prov_items:
        logger.info("Recovered {} of {} products with {} errors using status:{}".format(count, len(prov_items), errors, self._status))
    return self.generate_return(count)