in amzn-smt-prediction/scripts/generate_predictions.py [0:0]
def do_runtime_prediction(problem_file, endpoint_runtime):
    """Predict CVC4's runtime class for a single SMT problem.

    Generates offline features (via the compiled 'amzn-smt-prediction' binary)
    and online features (from a short CVC4 run), concatenates them, optionally
    normalizes them, and queries the SageMaker Runtime Prediction Model
    endpoint for a class probability vector.

    Parameters
    ----------
    problem_file : str
        Path to the SMT problem file to predict on.
    endpoint_runtime : str or None
        Name of the SageMaker endpoint for the Runtime Prediction Model.
        If None, an error is reported and the script exits.

    Returns
    -------
    list
        The online feature vector produced by get_features().

    Side effects: prints progress/results depending on the module-level
    verbosity flag `v`; exits with status 1 on missing binary/endpoint or
    if offline feature generation fails.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    # Check for offline feature generation executable
    if not isfile(path_to_off_exec):
        print("ERROR: In order to use this script (as configured), you must compile a binary for 'amzn-smt-prediction' (and have it in the right place).")
        print("It is necessary to generate the features for problems of the theory type specified in config file.")
        print("From the 'rust-smt-ir/amzn-smt-prediction' directory, run:")
        print("cargo build --release --target-dir .")
        exit(1)

    # Check that the config file includes a reference to a SageMaker endpoint
    # for the Runtime Prediction Model
    if endpoint_runtime is None:  # 'is None' rather than '== None' (PEP 8)
        missing_endpoint_error("QF_SLIA", "endpoint_runtime")
        exit(1)

    # Temporary "benchmark list" file: just the single file we want to run
    # predictions on. NamedTemporaryFile is auto-deleted when closed/GC'd,
    # so it must stay open until after the subprocess call below.
    tmp_bench_list = NamedTemporaryFile(mode='w')
    # The offline feature generation executable expects a name for the output,
    # with which it creates a file "off_features_<name>.csv" in the current
    # directory. We use the time so that we get a unique name each run.
    tmp_output_ext = str(int(time()))
    tmp_output = "off_features_" + tmp_output_ext + ".csv"
    tmp_bench_list.write(problem_file)
    tmp_bench_list.flush()  # Must flush since no newline

    ##### GENERATE OFFLINE FEATURES #####
    if v == 'Full': print("Generating offline features... (calling amzn-smt-prediction)")
    # List-form argv (shell=False): the problem path is never shell-interpreted.
    raw_out = subprocess.run([path_to_off_exec, "script", tmp_bench_list.name, tmp_output_ext],
                             capture_output=True, text=True)
    if raw_out.returncode != 0:
        # Fail loudly here with the tool's own diagnostics, instead of failing
        # later with a confusing FileNotFoundError when the CSV is missing.
        print("ERROR: offline feature generation failed:")
        print(raw_out.stderr)
        exit(1)
    if v == 'Full': print("Done.\n")

    # Get results: first CSV line is the header, second is the feature values.
    with open(tmp_output) as fp:
        off_features_labels = fp.readline().strip()
        off_features_string = fp.readline().strip()
    # Clean up the file the offline feature generator wrote. os.remove instead
    # of spawning 'rm': portable and no extra process.
    os.remove(tmp_output)

    ##### GENERATE ONLINE FEATURES #####
    # The functions which generate the solver output and convert it into the
    # model features read and write from files. Here we use temporary files so
    # that they are automatically cleaned up when the script terminates.
    if v == 'Full': print("Generating online features... (running CVC4 for 10 seconds)")
    # Get CVC4's output on the problem
    solver_output = get_cvc4_output(problem_file, fmf=fmf)
    # Write the output to a temporary file so write_word_counts() can read it
    tmp_raw_out = NamedTemporaryFile(mode='w')
    tmp_raw_out.write(solver_output)
    tmp_raw_out.flush()
    # Temporary file for write_word_counts() to write word counts to
    # (line-buffered so the counts are visible to the reader below)
    tmp_count_out = NamedTemporaryFile(mode='w', buffering=1)
    # Read handles are opened via 'with' so they are closed deterministically
    # (previously they leaked until interpreter exit).
    with open(tmp_raw_out.name, 'r') as fp_raw_out:
        write_word_counts(fp_raw_out, tmp_count_out)
    with open(tmp_count_out.name, 'r') as fp_count_out:
        if buckets is not None:
            on_features_labels = get_bucket_labels(buckets).strip()
            on_features = get_features(fp_count_out, buckets)
        else:
            on_features_labels = get_bucket_labels().strip()
            on_features = get_features(fp_count_out)
    if v == 'Full': print("Done.")

    # Combine offline + online features into one labelled integer vector
    feature_labels = off_features_labels + ',' + on_features_labels
    features = [int(x) for x in off_features_string.split(',')] + on_features
    # Normalize features, if means and stds are provided in config
    if feature_means1 is not None and feature_stds1 is not None:
        features = normalize_features(features, feature_means1, feature_stds1)
    if v == 'Full': print("Feature Labels: " + feature_labels)
    if v == 'Full': print("Feature Vector: " + str(features) + "\n")

    ##### GENERATE INFERENCE #####
    # Predictor references the endpoint in AWS SageMaker; CSV serialization
    # matches the model's expected request format.
    predictor = Predictor(endpoint_runtime, serializer=CSVSerializer())
    # Call the endpoint to get a prediction for our example.
    # `prediction` is one probability per runtime class; report the argmax.
    prediction = get_prediction(predictor, features)
    confidence = round(max(prediction) * 100, 2)
    runtime = outputs_runtime[np.argmax(prediction)]
    if v == 'Full': print("Possible Outputs: "+ str(outputs_runtime))
    if v == 'Full': print("Probabilities: " + str(prediction) + "\n")

    fmf_status = "ENABLED" if fmf else "DISABLED"
    if v == 'Full' or v == 'Pretty':
        print(start_green + "The Runtime Model predicted -- with " + str(confidence) + "% confidence -- that as configured (--strings-fmf " + fmf_status + "), CVC4 will: " + runtime + end_green)
    elif v == 'Vector':
        print(prediction)

    return on_features