def start_datacollection()

in src/hpcadvisor/main_gui.py [0:0]


def _split_csv(text):
    """Split a comma-separated string into items, trimming whitespace around each."""
    return [item.strip() for item in text.split(",")]


def _parse_app_input(text):
    """Parse ``variable=value[,value...]`` lines into a dict of value lists.

    Blank lines are skipped. Only the first ``=`` on a line separates the
    variable name from its values, so values may themselves contain ``=``.

    Returns:
        A tuple ``(parsed, malformed)`` where ``parsed`` maps each variable
        name to its list of values and ``malformed`` lists the (stripped)
        lines that had no ``=`` or an empty variable name.
    """
    parsed = {}
    malformed = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        key, sep, value = line.partition("=")
        key = key.strip()
        if not sep or not key:
            malformed.append(line)
            continue
        parsed[key] = _split_csv(value)
    return parsed, malformed


def start_datacollection(user_input_file):
    """Render the "Data Collector" page and run a collection on request.

    The page lets the user pick a deployment, edit the SKU / node-count /
    processes-per-resource lists and the application input, preview the
    generated tasks, and start the data collection for the selected
    deployment.

    Args:
        user_input_file: Passed to ``utils.get_userinput_from_file``. The
            resulting mapping must provide the ``"appsetupurl"`` and
            ``"appname"`` keys, which are read directly here.
    """
    st.write("### Data Collector ")

    st.text("")
    st.text("")
    st.text("")

    userinput = utils.get_userinput_from_file(user_input_file)

    deployments = utils.list_deployments()

    label = f"Select deployment (total = {len(deployments)}): "

    deployment = st.selectbox(label, deployments)

    str_skus = get_str_from_userinput_list(userinput, "skus")
    print("str_skus=", str_skus)
    str_nnodes = get_str_from_userinput_list(userinput, "nnodes")
    str_ppr = get_str_from_userinput_list(userinput, "ppr")
    text_skus = st.text_input("SKUs", str_skus, key="field_sku")
    text_nnodes = st.text_input("Number of nodes", str_nnodes, key="field_nnodes")
    text_ppr = st.text_input("Processes per resource (%)", str_ppr, key="field_ppr")

    textfield_appinput = get_textfield_from_appinput(userinput)
    text_appinput = st.text_area(
        "Application input per line (<variable=value>)",
        textfield_appinput,
        key="appinput_info",
    )

    # The widget is rendered for the user, but its value is not consumed
    # anywhere in this function.
    st.text_input(
        "App setup script (git URL)",
        userinput["appsetupurl"],
        key="app_url_setup",
    )

    if "gentasks_key" not in st.session_state:
        st.session_state.gentasks_key = False

    # Each click on the button flips the visibility of the task table.
    if st.button("Show/Hide Tasks", key="button_show_tasks"):
        st.session_state.gentasks_key = not st.session_state.gentasks_key

    if st.session_state.gentasks_key:
        task_filename = utils.get_task_filename(deployment)
        st.session_state["task_filename"] = task_filename

        data_system = {
            "sku": _split_csv(text_skus),
            "nnodes": _split_csv(text_nnodes),
            "ppr": _split_csv(text_ppr),
        }

        data_app_input, malformed_lines = _parse_app_input(text_appinput)
        for bad_line in malformed_lines:
            # Report instead of crashing the page on a line without "=".
            st.warning(
                "Ignoring malformed application input line "
                f"(expected <variable=value>): {bad_line}"
            )

        appname = userinput["appname"]
        tags = {}
        data = taskset_handler.generate_tasks(
            task_filename, data_system, data_app_input, appname, tags
        )
        df = pd.DataFrame(data)
        df = df.set_index("id")

        df.index.name = "task"

        st.dataframe(df, height=200, width=700)

    # The start button is disabled while its own session-state entry reports
    # that it was clicked; this is recomputed on every script run.
    st.session_state.running_collection = bool(
        st.session_state.get("run_datacollector_button", False)
    )

    if st.button(
        "Start Data Collection",
        disabled=st.session_state.running_collection,
        key="run_datacollector_button",
    ):
        st.empty()

        st.text("This will take a while. Please wait...")
        st.session_state["executionCollectionOn"] = True

        try:
            task_filename = utils.get_task_filename(deployment)

            env_file = utils.get_deployments_file(deployment)
            dataset_filename = utils.get_dataset_filename()
            data_collector.collect_data(
                task_filename, dataset_filename, env_file, clear_deployment=False
            )
        finally:
            # Must be reset before st.rerun(): rerun halts the current script
            # run, so statements placed after it never execute. The finally
            # also clears the flag when the collection raises.
            st.session_state["executionCollectionOn"] = False

        log.info(f"finish execution for {deployment}")
        st.success("Benchmark data generated!")
        st.session_state.output = "output generated"
        st.rerun()

    st.text("")
    st.text("")
    st.text("")