def parse_sql_str()

in scripts/execute_sqls.py [0:0]


def parse_sql_str(sql):

    # e.g. SELECT Position FROM table_1-10015132-11 WHERE School/Club Team = "Butler CC (KS)"
    # e.g. SELECT COUNT(Position) FROM table_1-10015132-9 WHERE Years in Toronto = \"2006-07\"

    parsed_sql = {"phase": 1, "query": {}}

    # from table
    try:
        if "WHERE" in sql:
            from_table = re.search(r'FROM(.*?)WHERE', sql).group(1).strip().lower() # 'table_1-10015132-11'
        else:
            from_table = re.search(r'(?<=FROM).*$', sql).group(0).strip().lower()
    except:
        # print(sql)
        parsed_sql["error"] = "Generated SQL is not valid"
        return False, parsed_sql
    try:
        table_id = from_table.replace("table_", "") # '1-10015132-11'
        table = tables[table_id]
        header = [x.lower() for x in table["header"]]
        parsed_sql["table_id"] = table_id
    except:
        parsed_sql["error"] = "Generated SQL is not valid"
        return False, parsed_sql

    # select column
    sel_col = re.search(r'SELECT(.*?)FROM', sql).group(1).strip() # COUNT(Position)
    agg_idx = 0
    for idx, op in enumerate(agg_ops):
        if op in sel_col and op != '':
            sel_col = re.search(f'{op}\((.*?)\)', sel_col)
            if sel_col:
                sel_col = sel_col.group(1).strip()
                agg_idx = idx
            break

    try:
        sel = header.index(sel_col.lower()) # 3
        parsed_sql["query"]["sel"] = sel
        parsed_sql["query"]["agg"] = agg_idx
    except:
        parsed_sql["error"] = "Generated SQL is not valid"
        return False, parsed_sql

    # where clause
    conds = []
    if "WHERE" in sql:
        where = re.search(r'(?<=WHERE).*$', sql).group(0).strip() # 'Position = "Guard" AND Years in Toronto = "1996-97"'
        wheres = where.split("AND")
        for cond in wheres:
            cond = cond.strip() # 'Position = "Guard"'
            for idx, op in enumerate(cond_ops):
                if op in cond:
                    try:
                        col, value = cond.split(op, 1) # split by first occurance
                    except:
                        pdb.set_trace()
                    col = col.strip()
                    value = value.strip()
                    try:
                        sel = header.index(col.lower())
                        conds.append([sel, idx, json.loads(value)])
                    except:
                        pass
                    break

    parsed_sql["query"]["conds"] = conds # can be empty list
    parsed_sql["error"] = ""
    parsed_sql["split"] = table_id_owner[table_id]

    return True, parsed_sql