in scripts/execute_sqls.py [0:0]
def parse_sql_str(sql):
# e.g. SELECT Position FROM table_1-10015132-11 WHERE School/Club Team = "Butler CC (KS)"
# e.g. SELECT COUNT(Position) FROM table_1-10015132-9 WHERE Years in Toronto = \"2006-07\"
parsed_sql = {"phase": 1, "query": {}}
# from table
try:
if "WHERE" in sql:
from_table = re.search(r'FROM(.*?)WHERE', sql).group(1).strip().lower() # 'table_1-10015132-11'
else:
from_table = re.search(r'(?<=FROM).*$', sql).group(0).strip().lower()
except:
# print(sql)
parsed_sql["error"] = "Generated SQL is not valid"
return False, parsed_sql
try:
table_id = from_table.replace("table_", "") # '1-10015132-11'
table = tables[table_id]
header = [x.lower() for x in table["header"]]
parsed_sql["table_id"] = table_id
except:
parsed_sql["error"] = "Generated SQL is not valid"
return False, parsed_sql
# select column
sel_col = re.search(r'SELECT(.*?)FROM', sql).group(1).strip() # COUNT(Position)
agg_idx = 0
for idx, op in enumerate(agg_ops):
if op in sel_col and op != '':
sel_col = re.search(f'{op}\((.*?)\)', sel_col)
if sel_col:
sel_col = sel_col.group(1).strip()
agg_idx = idx
break
try:
sel = header.index(sel_col.lower()) # 3
parsed_sql["query"]["sel"] = sel
parsed_sql["query"]["agg"] = agg_idx
except:
parsed_sql["error"] = "Generated SQL is not valid"
return False, parsed_sql
# where clause
conds = []
if "WHERE" in sql:
where = re.search(r'(?<=WHERE).*$', sql).group(0).strip() # 'Position = "Guard" AND Years in Toronto = "1996-97"'
wheres = where.split("AND")
for cond in wheres:
cond = cond.strip() # 'Position = "Guard"'
for idx, op in enumerate(cond_ops):
if op in cond:
try:
col, value = cond.split(op, 1) # split by first occurance
except:
pdb.set_trace()
col = col.strip()
value = value.strip()
try:
sel = header.index(col.lower())
conds.append([sel, idx, json.loads(value)])
except:
pass
break
parsed_sql["query"]["conds"] = conds # can be empty list
parsed_sql["error"] = ""
parsed_sql["split"] = table_id_owner[table_id]
return True, parsed_sql