# in voxpopuli/get_lm_data.py [0:0]
def remove_parentheses(text: str) -> str:
    """Return *text* with every substring enclosed in () or [] removed.

    The brackets themselves are removed together with their content, and
    nested groups are removed as a whole. The two bracket kinds are not
    distinguished from each other: any opener increases the nesting depth
    and any closer decreases it, so ``"(a]"`` counts as a closed group.

    Unbalanced input is handled as follows:

    * A closing bracket with no matching opener is kept as literal text and
      does not affect the nesting depth, so later groups are still removed.
    * An opening bracket that is never closed removes everything from that
      bracket to the end of the string.

    >>> remove_parentheses("a (b [c]) d")
    'a  d'
    >>> remove_parentheses("a (b")
    'a '
    """
    pieces = []
    depth = 0
    start = 0  # index where the current kept (depth 0) span begins
    for i, c in enumerate(text):
        if c == "(" or c == "[":
            if depth == 0:
                # Entering a bracketed group: flush the kept span before it.
                pieces.append(text[start:i])
            depth += 1
        elif (c == ")" or c == "]") and depth > 0:
            # Only closers that match an open group change the depth; a
            # stray closer at depth 0 stays inside the current kept span.
            depth -= 1
            if depth == 0:
                start = i + 1

    # Append the tail only when outside any group; otherwise the text from
    # an unclosed opener onward would be emitted (the prefix before it was
    # already flushed when the opener was seen).
    if depth == 0:
        pieces.append(text[start:])

    return "".join(pieces)