in genre/utils.py [0:0]
def create_input(doc, max_length, start_delimiter, end_delimiter):
    """Build the text for ``doc``, marking the mention with delimiters.

    When ``doc["meta"]`` provides ``left_context``, ``mention`` and
    ``right_context``, the result is::

        <left> " {start_delimiter} " <mention> " {end_delimiter} " <right>

    where the left/right contexts are trimmed so that together they keep
    at most ``max_length`` space-separated pieces (the mention and the
    delimiters are not counted against that budget). Otherwise the raw
    ``doc["input"]`` is used. In both cases HTML entities are unescaped
    with ``html.unescape`` before returning.

    Args:
        doc: Mapping with an ``"input"`` string and, optionally, a
            ``"meta"`` mapping holding the three context keys.
        max_length: Budget, in pieces obtained by splitting on a single
            space, that decides whether and how the contexts are trimmed.
        start_delimiter: Marker inserted (space-padded) before the mention.
        end_delimiter: Marker inserted (space-padded) after the mention.

    Returns:
        The assembled, HTML-unescaped string.
    """
    required_keys = ("left_context", "mention", "right_context")

    # Fallback: without a complete "meta" entry the raw input is used as is.
    if "meta" not in doc or any(key not in doc["meta"] for key in required_keys):
        return html.unescape(doc["input"])

    meta = doc["meta"]
    left = meta["left_context"]
    right = meta["right_context"]

    # Note that the "fits" decision is made on doc["input"], not on the
    # pieces stored in meta.
    if len(doc["input"].split(" ")) <= max_length:
        prefix, suffix = left, right
    else:
        left_pieces = left.split(" ")
        right_pieces = right.split(" ")
        half = max_length // 2

        if len(left_pieces) <= half:
            # Short left side: keep it whole, give the remainder of the
            # budget to the beginning of the right side.
            prefix = left
            suffix = " ".join(right_pieces[: max_length - len(left_pieces)])
        elif len(right_pieces) <= half:
            # Short right side: keep it whole, give the remainder of the
            # budget to the end of the left side (negative start index).
            prefix = " ".join(left_pieces[len(right_pieces) - max_length :])
            suffix = right
        else:
            # Both sides are long. Unary minus binds tighter than ``//``,
            # so this is (-max_length) // 2: the left side keeps
            # ceil(max_length / 2) pieces and the right side
            # floor(max_length / 2).
            prefix = " ".join(left_pieces[-max_length // 2 :])
            suffix = " ".join(right_pieces[:half])

    assembled = (
        prefix
        + " {} ".format(start_delimiter)
        + meta["mention"]
        + " {} ".format(end_delimiter)
        + suffix
    )
    return html.unescape(assembled)