in align/data.py [0:0]
def filter(bitext, align):
    """Keep the sentence pairs that have a usable alignment and build edge matrices.

    For every index ``i`` in ``align`` the JSON string ``align[i]`` is decoded
    and paired with ``bitext[i]``.  An entry is kept only when ``bitext[i]``
    has exactly two elements and the whole alignment can be turned into a
    matrix; any entry that fails along the way is skipped silently
    (best-effort filtering).

    Args:
        bitext: Indexable collection of mutable two-element sequences holding
            whitespace-separated strings.  Accepted pairs are tokenised *in
            place* (each string is replaced by its ``str.split()`` result);
            rejected entries are left untouched.
        align: Iterable of JSON strings, each decoding to a mapping with the
            keys ``'inter'`` and ``'itermax'`` whose values are iterables of
            ``(x, y)`` index pairs.

    Returns:
        A tuple ``(real_bitext, edges)`` of two lists of equal length.
        ``real_bitext[k]`` is the tokenised pair and ``edges[k]`` is a float
        array of shape ``(len(pair[0]), len(pair[1]))`` in which cells listed
        under ``'inter'`` hold 2, cells listed only under ``'itermax'`` hold
        1, and every other cell holds 0.
    """
    # NOTE: the name shadows the builtin ``filter``; it is kept because
    # callers elsewhere import it under this name.
    real_bitext = []
    edges = []
    for i, raw_alignment in enumerate(align):
        try:
            alignment = json.loads(raw_alignment)
            pair = bitext[i]
            if len(pair) != 2:
                continue
            src_tokens = pair[0].split()
            tgt_tokens = pair[1].split()
            edge_info = np.zeros((len(src_tokens), len(tgt_tokens)))
            for x, y in alignment['inter']:
                edge_info[x, y] = 2
            for x, y in alignment['itermax']:
                # Never downgrade a cell already marked by 'inter'.
                if edge_info[x, y] == 0:
                    edge_info[x, y] = 1
            # Commit the in-place tokenisation only once the matrix has been
            # built, so a rejected entry is not left half-modified.
            pair[0], pair[1] = src_tokens, tgt_tokens
        except Exception:
            # Best-effort: malformed JSON, missing keys, out-of-range indices
            # or badly shaped pairs just drop this entry.  KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            continue
        # Append to both outputs together so they always stay index-aligned.
        real_bitext.append(pair)
        edges.append(edge_info)
    return real_bitext, edges