# def parallel_icsl_transform()
#
# in code/scripts/utils.py [0:0]


def parallel_icsl_transform(sample, vocabulary, label2idx, intent2idx, bert_tokenizer):
    """Turn one parallel sample into id sequences plus their lengths.

    Args:
        sample: Indexable record. ``sample[0]`` is the source sentence (passed
            whole to ``bert_tokenizer``), ``sample[1]`` the whitespace-separated
            target words, ``sample[2]`` the whitespace-separated tags paired
            with those words, and ``sample[3]`` the intent label.
        vocabulary: Token-to-id lookup, indexed as ``vocabulary[tok]``.
        label2idx: Slot-label lookup handed to ``label2index``.
        intent2idx: Intent-label lookup handed to ``label2index``.
        bert_tokenizer: Callable returning the list of sub-tokens for a string.

    Returns:
        A list ``[source_ids, target_ids, slot_label_ids, intent_id,
        source_length, target_length]``.
    """
    tgt_tokens = ['[CLS]']
    # No tag is appended for the leading '[CLS]'; only the closing '[SEP]'
    # receives PAD, so the tag list does not cover the first target position.
    tgt_tags = []
    for word, word_tag in zip(sample[1].split(), sample[2].split()):
        pieces = bert_tokenizer(word)
        tgt_tokens.extend(pieces)
        n_pieces = len(pieces)
        if not word_tag.startswith('B'):
            # Every sub-token of the word repeats the word's tag.
            tgt_tags.extend([word_tag] * n_pieces)
            continue
        # A 'B...' tag stays on the first sub-token only; the remaining
        # sub-tokens get the same tag with its first character replaced by 'I'.
        inside_tag = 'I' + word_tag[1:]
        tgt_tags.extend([word_tag] + [inside_tag] * (n_pieces - 1))
    tgt_tokens.append('[SEP]')
    tgt_tags.append(PAD)

    src_tokens = ['[CLS]'] + bert_tokenizer(sample[0]) + ['[SEP]']

    src_ids = [vocabulary[tok] for tok in src_tokens]
    tgt_ids = [vocabulary[tok] for tok in tgt_tokens]
    slot_ids = [label2index(label2idx, slot_tag) for slot_tag in tgt_tags]
    intent_id = label2index(intent2idx, sample[3])

    # The lengths count the special '[CLS]' / '[SEP]' tokens as well.
    return [src_ids, tgt_ids, slot_ids, intent_id, len(src_tokens), len(tgt_tokens)]