# def icsl_transform() -- in code/scripts/utils.py [0:0]


def icsl_transform(sample, vocabulary, label2idx, intent2idx, bert_tokenizer):
    """Turn one word-level sample into subword-level ids and labels.

    Every whitespace-separated word in ``sample[0]`` is split into subword
    tokens with ``bert_tokenizer``; the matching tag from ``sample[1]`` is
    spread over those subwords.  A tag starting with ``'B'`` is kept on the
    first subword and rewritten with an ``'I'`` prefix on the remaining
    ones; any other tag is repeated unchanged.

    The token sequence is wrapped in ``'[CLS]'`` / ``'[SEP]'``.  The tag
    sequence receives ``PAD`` for ``'[SEP]'`` but nothing for ``'[CLS]'``,
    so it is one element shorter than the token sequence and
    ``tag_alignment`` indexes into the tag sequence, not the token sequence.

    Args:
        sample: Indexable record where ``sample[0]`` is the space-separated
            words, ``sample[1]`` the space-separated tags (one per word),
            ``sample[2]`` the intent label and ``sample[3]`` the example id
            (anything ``int()`` accepts).
        vocabulary: Mapping indexed by token string, yielding a token id.
        label2idx: Slot-label mapping handed to ``label2index``.
        intent2idx: Intent-label mapping handed to ``label2index``.
        bert_tokenizer: Callable taking one word and returning a sized
            sequence of subword tokens.

    Returns:
        A pair ``(out_sample, tag_alignment)``.  ``out_sample`` is the list
        ``[eid, token_ids, slot_label_ids, intent_label_id, valid_len]``
        where ``valid_len`` counts all tokens including ``'[CLS]'`` and
        ``'[SEP]'``.  ``tag_alignment`` holds, for each word, the position in
        the tag sequence at which that word's tags start.
    """
    eid = int(sample[3])
    tag_alignment = []
    bert_tokens = ['[CLS]']
    bert_tags = []
    # NOTE(review): zip() stops at the shorter sequence, so a sample whose
    # word and tag counts differ is silently truncated -- confirm inputs
    # are always aligned.
    for word, tag in zip(sample[0].split(), sample[1].split()):
        tag_alignment.append(len(bert_tags))
        subwords = bert_tokenizer(word)
        bert_tokens.extend(subwords)
        n_subwords = len(subwords)
        # Emit exactly one tag per subword.  If the tokenizer returns
        # nothing for a word, no tag is added either; otherwise a 'B' tag
        # would be appended with no matching token and every later label
        # would shift by one.
        if n_subwords:
            cont_tag = 'I' + tag[1:] if tag.startswith('B') else tag
            bert_tags.append(tag)
            bert_tags.extend([cont_tag] * (n_subwords - 1))
    bert_tokens.append('[SEP]')
    # PAD is a module-level name defined outside this function; it labels
    # the '[SEP]' position.
    bert_tags.append(PAD)

    token_ids = [vocabulary[tok] for tok in bert_tokens]
    slot_ids = [label2index(label2idx, t) for t in bert_tags]
    intent_id = label2index(intent2idx, sample[2])
    valid_len = len(bert_tokens)
    # Layout: example id, token ids, slot labels, intent label, valid length.
    out_sample = [eid, token_ids, slot_ids, intent_id, valid_len]
    return out_sample, tag_alignment