def from_dict()

in src/mlm/loaders.py [0:0]
21 lines of code
8 McCabe index (conditional complexity)

    def from_dict(
        cls,
        obj_dict: Dict[str, Dict[str, Any]],
        max_utts: Optional[int] = None,
        vocab: Optional[nlp.Vocab] = None,
        tokenizer=None,
    ):
        """Build predictions from an already-parsed dictionary in the format of Shin et al.

        Each value of ``obj_dict`` looks like::

            {
                'ref': "mister quilter is the apostle of the middle classes ...",
                'hyp_1': {'score': -10.1077, 'text': ' mister quillter is ...'},
                ...
            }

        Args:
            obj_dict: Maps each utterance ID to its dictionary of entries.
                Only entries whose key starts with ``hyp_`` are used; each
                must provide a ``'text'`` and a ``'score'`` field. Other
                keys (such as ``'ref'``) are ignored.
            max_utts: If not ``None``, keep only the first ``max_utts``
                utterances, taken in sorted utterance-ID order.
            vocab: Passed through unchanged to ``Hypotheses``.
            tokenizer: Passed through unchanged to ``Hypotheses``.

        Returns:
            A new ``cls`` instance in which every loaded utterance ID is
            mapped to a ``Hypotheses`` object built from its texts and scores.

        Raises:
            IndexError: If the numeric suffixes of an utterance's ``hyp_``
                keys exceed the number of hypotheses (e.g. numbering with
                gaps), since each hypothesis is stored at ``suffix - 1``.
        """
        # Just a dictionary for now, but equipped with this factory method.
        preds = cls()

        # Sort by utterance ID so that truncating with max_utts is deterministic.
        item_list = sorted(obj_dict.items())
        if max_utts is not None:
            item_list = item_list[:max_utts]

        for utt_id, hyps_dict in item_list:
            # Keep only the hypothesis entries; this skips e.g. the 'ref' entry.
            hyp_items = [
                (hyp_id, hyp_data)
                for hyp_id, hyp_data in hyps_dict.items()
                if hyp_id.startswith("hyp_")
            ]
            num_hyps = len(hyp_items)

            # Pre-sized because hypotheses are placed by their numeric suffix,
            # not by the order in which the dictionary yields them.
            sents = [None] * num_hyps
            scores = [None] * num_hyps
            for hyp_id, hyp_data in hyp_items:
                # 'hyp_100' --> 99 (suffixes are expected to be 1-based).
                # NOTE(review): a 'hyp_0' key would yield index -1 and land in
                # the last slot instead of raising -- confirm inputs are 1-based.
                idx = int(hyp_id.split("_")[1]) - 1
                sents[idx] = hyp_data["text"].strip()
                scores[idx] = hyp_data["score"]

            preds[utt_id] = Hypotheses(sents, scores, vocab, tokenizer)

        return preds