Summary: 62 instances, 57 unique

| Text | Count |
| --- | ---: |
| # TODO: to extend with extra datasets and keys and loop over different shard data paths | 1 |
| # TODO: Very rare cases where the replacement is '' should be handled gracefully | 1 |
| # TODO: hide setting "encoder_attn" layers behind a flag. | 1 |
| # TODO: add an option for shrinking all size ratios to below 1 | 1 |
| # TODO: move this into individual transforms | 1 |
| # TODO make summing of the sample sizes configurable | 2 |
| # TODO: [Hack] Here the grouped iterator modifies the base iterator size so that | 1 |
| # TODO add hparams to Tensorboard | 1 |
| # TODO: iterative refinement generator does not support ensemble for now. | 1 |
| # TODO: Below is a lazy implementation which discard the final batch regardless | 1 |
| # TODO common var add to parent | 1 |
| # TODO: Remove this after refactor of BERTModel | 1 |
| # TODO: this would become easier if encoder/decoder where using a similar | 1 |
| # TODO: should we shuffle ? we are already sorting batch by sizes so ? | 1 |
| # TODO should really be in the encoder config | 1 |
| # TODO common vars below add to parent | 2 |
| scaled_init=True, # TODO: use constant for now. | 1 |
| # TODO: replace this workaround with refactoring of `AudioPretraining` | 1 |
| # TODO: could we do the BT using denoise sample ? | 1 |
| # FIXME: what happens if padding_count is specified? | 1 |
| # TODO: handle modified lang toks for mined data and dae data | 1 |
| # TODO: Can we add deteminism without this constraint? | 1 |
| # HACK for now, need to fix (TODO sidgoyal) | 1 |
| # TODO should really be in the decoder config | 1 |
| # TODO: implementing length-beam | 1 |
| # TODO: to investigate why TransformEosLangPairDataset doesn't work with ConcatDataset | 1 |
| # TODO: support different padding direction on target side | 2 |
| "tgt_speaker": tgt_speakers, # TODO: unify "speaker" and "tgt_speaker" | 1 |
| # TODO make summing of the sample sizes configurable | 1 |
| # -- TODO T96535332 | 1 |
| # TODO: incorporate max_len_a and max_len_b | 1 |
| TODO: ids in skip_ids should be consecutive, we can extend it to more generic version later | 1 |
| # TODO: allow more complex mapping | 1 |
| # TODO: speed up the following loop | 1 |
| # TODO: better encoder inputs? | 1 |
| # TODO: Consider mask here | 1 |
| # TODO common vars in parent class | 1 |
| # TODO: positional embedding on memory | 1 |
| # TODO: legacy parameter kept for compatibility | 1 |
| # FIXME: revert when gather based xla reduction is implemented | 1 |
| # TODO remove this once we update apex with the fix | 1 |
| # TODO: fix positional embedding | 1 |
| # TODO: update this when transformer gets converted to dataclass configs | 1 |
| # TODO: decoding for InsertionTransformer | 1 |
| # TODO make it configurable whether to use max() or sum() here | 1 |
| # TODO: need to fix here | 1 |
| # TODO reseach new sum_query method | 1 |
| # TODO: make it configurable from the args | 1 |
| TODO: | 1 |
| # TODO common vars below in parent | 2 |
| # TODO: The right place for this offset would be inside | 1 |
| # TODO: add back prev_self_attn_state, prev_attn_state, | 1 |
| # TODO: we should reuse the pretrained model dict which already has | 1 |
| # TODO: Could we translate to several language at once ? | 1 |
| # TODO make bias configurable | 1 |
| # TODO: Unifiy with alter_dataset_langtok | 2 |
| # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it | 1 |
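A tally like the one above can be produced with a short scan over the repository's Python sources. The sketch below is a minimal, assumed implementation: the marker regex, the `*.py` glob, and the choice to count the whole stripped line (so that entries with leading code such as `scaled_init=True, # TODO: ...` and bare docstring markers like `TODO:` are both captured) are all guesses at how the original tally was made, not the actual tool that generated this table.

```python
from collections import Counter
from pathlib import Path
import re

# Count a line whenever it contains one of these markers, whether in a
# `#` comment, a docstring, or after trailing code (hypothetical rule; the
# exact extraction behind the table above is not known).
MARKERS = re.compile(r"\b(?:TODO|FIXME|HACK)\b")

def tally_markers(root: str) -> Counter:
    """Tally each distinct marker line found in *.py files under `root`."""
    counts: Counter = Counter()
    for path in Path(root).rglob("*.py"):
        try:
            text = path.read_text(errors="ignore")
        except OSError:
            continue  # skip unreadable files
        for line in text.splitlines():
            if MARKERS.search(line):
                counts[line.strip()] += 1
    return counts

if __name__ == "__main__":
    counts = tally_markers(".")
    print(f"Summary: {sum(counts.values())} instances, {len(counts)} unique")
    for text, n in counts.most_common():
        print(f"{n:>3}  {text}")
```

Note that this sketch merges lines that are identical after stripping, whereas the table above keeps two near-identical entries apart (`# TODO make summing of the sample sizes configurable` appears with counts 2 and 1), so the original extraction evidently used a slightly different normalization, for example preserving leading whitespace.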