modules/SwissArmyTransformer/sat/generation/autoregressive_sampling.py (4 lines):
- line 105: log_attention_weights_part = log_attention_weights[..., index: counter+1, :counter+1] # TODO memlen
- line 115: attention_mask=attention_mask[..., index: counter+1, :counter+1], # TODO memlen
- line 196: log_attention_weights_part = log_attention_weights[..., index: counter+1, :counter+1] # TODO memlen
- line 206: attention_mask=attention_mask[..., index: counter+1, :counter+1], # TODO memlen

modules/SwissArmyTransformer/sat/ops/csrc/adam/multi_tensor_apply.cuh (2 lines):
- line 24: // TODO: Kernel arg size limit may be <4KB for some other cards (ie Jetson)
- line 65: // TODO: Print which tensor fails.

modules/SwissArmyTransformer/sat/generation/sampling_strategies/beam_search_strategy.py (2 lines):
- line 96: ngram_prefix = tokens[i, -(self.ngram-1):].tolist() # TODO ngram=1
- line 143: ngram_prefix = tuple(tokens[next_indices[i], -(self.ngram-1):].tolist()) # TODO ngram=1

sat/sgm/modules/diffusionmodules/openaimodel.py (2 lines):
- line 386: # TODO add crossframe attention and use mixed checkpoint
- line 389: ) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!!

diffusers-version/tora/i2v_pipeline.py (1 line):
- line 800: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

modules/SwissArmyTransformer/sat/training/model_io.py (1 line):
- line 394: # TODO: Remove `args` and the parsing logic when BC allows.

modules/SwissArmyTransformer/sat/model/transformer.py (1 line):
- line 656: # TODO add warning for depth>=2 grad tensors

modules/SwissArmyTransformer/sat/data_utils/configure_data.py (1 line):
- line 60: if distributed: # TODO reformat this, but it is not urgent

sat/app.py (1 line):
- line 1302: # TODO: passing 'base' params through the command line

modules/SwissArmyTransformer/sat/training/deepspeed_training.py (1 line):
- line 152: # TODO add rng states for data parallel and wrap drops in main path.

sat/sgm/modules/attention.py (1 line):
- line 325: # TODO: Use this directly in the attention operation, as a bias

sat/sgm/modules/autoencoding/regularizers/quantize.py (1 line):
- line 152: # TODO: shape not yet optional

modules/SwissArmyTransformer/sat/model/base_model.py (1 line):
- line 432: try: # TODO: is this useful?

sat/data_video.py (1 line):
- line 229: duration: preknow the duration to speed up by seeking to sampled start. TODO by_pass if unknown.

modules/SwissArmyTransformer/sat/model/encoder_decoder_model.py (1 line):
- line 101: def from_pretrained(cls, args, name, *, home_path=None, url=None): # TODO update model-only mode

modules/SwissArmyTransformer/sat/arguments.py (1 line):
- line 65: # TODO: fully test it, support the generation.

diffusers-version/tora/t2v_pipeline.py (1 line):
- line 724: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

sat/vae_modules/attention.py (1 line):
- line 325: # TODO: Use this directly in the attention operation, as a bias
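The two `# TODO ngram=1` entries in beam_search_strategy.py most likely flag a Python slicing edge case: when `self.ngram == 1`, the slice start `-(self.ngram-1)` evaluates to `-0`, which Python treats as `0`, so the "prefix" covers the whole sequence instead of being empty. The snippet below is a minimal standalone sketch of that behavior (the `ngram_prefix` helper is invented for illustration and is not the repository's class):

```python
import torch

def ngram_prefix(tokens: torch.Tensor, ngram: int) -> list:
    # Mirrors the slice used in beam_search_strategy.py: tokens[..., -(ngram-1):]
    # For ngram >= 2 this yields the last (ngram - 1) tokens.
    return tokens[-(ngram - 1):].tolist()

tokens = torch.tensor([5, 7, 9, 11])
print(ngram_prefix(tokens, 3))  # [9, 11]        -- last two tokens, as intended
print(ngram_prefix(tokens, 1))  # [5, 7, 9, 11]  -- -(1-1) == 0, whole sequence, not []
```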
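The "Prepare extra step kwargs" TODOs in the Tora i2v/t2v pipelines refer to the standard diffusers boilerplate that inspects the scheduler's `step()` signature before forwarding `eta` and `generator`. The sketch below is an assumption-laden illustration of that pattern, not the pipelines' actual code; the free function and the `_DummyScheduler` are hypothetical stand-ins:

```python
import inspect

def prepare_extra_step_kwargs(scheduler, generator=None, eta: float = 0.0) -> dict:
    # Only pass `eta` / `generator` if the scheduler's step() actually accepts them.
    params = set(inspect.signature(scheduler.step).parameters)
    extra = {}
    if "eta" in params:        # e.g. DDIM-style schedulers take eta
        extra["eta"] = eta
    if "generator" in params:
        extra["generator"] = generator
    return extra

class _DummyScheduler:
    def step(self, model_output, timestep, sample, eta=0.0):
        return sample

print(prepare_extra_step_kwargs(_DummyScheduler(), eta=0.5))  # {'eta': 0.5}
```

Because the logic depends only on the scheduler, factoring it out of each pipeline (as the TODO suggests) would avoid duplicating it across i2v_pipeline.py and t2v_pipeline.py.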