timm/models/swin_transformer_v2_cr.py (6 lines):
- line 116: meta_hidden_dim: int = 384, # FIXME what's the optimal value?
- line 137: drop=(0.125, 0.) # FIXME should there be stochasticity, appears to 'overfit' without?
- line 374: # FIXME PyTorch XLA needs cat impl, roll not lowered
- line 402: # FIXME PyTorch XLA needs cat impl, roll not lowered
- line 759: # FIXME more experiments needed
- line 897: # FIXME WIP determining if there's a better weight init

timm/models/naflexvit.py (6 lines):
- line 162: # FIXME confirm we want 'channels last' in the patch channel layout, egg ph, ph, C instead of C, ph, hw
- line 455: # k = h << 16 | w # FIXME can get jit compat with this
- line 459: # h, w = k >> 16, k & 0xFFFF # FIXME can get jit compat with this
- line 993: block_fn = cfg.block_fn or Block # TODO: Support configurable block_fn via string lookup
- line 994: mlp_layer = cfg.mlp_layer or Mlp # TODO: Support configurable mlp_layer via string lookup
- line 1225: # FIXME unfinished / untested

timm/models/cspnet.py (6 lines):
- line 95: aa_layer: Optional[str] = None # FIXME support string factory for this
- line 180: # FIXME partial shortcut needed if first block handled as per original, not used for my current impl
- line 298: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling
- line 310: # FIXME this 1x1 expansion is pushed down into the cross and block paths in the darknet cfgs. Also,
- line 377: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling
- line 444: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling

timm/models/_efficientnet_blocks.py (5 lines):
- line 93: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 147: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 236: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 344: # FIXME dilation isn't right w/ extra ks > 1 convs
- line 668: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation

timm/models/mobilenetv5.py (4 lines):
- line 297: # FIXME MFSA and forward_intermediates overlap, they both take indices from specific features
- line 320: # FIXME see note above
- line 359: # FIXME fix grad checkpointing
- line 545: # FIXME fix grad checkpointing

timm/layers/evo_norm.py (3 lines):
- line 67: x = x.reshape(B, groups, -1) # FIXME simpler shape causing TPU / XLA issues
- line 81: x = x.reshape(B, groups, -1) # FIXME simpler shape causing TPU / XLA issues
- line 87: #group_std = group_std_tpu # FIXME TPU temporary

timm/data/naflex_random_erasing.py (3 lines):
- line 141: # FIXME WIP, not completed. Downstream support in model needed for non-contiguous valid patches
- line 154: # patch dropout mode, completely remove dropped patches (FIXME needs downstream support in model)
- line 324: # FIXME we could vectorize patch mode across batch, worth the effort?

timm/models/_features.py (3 lines):
- line 156: FIXME This works well in eager Python but needs redesign for torchscript.
- line 305: # FIXME this may need to be more generic / flexible for some nets
- line 359: FIXME this does not currently work with Torchscript, see FeatureHooks class

timm/optim/adafactor_bv.py (3 lines):
- line 87: # FIXME try to check if momentum dtype is appropriate for device? Torch API not great for this.
- line 119: # FIXME this is a bit of a hack, optimizer.load_state_dict appears to upcast
- line 337: # FIXME TODO
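Several of the notes above concern PyTorch XLA lowering (swin_transformer_v2_cr.py lines 374/402, evo_norm.py). The swin entries refer to replacing `torch.roll` in the cyclic window shift with a slice-and-concatenate equivalent, since `roll` is not lowered on that backend. Below is a minimal single-dim sketch of the idea; `roll_via_cat` is an illustrative helper name, not a timm function.

```python
import torch

def roll_via_cat(x: torch.Tensor, shift: int, dim: int) -> torch.Tensor:
    """Emulate torch.roll along one dim with narrow + cat.

    Useful where roll is not lowered by the backend (e.g. PyTorch XLA).
    """
    n = x.size(dim)
    shift = shift % n
    if shift == 0:
        return x
    # the last `shift` elements wrap around to the front
    return torch.cat([x.narrow(dim, n - shift, shift), x.narrow(dim, 0, n - shift)], dim=dim)

x = torch.arange(12).reshape(3, 4)
assert torch.equal(roll_via_cat(x, 3, dim=1), torch.roll(x, 3, dims=1))
```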
timm/data/readers/reader_tfds.py (3 lines):
- line 146: self.input_key = input_key # FIXME support tuples / lists of inputs and targets and full range of Feature
- line 179: # FIXME need to determine if reinit_each_iter is necessary. I'm don't completely trust behaviour
- line 249: num_replicas_in_sync=self.dist_num_replicas # FIXME does this arg have any impact?

train.py (3 lines):
- line 523: args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly
- line 546: model.to(device=device, dtype=model_dtype) # FIXME move model device & dtype into create_model
- line 841: # FIXME reduces validation padding issues when using TFDS, WDS w/ workers and distributed training

timm/layers/pos_embed_rel.py (3 lines):
- line 39: # # FIXME different q vs k sizes is a WIP, need to better offset the two grids?
- line 98: src_size = (src_size, src_size) # FIXME could support non-equal src if argument passed
- line 481: # FIXME change to not use one-hot/einsum?

timm/models/davit.py (3 lines):
- line 589: # FIXME generalize this structure to ClassifierHead
- line 787: # FIXME cleaner approach to missing head norm?
- line 813: # TODO contact authors to get larger pretrained models

timm/data/readers/reader_wds.py (3 lines):
- line 210: # _logger.info(f'shuffle seed: {self.seed}, {seed}, epoch: {epoch}') # FIXME temporary
- line 434: # _logger.info(f'start {i}, {self.worker_id}') # FIXME temporary debug
- line 441: # _logger.info(f'end {i}, {self.worker_id}') # FIXME temporary debug

timm/models/vision_transformer.py (3 lines):
- line 1772: # hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k_in1k', # FIXME weight exists, need to push
- line 2668: # FIXME Google FlexiViT pretrained models have a strong preference for bilinear patch / embed
- line 2674: # FIXME attn pool (currently only in siglip) params removed if pool disabled, is there a better soln?

timm/optim/adopt.py (2 lines):
- line 187: #@_use_grad_for_differentiable # FIXME internal context mgr, can't use
- line 454: #@_disable_dynamo_if_unsupported(single_tensor_fn=_single_tensor_adopt) # FIXME internal context mgr, can't use

timm/optim/adamw.py (2 lines):
- line 298: # FIXME not 100% sure if this remains capturable?
- line 373: # TODO: use foreach_pow if/when foreach_pow is added

timm/data/readers/reader_factory.py (2 lines):
- line 22: # FIXME improve the selection right now just tfds prefix or fallback path, will need options to
- line 40: # FIXME support split here or in reader?

timm/layers/halo_attn.py (2 lines):
- line 149: # FIXME not clear if this stride behaviour is what the paper intended
- line 189: # FIXME figure out how to switch impl between this and conv2d if XLA being used.

timm/models/regnet.py (2 lines):
- line 128: # TODO dWr scaling?
- line 1207: # FIXME invalid weight <-> model match, mistake on their end
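The davit.py note about generalizing the head structure points at the reusable head timm already ships in timm.layers, which bundles global pooling, dropout, and the final linear layer. A sketch of what such a refactor would lean on, assuming the current ClassifierHead constructor; the feature size and class count here are illustrative, not davit's:

```python
import torch
from timm.layers import ClassifierHead

# ClassifierHead = global pool -> dropout -> linear, with reset_classifier-style reuse
head = ClassifierHead(in_features=768, num_classes=1000, pool_type='avg', drop_rate=0.0)

feats = torch.randn(2, 768, 7, 7)   # NCHW feature map from the last stage
logits = head(feats)                # -> shape (2, 1000)
```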
timm/layers/patch_embed.py (2 lines):
- line 183: # FIXME to remove, keeping for comparison for now
- line 596: # FIXME WIP

timm/models/efficientnet.py (2 lines):
- line 1428: # FIXME experimental
- line 2139: # FIXME experimental group cong / GroupNorm / EvoNorm experiments

timm/models/resnest.py (2 lines):
- line 49: assert aa_layer is None # TODO not yet supported
- line 50: assert drop_path is None # TODO not yet supported

timm/models/twins.py (2 lines):
- line 431: # FIXME slice block/pos_block if < max
- line 471: # FIXME add block pruning

timm/models/mvitv2.py (2 lines):
- line 863: # FIXME slice block/pos_block if < max
- line 905: # FIXME add stage pruning

timm/models/maxxvit.py (2 lines):
- line 566: # FIXME handle dilation of avg pool
- line 676: # FIXME handle dilation?

timm/models/_efficientnet_builder.py (2 lines):
- line 146: force_in_chs = int(options['fc']) if 'fc' in options else 0 # FIXME hack to deal with in_chs issue in TPU def
- line 471: # FIXME s2d is a WIP

timm/layers/pos_embed_sincos.py (2 lines):
- line 73: # FIXME add support for unflattened spatial dim?
- line 194: # FIXME support nD

timm/optim/nadamw.py (2 lines):
- line 267: # FIXME not 100% sure if this remains capturable?
- line 342: # TODO: use foreach_pow if/when foreach_pow is added

timm/models/byobnet.py (2 lines):
- line 525: FIXME is there a more common 3x3 + 1x1 conv block to name this after?
- line 977: # FIXME need to dilate self attn to have dilated network support, moop moop

timm/models/res2net.py (1 line):
- line 62: # FIXME this should probably have count_include_pad=False, but hurts original weights

timm/optim/lamb.py (1 line):
- line 229: # FIXME nested where required since logical and/or not working in PT XLA

timm/layers/pool2d_same.py (1 line):
- line 16: # FIXME how to deal with count_include_pad vs not for external padding?

timm/models/_manipulate.py (1 line):
- line 22: # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering

timm/layers/lambda_layer.py (1 line):
- line 127: # FIXME relative pos embedding path not fully verified

timm/models/vovnet.py (1 line):
- line 193: assert output_stride == 32 # FIXME support dilation

timm/data/naflex_loader.py (1 line):
- line 376: # FIXME add crop args when sequence transforms support crop modes

timm/models/fastvit.py (1 line):
- line 349: # FIXME output of this act was not used in original impl, likely due to bug

timm/layers/mlp.py (1 line):
- line 180: hidden_features = hidden_features // 2 # FIXME base reduction on gate property?

timm/models/_helpers.py (1 line):
- line 24: # FIXME replace with 3.9 stdlib fn when min at 3.9

timm/models/efficientvit_mit.py (1 line):
- line 1171: # FIXME will wait for v2 SAM models which are pending

timm/models/_registry.py (1 line):
- line 217: # FIXME should this be default behaviour? or default to include_tags=True?

timm/models/vision_transformer_sam.py (1 line):
- line 590: # FIXME only apply to final? Need experiments
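The lamb.py entry (and the matching lars.py entry in the next group) refers to expressing a two-condition select with nested torch.where, because tensor logical and/or did not lower cleanly on PyTorch XLA. A sketch of the pattern, using a LAMB-style trust ratio as the example; both branches are evaluated, so the division result is simply masked out when the guard fails:

```python
import torch

def trust_ratio(w_norm: torch.Tensor, g_norm: torch.Tensor) -> torch.Tensor:
    one = torch.ones_like(w_norm)
    # intent: where(w_norm > 0 and g_norm > 0, w_norm / g_norm, 1)
    # written as nested where so no logical-and op is needed (friendlier to XLA)
    return torch.where(w_norm > 0, torch.where(g_norm > 0, w_norm / g_norm, one), one)
```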
timm/models/crossvit.py (1 line):
- line 73: # FIXME look at relaxing size constraints

timm/optim/lars.py (1 line):
- line 106: # FIXME nested where required since logical and/or not working in PT XLA

timm/layers/attention2d.py (1 line):
- line 136: # FIXME dilation

timm/models/pit.py (1 line):
- line 365: # FIXME need to update resize for PiT impl

validate.py (1 line):
- line 248: model = model.to(device=device, dtype=model_dtype) # FIXME move model device & dtype into create_model

timm/data/readers/reader_image_in_tar.py (1 line):
- line 89: cache_tarinfo = True if tar_bytes > 10*1024**3 else False # FIXME magic number, 10GB

timm/models/_prune.py (1 line):
- line 98: # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?

timm/models/edgenext.py (1 line):
- line 350: # FIXME support dilation / output_stride

timm/loss/binary_cross_entropy.py (1 line):
- line 44: # FIXME should off/on be different for smoothing w/ BCE? Other impl out there differ

timm/optim/sgdw.py (1 line):
- line 92: # FIXME figure out how to make _use_grad_for_differentiable interchangeable with no_grad decorator

timm/layers/squeeze_excite.py (1 line):
- line 98: x_se = x.mean((1, 2), keepdims=True) # FIXME avg dim [1:n-1], don't assume 2D NHWC

timm/optim/mars.py (1 line):
- line 183: # FIXME add multi-tensor (if usage warrants), make more standard

timm/models/senet.py (1 line):
- line 11: FIXME I'm deprecating this model and moving them to ResNet as I don't want to maintain duplicate

bulk_runner.py (1 line):
- line 204: # FIXME batch_size retry loop is currently done in either validation.py or benchmark.py

timm/layers/attention_pool.py (1 line):
- line 83: # FIXME interpolate

timm/data/auto_augment.py (1 line):
- line 905: # TODO the results appear in the right ballpark but they differ by more than rounding.

timm/models/coat.py (1 line):
- line 551: parallel_blocks=[ # FIXME (partially?) overlap parallel w/ serial blocks??

timm/layers/norm_act.py (1 line):
- line 99: # TODO: if statement only here to tell the jit to skip emitting this when it is None

timm/models/ghostnet.py (1 line):
- line 657: # FIXME init

timm/models/mobilenetv3.py (1 line):
- line 650: FIXME untested, this is a preliminary impl of some FBNet-V3 variants.

timm/data/imagenet_info.py (1 line):
- line 40: # FIXME at some point pretrained_cfg should include dataset-tag,

timm/models/_factory.py (1 line):
- line 26: # FIXME may use fragment as revision, currently `@` in URI path

timm/models/dla.py (1 line):
- line 280: assert output_stride == 32 # FIXME support dilation

timm/layers/drop.py (1 line):
- line 137: self.fast = fast # FIXME finish comparisons of fast vs not

timm/models/mlp_mixer.py (1 line):
- line 300: # FIXME drop_path (stochastic depth scaling rule or all the same?)

timm/models/tiny_vit.py (1 line):
- line 626: # TODO: whether move this func into model for dynamic input resolution? (high risk)
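The squeeze_excite.py entry asks that the NHWC squeeze not hard-code the spatial dims (1, 2). A dimension-agnostic version might average over every dim except batch and the trailing channel dim, roughly as below (a sketch, not the current timm code):

```python
import torch

x = torch.randn(2, 14, 14, 256)   # channels-last example; could also be N, D, H, W, C
# current: x_se = x.mean((1, 2), keepdims=True)
x_se = x.mean(dim=tuple(range(1, x.ndim - 1)), keepdim=True)  # average all spatial dims, keep channels last
assert x_se.shape == (2, 1, 1, 256)
```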
timm/models/_hub.py (1 line):
- line 132: # FIXME I may change @ -> # and be parsed as fragment in a URI model name scheme

timm/data/transforms_factory.py (1 line):
- line 146: # FIXME integration of RKR is a WIP

timm/utils/distributed.py (1 line):
- line 114: # FIXME: verify that ROCm transform nccl to rccl

timm/data/dataset_factory.py (1 line):
- line 218: # FIXME support more advance split cfg for ImageFolder/Tar datasets in the future

timm/optim/adamp.py (1 line):
- line 33: # FIXME this is a problem for PyTorch XLA

timm/models/densenet.py (1 line):
- line 357: (r'^features\.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer

timm/data/naflex_dataset.py (1 line):
- line 5: TODO: 2. NaFlexIterableDatasetWrapper - Iterable dataset that yields batches with variable sequence lengths

timm/models/dpn.py (1 line):
- line 173: assert output_stride == 32 # FIXME look into dilation support

timm/optim/kron.py (1 line):
- line 106: deterministic: Deterministic behaviour across save / load (resume). FIXME slow, needs work

timm/models/metaformer.py (1 line):
- line 552: # FIXME not actually returning mlp hidden state right now as pre-logits.

timm/optim/_param_groups.py (1 line):
- line 87: # FIXME interface needs more work

timm/models/hrnet.py (1 line):
- line 536: assert output_stride == 32 # FIXME support dilation
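Several of the "support dilation" items above (vovnet, dla, edgenext, dpn, hrnet) would likely follow the stride-capping bookkeeping timm uses elsewhere (e.g. resnet.py): once the accumulated network stride reaches the requested output_stride, later stages trade their stride for dilation. A rough sketch of that logic; plan_stages is an illustrative helper, not existing timm code, and it assumes a stride-4 stem:

```python
def plan_stages(stage_strides, output_stride=32):
    """Turn per-stage strides into (stride, dilation) pairs that respect output_stride."""
    net_stride, dilation, plan = 4, 1, []   # assume a stride-4 stem, as in most timm CNNs
    for s in stage_strides:
        if net_stride >= output_stride:
            dilation *= s        # keep receptive-field growth, but stop downsampling
            s = 1
        else:
            net_stride *= s
        plan.append((s, dilation))
    return plan

print(plan_stages([1, 2, 2, 2], output_stride=8))  # -> [(1, 1), (2, 1), (1, 2), (1, 4)]
```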