timm/models/swin_transformer_v2_cr.py (6 lines):
- line 116: meta_hidden_dim: int = 384, # FIXME what's the optimal value?
- line 137: drop=(0.125, 0.) # FIXME should there be stochasticity, appears to 'overfit' without?
- line 374: # FIXME PyTorch XLA needs cat impl, roll not lowered
- line 402: # FIXME PyTorch XLA needs cat impl, roll not lowered
- line 759: # FIXME more experiments needed
- line 897: # FIXME WIP determining if there's a better weight init

timm/models/naflexvit.py (6 lines):
- line 162: # FIXME confirm we want 'channels last' in the patch channel layout, egg ph, ph, C instead of C, ph, hw
- line 455: # k = h << 16 | w # FIXME can get jit compat with this
- line 459: # h, w = k >> 16, k & 0xFFFF # FIXME can get jit compat with this
- line 993: block_fn = cfg.block_fn or Block # TODO: Support configurable block_fn via string lookup
- line 994: mlp_layer = cfg.mlp_layer or Mlp # TODO: Support configurable mlp_layer via string lookup
- line 1225: # FIXME unfinished / untested

timm/models/cspnet.py (6 lines):
- line 95: aa_layer: Optional[str] = None # FIXME support string factory for this
- line 180: # FIXME partial shortcut needed if first block handled as per original, not used for my current impl
- line 298: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling
- line 310: # FIXME this 1x1 expansion is pushed down into the cross and block paths in the darknet cfgs. Also,
- line 377: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling
- line 444: nn.AvgPool2d(2) if stride == 2 else nn.Identity(), # FIXME dilation handling

timm/models/_efficientnet_blocks.py (5 lines):
- line 93: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 147: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 236: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation
- line 344: # FIXME dilation isn't right w/ extra ks > 1 convs
- line 668: use_aa = aa_layer is not None and stride > 1 # FIXME handle dilation

timm/models/mobilenetv5.py (4 lines):
- line 297: # FIXME MFSA and forward_intermediates overlap, they both take indices from specific features
- line 320: # FIXME see note above
- line 359: # FIXME fix grad checkpointing
- line 545: # FIXME fix grad checkpointing

timm/layers/evo_norm.py (3 lines):
- line 67: x = x.reshape(B, groups, -1) # FIXME simpler shape causing TPU / XLA issues
- line 81: x = x.reshape(B, groups, -1) # FIXME simpler shape causing TPU / XLA issues
- line 87: #group_std = group_std_tpu # FIXME TPU temporary

timm/data/naflex_random_erasing.py (3 lines):
- line 141: # FIXME WIP, not completed. Downstream support in model needed for non-contiguous valid patches
- line 154: # patch dropout mode, completely remove dropped patches (FIXME needs downstream support in model)
- line 324: # FIXME we could vectorize patch mode across batch, worth the effort?

timm/models/_features.py (3 lines):
- line 156: FIXME This works well in eager Python but needs redesign for torchscript.
- line 305: # FIXME this may need to be more generic / flexible for some nets
- line 359: FIXME this does not currently work with Torchscript, see FeatureHooks class

timm/optim/adafactor_bv.py (3 lines):
- line 87: # FIXME try to check if momentum dtype is appropriate for device? Torch API not great for this.
- line 119: # FIXME this is a bit of a hack, optimizer.load_state_dict appears to upcast
- line 337: # FIXME TODO
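Several of the notes above concern PyTorch XLA lowering (swin_transformer_v2_cr.py lines 374/402, evo_norm.py). The swin entries refer to replacing `torch.roll` in the cyclic window shift with a slice-and-concatenate equivalent, since `roll` is not lowered on that backend. Below is a minimal single-dim sketch of the idea; `roll_via_cat` is an illustrative helper name, not a timm function.

```python
import torch

def roll_via_cat(x: torch.Tensor, shift: int, dim: int) -> torch.Tensor:
    """Emulate torch.roll along one dim with narrow + cat.

    Useful where roll is not lowered by the backend (e.g. PyTorch XLA).
    """
    n = x.size(dim)
    shift = shift % n
    if shift == 0:
        return x
    # the last `shift` elements wrap around to the front
    return torch.cat([x.narrow(dim, n - shift, shift), x.narrow(dim, 0, n - shift)], dim=dim)

x = torch.arange(12).reshape(3, 4)
assert torch.equal(roll_via_cat(x, 3, dim=1), torch.roll(x, 3, dims=1))
```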
timm/data/readers/reader_tfds.py (3 lines):
- line 146: self.input_key = input_key # FIXME support tuples / lists of inputs and targets and full range of Feature
- line 179: # FIXME need to determine if reinit_each_iter is necessary. I'm don't completely trust behaviour
- line 249: num_replicas_in_sync=self.dist_num_replicas # FIXME does this arg have any impact?

train.py (3 lines):
- line 523: args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly
- line 546: model.to(device=device, dtype=model_dtype) # FIXME move model device & dtype into create_model
- line 841: # FIXME reduces validation padding issues when using TFDS, WDS w/ workers and distributed training

timm/layers/pos_embed_rel.py (3 lines):
- line 39: # # FIXME different q vs k sizes is a WIP, need to better offset the two grids?
- line 98: src_size = (src_size, src_size) # FIXME could support non-equal src if argument passed
- line 481: # FIXME change to not use one-hot/einsum?

timm/models/davit.py (3 lines):
- line 589: # FIXME generalize this structure to ClassifierHead
- line 787: # FIXME cleaner approach to missing head norm?
- line 813: # TODO contact authors to get larger pretrained models

timm/data/readers/reader_wds.py (3 lines):
- line 210: # _logger.info(f'shuffle seed: {self.seed}, {seed}, epoch: {epoch}') # FIXME temporary
- line 434: # _logger.info(f'start {i}, {self.worker_id}') # FIXME temporary debug
- line 441: # _logger.info(f'end {i}, {self.worker_id}') # FIXME temporary debug

timm/models/vision_transformer.py (3 lines):
- line 1772: # hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k_in1k', # FIXME weight exists, need to push
- line 2668: # FIXME Google FlexiViT pretrained models have a strong preference for bilinear patch / embed
- line 2674: # FIXME attn pool (currently only in siglip) params removed if pool disabled, is there a better soln?

timm/optim/adopt.py (2 lines):
- line 187: #@_use_grad_for_differentiable # FIXME internal context mgr, can't use
- line 454: #@_disable_dynamo_if_unsupported(single_tensor_fn=_single_tensor_adopt) # FIXME internal context mgr, can't use

timm/optim/adamw.py (2 lines):
- line 298: # FIXME not 100% sure if this remains capturable?
- line 373: # TODO: use foreach_pow if/when foreach_pow is added

timm/data/readers/reader_factory.py (2 lines):
- line 22: # FIXME improve the selection right now just tfds prefix or fallback path, will need options to
- line 40: # FIXME support split here or in reader?

timm/layers/halo_attn.py (2 lines):
- line 149: # FIXME not clear if this stride behaviour is what the paper intended
- line 189: # FIXME figure out how to switch impl between this and conv2d if XLA being used.

timm/models/regnet.py (2 lines):
- line 128: # TODO dWr scaling?
- line 1207: # FIXME invalid weight <-> model match, mistake on their end
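The davit.py note about generalizing the head structure points at the reusable head timm already ships in timm.layers, which bundles global pooling, dropout, and the final linear layer. A sketch of what such a refactor would lean on, assuming the current ClassifierHead constructor; the feature size and class count here are illustrative, not davit's:

```python
import torch
from timm.layers import ClassifierHead

# ClassifierHead = global pool -> dropout -> linear, with reset_classifier-style reuse
head = ClassifierHead(in_features=768, num_classes=1000, pool_type='avg', drop_rate=0.0)

feats = torch.randn(2, 768, 7, 7)   # NCHW feature map from the last stage
logits = head(feats)                # -> shape (2, 1000)
```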
timm/layers/patch_embed.py (2 lines):
- line 183: # FIXME to remove, keeping for comparison for now
- line 596: # FIXME WIP

timm/models/efficientnet.py (2 lines):
- line 1428: # FIXME experimental
- line 2139: # FIXME experimental group cong / GroupNorm / EvoNorm experiments

timm/models/resnest.py (2 lines):
- line 49: assert aa_layer is None # TODO not yet supported
- line 50: assert drop_path is None # TODO not yet supported

timm/models/twins.py (2 lines):
- line 431: # FIXME slice block/pos_block if < max
- line 471: # FIXME add block pruning

timm/models/mvitv2.py (2 lines):
- line 863: # FIXME slice block/pos_block if < max
- line 905: # FIXME add stage pruning

timm/models/maxxvit.py (2 lines):
- line 566: # FIXME handle dilation of avg pool
- line 676: # FIXME handle dilation?

timm/models/_efficientnet_builder.py (2 lines):
- line 146: force_in_chs = int(options['fc']) if 'fc' in options else 0 # FIXME hack to deal with in_chs issue in TPU def
- line 471: # FIXME s2d is a WIP

timm/layers/pos_embed_sincos.py (2 lines):
- line 73: # FIXME add support for unflattened spatial dim?
- line 194: # FIXME support nD

timm/optim/nadamw.py (2 lines):
- line 267: # FIXME not 100% sure if this remains capturable?
- line 342: # TODO: use foreach_pow if/when foreach_pow is added

timm/models/byobnet.py (2 lines):
- line 525: FIXME is there a more common 3x3 + 1x1 conv block to name this after?
- line 977: # FIXME need to dilate self attn to have dilated network support, moop moop

timm/models/res2net.py (1 line):
- line 62: # FIXME this should probably have count_include_pad=False, but hurts original weights

timm/optim/lamb.py (1 line):
- line 229: # FIXME nested where required since logical and/or not working in PT XLA

timm/layers/pool2d_same.py (1 line):
- line 16: # FIXME how to deal with count_include_pad vs not for external padding?

timm/models/_manipulate.py (1 line):
- line 22: # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering

timm/layers/lambda_layer.py (1 line):
- line 127: # FIXME relative pos embedding path not fully verified

timm/models/vovnet.py (1 line):
- line 193: assert output_stride == 32 # FIXME support dilation

timm/data/naflex_loader.py (1 line):
- line 376: # FIXME add crop args when sequence transforms support crop modes

timm/models/fastvit.py (1 line):
- line 349: # FIXME output of this act was not used in original impl, likely due to bug

timm/layers/mlp.py (1 line):
- line 180: hidden_features = hidden_features // 2 # FIXME base reduction on gate property?

timm/models/_helpers.py (1 line):
- line 24: # FIXME replace with 3.9 stdlib fn when min at 3.9

timm/models/efficientvit_mit.py (1 line):
- line 1171: # FIXME will wait for v2 SAM models which are pending

timm/models/_registry.py (1 line):
- line 217: # FIXME should this be default behaviour? or default to include_tags=True?

timm/models/vision_transformer_sam.py (1 line):
- line 590: # FIXME only apply to final? Need experiments
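The lamb.py entry (and the matching lars.py entry in the next group) refers to expressing a two-condition select with nested torch.where, because tensor logical and/or did not lower cleanly on PyTorch XLA. A sketch of the pattern, using a LAMB-style trust ratio as the example; both branches are evaluated, so the division result is simply masked out when the guard fails:

```python
import torch

def trust_ratio(w_norm: torch.Tensor, g_norm: torch.Tensor) -> torch.Tensor:
    one = torch.ones_like(w_norm)
    # intent: where(w_norm > 0 and g_norm > 0, w_norm / g_norm, 1)
    # written as nested where so no logical-and op is needed (friendlier to XLA)
    return torch.where(w_norm > 0, torch.where(g_norm > 0, w_norm / g_norm, one), one)
```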
timm/models/crossvit.py (1 line):
- line 73: # FIXME look at relaxing size constraints

timm/optim/lars.py (1 line):
- line 106: # FIXME nested where required since logical and/or not working in PT XLA

timm/layers/attention2d.py (1 line):
- line 136: # FIXME dilation

timm/models/pit.py (1 line):
- line 365: # FIXME need to update resize for PiT impl

validate.py (1 line):
- line 248: model = model.to(device=device, dtype=model_dtype) # FIXME move model device & dtype into create_model

timm/data/readers/reader_image_in_tar.py (1 line):
- line 89: cache_tarinfo = True if tar_bytes > 10*1024**3 else False # FIXME magic number, 10GB

timm/models/_prune.py (1 line):
- line 98: # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?

timm/models/edgenext.py (1 line):
- line 350: # FIXME support dilation / output_stride

timm/loss/binary_cross_entropy.py (1 line):
- line 44: # FIXME should off/on be different for smoothing w/ BCE? Other impl out there differ

timm/optim/sgdw.py (1 line):
- line 92: # FIXME figure out how to make _use_grad_for_differentiable interchangeable with no_grad decorator

timm/layers/squeeze_excite.py (1 line):
- line 98: x_se = x.mean((1, 2), keepdims=True) # FIXME avg dim [1:n-1], don't assume 2D NHWC

timm/optim/mars.py (1 line):
- line 183: # FIXME add multi-tensor (if usage warrants), make more standard

timm/models/senet.py (1 line):
- line 11: FIXME I'm deprecating this model and moving them to ResNet as I don't want to maintain duplicate

bulk_runner.py (1 line):
- line 204: # FIXME batch_size retry loop is currently done in either validation.py or benchmark.py

timm/layers/attention_pool.py (1 line):
- line 83: # FIXME interpolate

timm/data/auto_augment.py (1 line):
- line 905: # TODO the results appear in the right ballpark but they differ by more than rounding.

timm/models/coat.py (1 line):
- line 551: parallel_blocks=[ # FIXME (partially?) overlap parallel w/ serial blocks??

timm/layers/norm_act.py (1 line):
- line 99: # TODO: if statement only here to tell the jit to skip emitting this when it is None

timm/models/ghostnet.py (1 line):
- line 657: # FIXME init

timm/models/mobilenetv3.py (1 line):
- line 650: FIXME untested, this is a preliminary impl of some FBNet-V3 variants.

timm/data/imagenet_info.py (1 line):
- line 40: # FIXME at some point pretrained_cfg should include dataset-tag,

timm/models/_factory.py (1 line):
- line 26: # FIXME may use fragment as revision, currently `@` in URI path

timm/models/dla.py (1 line):
- line 280: assert output_stride == 32 # FIXME support dilation

timm/layers/drop.py (1 line):
- line 137: self.fast = fast # FIXME finish comparisons of fast vs not

timm/models/mlp_mixer.py (1 line):
- line 300: # FIXME drop_path (stochastic depth scaling rule or all the same?)

timm/models/tiny_vit.py (1 line):
- line 626: # TODO: whether move this func into model for dynamic input resolution? (high risk)
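The squeeze_excite.py entry asks that the NHWC squeeze not hard-code the spatial dims (1, 2). A dimension-agnostic version might average over every dim except batch and the trailing channel dim, roughly as below (a sketch, not the current timm code):

```python
import torch

x = torch.randn(2, 14, 14, 256)   # channels-last example; could also be N, D, H, W, C
# current: x_se = x.mean((1, 2), keepdims=True)
x_se = x.mean(dim=tuple(range(1, x.ndim - 1)), keepdim=True)  # average all spatial dims, keep channels last
assert x_se.shape == (2, 1, 1, 256)
```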
timm/models/_hub.py (1 line):
- line 132: # FIXME I may change @ -> # and be parsed as fragment in a URI model name scheme

timm/data/transforms_factory.py (1 line):
- line 146: # FIXME integration of RKR is a WIP

timm/utils/distributed.py (1 line):
- line 114: # FIXME: verify that ROCm transform nccl to rccl

timm/data/dataset_factory.py (1 line):
- line 218: # FIXME support more advance split cfg for ImageFolder/Tar datasets in the future

timm/optim/adamp.py (1 line):
- line 33: # FIXME this is a problem for PyTorch XLA

timm/models/densenet.py (1 line):
- line 357: (r'^features\.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer

timm/data/naflex_dataset.py (1 line):
- line 5: TODO: 2. NaFlexIterableDatasetWrapper - Iterable dataset that yields batches with variable sequence lengths

timm/models/dpn.py (1 line):
- line 173: assert output_stride == 32 # FIXME look into dilation support

timm/optim/kron.py (1 line):
- line 106: deterministic: Deterministic behaviour across save / load (resume). FIXME slow, needs work

timm/models/metaformer.py (1 line):
- line 552: # FIXME not actually returning mlp hidden state right now as pre-logits.

timm/optim/_param_groups.py (1 line):
- line 87: # FIXME interface needs more work

timm/models/hrnet.py (1 line):
- line 536: assert output_stride == 32 # FIXME support dilation
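Several of the "support dilation" items above (vovnet, dla, edgenext, dpn, hrnet) would likely follow the stride-capping bookkeeping timm uses elsewhere (e.g. resnet.py): once the accumulated network stride reaches the requested output_stride, later stages trade their stride for dilation. A rough sketch of that logic; plan_stages is an illustrative helper, not existing timm code, and it assumes a stride-4 stem:

```python
def plan_stages(stage_strides, output_stride=32):
    """Turn per-stage strides into (stride, dilation) pairs that respect output_stride."""
    net_stride, dilation, plan = 4, 1, []   # assume a stride-4 stem, as in most timm CNNs
    for s in stage_strides:
        if net_stride >= output_stride:
            dilation *= s        # keep receptive-field growth, but stop downsampling
            s = 1
        else:
            net_stride *= s
        plan.append((s, dilation))
    return plan

print(plan_stages([1, 2, 2, 2], output_stride=8))  # -> [(1, 1), (2, 1), (1, 2), (1, 4)]
```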