models/vision_transformer.py (3 lines):
- line 98: url='',  # FIXME I have weights for this but > 2GB limit for github release binaries
- line 220: # FIXME look at relaxing size constraints
- line 239: # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature

test.py (1 line):
- line 90: # TODO: support multiple images per gpu (only minor changes are needed)
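
The FIXME at line 239 refers to probing a backbone's output feature dimension with a throwaway forward pass rather than computing it from layer metadata. A minimal sketch of that trick is below; the helper name `probe_feature_dim` and its arguments are placeholders for illustration, not the actual code in models/vision_transformer.py.

```python
import torch
import torch.nn as nn

def probe_feature_dim(backbone: nn.Module, img_size=(224, 224), in_chans=3) -> int:
    """Infer the backbone's output feature dim by running a dummy tensor through it."""
    was_training = backbone.training
    backbone.eval()  # avoid BatchNorm/Dropout side effects during the probe
    with torch.no_grad():
        out = backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))
        if isinstance(out, (list, tuple)):
            out = out[-1]  # if several feature maps are returned, probe the last one
    backbone.train(was_training)  # restore the original train/eval mode
    return out.shape[1]  # channel dimension of an NCHW feature map
```

The probe is "hacky" in the sense that it costs one extra forward pass at construction time, but it stays correct for any backbone regardless of padding or stride details, which is why the comment calls it the most reliable option.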