in baselines/gist_baseline.py [0:0]
def main():
    """Compute GIST descriptors for a list of images and write them to HDF5.

    Reads image names from ``--file_list``, extracts GIST features (optionally
    in parallel with ``--nproc`` worker processes), L2-normalizes them, and
    either trains a faiss PCA matrix (``--train_pca``, writes the transform to
    ``--pca_file``) or optionally applies an existing PCA before writing the
    descriptors to ``--o``.
    """
    parser = argparse.ArgumentParser()

    def aa(*args, **kwargs):
        # Shorthand: add an argument to whichever group is currently bound.
        group.add_argument(*args, **kwargs)

    group = parser.add_argument_group('runtime options')
    aa('--nproc', default=0, type=int, help="number of subprocesses to use")
    aa('--giststream_exec',
       default="lear_gist-1.2/compute_gist_stream",
       help="executable that extracts GIST features")

    group = parser.add_argument_group('feature extraction options')
    aa('--transpose', default=-1, type=int, help="one of the 7 PIL transpose options ")
    aa('--train_pca', default=False, action="store_true", help="run PCA training")
    aa('--pca_file', default="", help="File with PCA descriptors")
    aa('--pca_dim', default=256, type=int, help="output dimension for PCA")
    aa('--pca_white', default=0.0, type=float, help="set to -0.5 to whiten PCA")

    group = parser.add_argument_group('dataset options')
    aa('--file_list', required=True, help="CSV file with image filenames")
    aa('--image_dir', default="", help="search image files in this directory")
    aa('--n_train_pca', default=10000, type=int, help="nb of training vectors for the PCA")
    aa('--i0', default=0, type=int, help="first image to process")
    aa('--i1', default=-1, type=int, help="last image to process + 1")

    group = parser.add_argument_group('output options')
    aa('--o', default="/tmp/desc.hdf5", help="write trained features to this file")

    args = parser.parse_args()
    print("args=", args)

    print("reading image names from", args.file_list)

    # Log a one-line hardware description for benchmarking reproducibility.
    if 'Linux' in platform.platform():
        os.system(
            'echo hardware_image_description: '
            '$( cat /proc/cpuinfo | grep ^"model name" | tail -1 ), '
            '$( cat /proc/cpuinfo | grep ^processor | wc -l ) cores'
        )
    else:
        print("hardware_image_description:", platform.machine(), "nb of threads:", args.nproc)

    # BUGFIX: use a with-statement so the file handle is closed instead of
    # leaked (the original `open()` result was never closed).
    with open(args.file_list, "r") as f:
        image_ids = [l.strip() for l in f]

    # Select the [i0, i1) slice of images to process.
    if args.i1 == -1:
        args.i1 = len(image_ids)
    image_ids = image_ids[args.i0:args.i1]

    # Full path name for the image.
    # BUGFIX: only append '/' when a directory was actually given. The old
    # check turned the default "" into "/", making every path absolute from
    # the filesystem root instead of relative to the working directory.
    image_dir = args.image_dir
    if image_dir and not image_dir.endswith('/'):
        image_dir += "/"

    # Add a .jpg suffix when the name has no extension at all.
    image_list = [
        image_dir + fname if "." in fname else image_dir + fname + ".jpg"
        for fname in image_ids
    ]
    print(f" found {len(image_list)} images")

    if args.train_pca:
        # Fixed seed so the PCA training subset is reproducible across runs.
        rs = np.random.RandomState(123)
        image_list = [
            image_list[i]
            for i in rs.choice(len(image_list), size=args.n_train_pca, replace=False)
        ]
        print(f"subsampled {args.n_train_pca} vectors")

    print("computing features")
    gist = GISTFeatures(
        execname=args.giststream_exec,
        transpose=args.transpose
    )

    t0 = time.time()
    if args.nproc == 0:
        all_desc = gist.compute_features(image_list)
    else:
        # Split the work into nproc roughly equal contiguous chunks and
        # extract features in parallel.
        pool = ProcessPool(args.nproc)
        try:
            n = len(image_list)
            nproc = args.nproc
            sub_lists = [
                image_list[i * n // nproc : (i + 1) * n // nproc]
                for i in range(nproc)
            ]
            all_desc = list(pool.map(gist.compute_features, sub_lists))
            all_desc = np.vstack(all_desc)
        finally:
            # BUGFIX: shut down the worker processes instead of leaking them.
            pool.close()
            pool.join()

    # Normalization is important: descriptors are compared with L2 / inner
    # product downstream.
    faiss.normalize_L2(all_desc)
    t1 = time.time()

    print()
    print(f"image_description_time: {(t1 - t0) / len(image_list):.5f} s per image")

    if args.train_pca:
        d = all_desc.shape[1]
        pca = faiss.PCAMatrix(d, args.pca_dim, args.pca_white)
        print(f"Train PCA {pca.d_in} -> {pca.d_out}")
        pca.train(all_desc)
        print(f"Storing PCA to {args.pca_file}")
        faiss.write_VectorTransform(pca, args.pca_file)
    elif args.pca_file:
        # Inference path: apply a previously trained PCA transform.
        print("Load PCA matrix", args.pca_file)
        pca = faiss.read_VectorTransform(args.pca_file)
        print(f"Apply PCA {pca.d_in} -> {pca.d_out}")
        all_desc = pca.apply_py(all_desc)

    # In PCA-training mode only the transform is saved, not the descriptors.
    if not args.train_pca:
        print(f"writing descriptors to {args.o}")
        write_hdf5_descriptors(all_desc, image_ids, args.o)