configs/detection3d/bevformer/bevformer_base_r101_dcn_nuscenes.py [205:300]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            dict(
                type='DefaultFormatBundle3D',
                class_names=CLASSES,
                with_label=False),
            dict(
                type='Collect3D',
                keys=['img'],
                meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                           'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
                           'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
                           'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                           'pcd_trans', 'sample_idx', 'prev_idx', 'next_idx',
                           'pcd_scale_factor', 'pcd_rotation', 'pts_filename',
                           'transformation_3d_flow', 'scene_token', 'can_bus'))
        ])
]

# Dataset / dataloader settings for the train and val splits.
data = {
    # One image per GPU; with 8 GPUs the total batch size is 8.
    'imgs_per_gpu': 1,
    'workers_per_gpu': 4,
    'pin_memory': True,
    # Sampler overrides, currently disabled:
    # 'shuffler_sampler': {'type': 'DistributedGroupSampler'},
    # 'nonshuffler_sampler': {'type': 'DistributedSampler'},
    'train': {
        'type': dataset_type,
        'data_source': {
            'type': 'Det3dSourceNuScenes',
            'data_root': data_root,
            'ann_file': data_root + 'nuscenes_infos_temporal_train.pkl',
            # Pipeline attached to the data source: multi-view image loading
            # followed by 3D annotation loading.
            'pipeline': [
                {
                    'type': 'LoadMultiViewImageFromFiles',
                    'to_float32': True,
                    'backend': 'turbojpeg',
                },
                {
                    'type': 'LoadAnnotations3D',
                    'with_bbox_3d': True,
                    'with_label_3d': True,
                    'with_attr_label': False,
                },
            ],
            'classes': CLASSES,
            'modality': input_modality,
            'test_mode': False,
            'use_valid_flag': True,
            # box_type_3d is 'LiDAR' for the kitti and nuscenes datasets and
            # 'Depth' for the sunrgbd and scannet datasets.
            'box_type_3d': 'LiDAR',
        },
        'pipeline': train_pipeline,
        'queue_length': queue_length,
    },
    'val': {
        'imgs_per_gpu': 1,
        'type': dataset_type,
        'data_source': {
            'type': 'Det3dSourceNuScenes',
            'data_root': data_root,
            'ann_file': data_root + 'nuscenes_infos_temporal_val.pkl',
            # Pipeline attached to the data source: image loading only.
            'pipeline': [
                {
                    'type': 'LoadMultiViewImageFromFiles',
                    'to_float32': True,
                    'backend': 'turbojpeg',
                },
            ],
            'classes': CLASSES,
            'modality': input_modality,
            'test_mode': True,
        },
        'pipeline': test_pipeline,
    },
}

# Per-key optimizer overrides, handed to the optimizer config below as
# `paramwise_options`; the 'img_backbone' entry sets lr_mult=0.1.
paramwise_cfg = {'img_backbone': {'lr_mult': 0.1}}
optimizer = {
    'type': 'AdamW',
    'lr': 2e-4,
    'paramwise_options': paramwise_cfg,
    'weight_decay': 0.01,
}

# Gradient clipping settings (max_norm=35, norm_type=2).
optimizer_config = {'grad_clip': {'max_norm': 35, 'norm_type': 2}}
# Learning-rate policy: 'CosineAnnealing' with a 'linear' warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 500,
    'warmup_ratio': 1.0 / 3,
    'min_lr_ratio': 1e-3,
}
total_epochs = 24

# Evaluation settings: initial=False, interval=1, gpu_collect=False.
eval_config = {'initial': False, 'interval': 1, 'gpu_collect': False}
# A single evaluation pipeline that reuses the `data['val']` dataset settings
# and reports through a 'NuScenesEvaluator'.
eval_pipelines = [{
    'mode': 'test',
    'data': data['val'],
    'dist_eval': True,
    'evaluators': [{
        'type': 'NuScenesEvaluator',
        'classes': CLASSES,
        'result_names': ['pts_bbox'],
    }],
}]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


configs/detection3d/bevformer/bevformer_tiny_r50_nuscenes.py [213:308]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            dict(
                type='DefaultFormatBundle3D',
                class_names=CLASSES,
                with_label=False),
            dict(
                type='Collect3D',
                keys=['img'],
                meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                           'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
                           'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
                           'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                           'pcd_trans', 'sample_idx', 'prev_idx', 'next_idx',
                           'pcd_scale_factor', 'pcd_rotation', 'pts_filename',
                           'transformation_3d_flow', 'scene_token', 'can_bus'))
        ])
]

# Dataset / dataloader settings for the train and val splits.
data = {
    # One image per GPU; with 8 GPUs the total batch size is 8.
    'imgs_per_gpu': 1,
    'workers_per_gpu': 4,
    'pin_memory': True,
    # Sampler overrides, currently disabled:
    # 'shuffler_sampler': {'type': 'DistributedGroupSampler'},
    # 'nonshuffler_sampler': {'type': 'DistributedSampler'},
    'train': {
        'type': dataset_type,
        'data_source': {
            'type': 'Det3dSourceNuScenes',
            'data_root': data_root,
            'ann_file': data_root + 'nuscenes_infos_temporal_train.pkl',
            # Pipeline attached to the data source: multi-view image loading
            # followed by 3D annotation loading.
            'pipeline': [
                {
                    'type': 'LoadMultiViewImageFromFiles',
                    'to_float32': True,
                    'backend': 'turbojpeg',
                },
                {
                    'type': 'LoadAnnotations3D',
                    'with_bbox_3d': True,
                    'with_label_3d': True,
                    'with_attr_label': False,
                },
            ],
            'classes': CLASSES,
            'modality': input_modality,
            'test_mode': False,
            'use_valid_flag': True,
            # box_type_3d is 'LiDAR' for the kitti and nuscenes datasets and
            # 'Depth' for the sunrgbd and scannet datasets.
            'box_type_3d': 'LiDAR',
        },
        'pipeline': train_pipeline,
        'queue_length': queue_length,
    },
    'val': {
        'imgs_per_gpu': 1,
        'type': dataset_type,
        'data_source': {
            'type': 'Det3dSourceNuScenes',
            'data_root': data_root,
            'ann_file': data_root + 'nuscenes_infos_temporal_val.pkl',
            # Pipeline attached to the data source: image loading only.
            'pipeline': [
                {
                    'type': 'LoadMultiViewImageFromFiles',
                    'to_float32': True,
                    'backend': 'turbojpeg',
                },
            ],
            'classes': CLASSES,
            'modality': input_modality,
            'test_mode': True,
        },
        'pipeline': test_pipeline,
    },
}

# Per-key optimizer overrides, handed to the optimizer config below as
# `paramwise_options`; the 'img_backbone' entry sets lr_mult=0.1.
paramwise_cfg = {'img_backbone': {'lr_mult': 0.1}}
optimizer = {
    'type': 'AdamW',
    'lr': 2e-4,
    'paramwise_options': paramwise_cfg,
    'weight_decay': 0.01,
}

# Gradient clipping settings (max_norm=35, norm_type=2).
optimizer_config = {'grad_clip': {'max_norm': 35, 'norm_type': 2}}
# Learning-rate policy: 'CosineAnnealing' with a 'linear' warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 500,
    'warmup_ratio': 1.0 / 3,
    'min_lr_ratio': 1e-3,
}
total_epochs = 24

# Evaluation settings: initial=False, interval=1, gpu_collect=False.
eval_config = {'initial': False, 'interval': 1, 'gpu_collect': False}
# A single evaluation pipeline that reuses the `data['val']` dataset settings
# and reports through a 'NuScenesEvaluator'.
eval_pipelines = [{
    'mode': 'test',
    'data': data['val'],
    'dist_eval': True,
    'evaluators': [{
        'type': 'NuScenesEvaluator',
        'classes': CLASSES,
        'result_names': ['pts_bbox'],
    }],
}]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -