# Retrieval evaluation settings. The keys below form a single flat mapping.
# NOTE(review): judging by the paths, this targets MSRVTT video retrieval with
# a BLIP checkpoint -- confirm against the script that loads this file.

# Directory holding the video files.
# NOTE(review): machine-specific absolute path; adjust for your environment.
video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos'
# Directory holding the annotation files (relative path).
ann_root: 'annotation'

# Pretrained checkpoint: set to a local file path or a URL.
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth'

# Size of the ViT model: 'base' or 'large'.
vit: 'base'

batch_size: 64
# NOTE(review): presumably the number of top-k candidates re-ranked at test
# time -- confirm against the consumer.
k_test: 128
# NOTE(review): presumably the input resolution in pixels -- confirm.
image_size: 384
# NOTE(review): presumably the number of frames sampled per video at test
# time -- confirm.
num_frm_test: 8