caption_coco.yaml

image_root: '/export/share/datasets/vision/coco/images/'
ann_root: 'annotation'
coco_gt_root: 'annotation/coco_gt'
# set pretrained as a file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
# size of ViT model; base or large
vit: 'base'
vit_grad_ckpt: False
vit_ckpt_layer: 0
batch_size: 32
init_lr: 1e-5
# vit: 'large'
# vit_grad_ckpt: True
# vit_ckpt_layer: 5
# batch_size: 16
# init_lr: 2e-6
image_size: 384
# generation configs
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 5