class ClipVision:
    PROJECTOR_TYPE = "clip.vision.projector_type"  # for models with mixed modalities
    IMAGE_SIZE = "clip.vision.image_size"
+    IMAGE_MIN_PIXELS = "clip.vision.image_min_pixels"
+    IMAGE_MAX_PIXELS = "clip.vision.image_max_pixels"
    PREPROC_IMAGE_SIZE = "clip.vision.preproc_image_size"
    PATCH_SIZE = "clip.vision.patch_size"
    EMBEDDING_LENGTH = "clip.vision.embedding_length"
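The two new keys bound the pixel budget for dynamic-resolution image preprocessing. As a rough sketch of how a preprocessor might consume them (the `smart_resize` name and the exact rounding policy are illustrative assumptions, not code from this change):

```python
import math

def smart_resize(height: int, width: int, patch_size: int,
                 min_pixels: int, max_pixels: int) -> tuple[int, int]:
    # Illustrative assumption: rescale so the total pixel count falls
    # inside [min_pixels, max_pixels] while keeping the aspect ratio.
    area = height * width
    if area > max_pixels:
        scale = math.sqrt(area / max_pixels)
        height, width = int(height / scale), int(width / scale)
    elif area < min_pixels:
        scale = math.sqrt(min_pixels / area)
        height, width = math.ceil(height * scale), math.ceil(width * scale)
    # Snap both sides to the patch grid; this can nudge the area slightly
    # outside the budget, which a real implementation would re-check.
    height = max(patch_size, round(height / patch_size) * patch_size)
    width = max(patch_size, round(width / patch_size) * patch_size)
    return height, width
```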
    def add_vision_image_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)

+    def add_vision_min_pixels(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.IMAGE_MIN_PIXELS, value)
+
+    def add_vision_max_pixels(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.IMAGE_MAX_PIXELS, value)
+
    def add_vision_preproc_image_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
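For context, a converter would call the new setters next to the existing image-size one. A minimal usage sketch, where the file path, `arch` value, and pixel budgets are made-up illustrative values:

```python
from gguf import GGUFWriter

writer = GGUFWriter("vision-model.gguf", arch="clip")  # illustrative path and arch
writer.add_vision_image_size(1024)
writer.add_vision_min_pixels(56 * 56)          # e.g. smallest accepted pixel budget
writer.add_vision_max_pixels(28 * 28 * 1280)   # e.g. largest budget before downscaling
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```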
// vision-specific
#define KEY_VISION_PROJ_TYPE "clip.vision.projector_type" // for models with mixed modalities
#define KEY_IMAGE_SIZE "clip.vision.image_size"
+#define KEY_IMAGE_MIN_PIXELS "clip.vision.image_min_pixels"
+#define KEY_IMAGE_MAX_PIXELS "clip.vision.image_max_pixels"
#define KEY_PREPROC_IMAGE_SIZE "clip.vision.preproc_image_size"
#define KEY_PATCH_SIZE "clip.vision.patch_size"
#define KEY_IMAGE_MEAN "clip.vision.image_mean"
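The C defines mirror the Python key strings, so a file written as above can be checked from Python with gguf's reader. A quick sketch, where the file name is illustrative and the `parts`/`data` indexing assumes a scalar uint32 field:

```python
from gguf import GGUFReader

reader = GGUFReader("vision-model.gguf")  # illustrative path
for key in ("clip.vision.image_min_pixels", "clip.vision.image_max_pixels"):
    field = reader.get_field(key)
    if field is not None:
        # for a scalar field, the single value sits in the part
        # indexed by field.data[0]
        print(key, "=", int(field.parts[field.data[0]][0]))
```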