-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathsimple.c
More file actions
73 lines (59 loc) · 2.15 KB
/
simple.c
File metadata and controls
73 lines (59 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// This is a dead-simple example in C that demonstrates basic usage of the clip.h API.
#include "clip.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
int main() {
char * model_path = "../../models/openai_clip-vit-base-patch32.q4_1.gguf";
char * img_path = "../../tests/red_apple.jpg";
char * text = "an apple";
int n_threads = 4;
int verbosity = 1;
// Load CLIP model
struct clip_ctx * ctx = clip_model_load(model_path, verbosity);
if (!ctx) {
printf("%s: Unable to load model from %s", __func__, model_path);
return 1;
}
int vec_dim = clip_get_vision_hparams(ctx)->projection_dim;
// Load image from disk
struct clip_image_u8 * img0 = clip_image_u8_make();
if (!clip_image_load_from_file(img_path, img0)) {
fprintf(stderr, "%s: failed to load image from '%s'\n", __func__, img_path);
return 1;
}
// Preprocess image
struct clip_image_f32 * img_res = clip_image_f32_make();
if (!clip_image_preprocess(ctx, img0, img_res)) {
fprintf(stderr, "%s: failed to preprocess image\n", __func__);
return 1;
}
// Encode image
float *img_vec = (float*)malloc(vec_dim * sizeof(float));
if (!clip_image_encode(ctx, n_threads, img_res, img_vec, true)) {
fprintf(stderr, "%s: failed to encode image\n", __func__);
return 1;
}
// Tokenize text
struct clip_tokens * tokens = NULL;
clip_tokenize(ctx, text, tokens);
// Encode text
float *txt_vec= (float *)malloc(vec_dim * sizeof(float));
if (!clip_text_encode(ctx, n_threads, tokens, txt_vec, true)) {
fprintf(stderr, "%s: failed to encode text\n", __func__);
return 1;
}
// Calculate image-text similarity
float score = clip_similarity_score(img_vec, txt_vec, vec_dim);
// Alternatively, you can replace the above steps with:
// float score;
// if (!clip_compare_text_and_image_c(ctx, n_threads, text, img0, &score)) {
// fprintf(stderr, "%s: failed to encode text\n", __func__);
// return 1;
// }
printf("Similarity score = %2.3f\n", score);
// Cleanup
clip_free(ctx);
free(img_vec);
free(txt_vec);
return 0;
}