-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_prepare_COCO.py
More file actions
77 lines (62 loc) · 2.58 KB
/
data_prepare_COCO.py
File metadata and controls
77 lines (62 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import os

import numpy as np
import torch
from clip import clip
from PIL import Image
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the script can still run on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

CLIP_MODEL_NAME = "RN50x64"

# Inputs.
TRAIN_CAPTIONS_JSON = "./data/COCO/train.json"
VAL_ANNOTATIONS_JSON = "./data/COCO/captions_val2014.json"
TEST_IMAGE_LIST = "./data/COCO/coco_test.txt"
IMAGE_DIR = "./data/COCO/image/"

# Outputs.
FEATURE_DIR = "./feature/COCO"
CAPTION_FEATURES_FILE = "./feature/COCO/caption_features.pkl"
CAPTIONS_FILE = "./feature/COCO/captions.npy"
NEIGHBORS_FILE = "./feature/COCO/nibers.npy"
IMAGE_FEATURES_FILE = "./feature/COCO/image_features.pkl"

MAX_CAPTIONS = 566720  # only the first MAX_CAPTIONS training entries are used
TEXT_BATCH_SIZE = 256
NUM_NEIGHBORS = 5


def load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, "r") as handle:
        return json.load(handle)


def read_image_list(path):
    """Return the image file names listed one per line in the file at *path*.

    Surrounding whitespace (including a trailing carriage return) is removed
    and blank lines are skipped.
    """
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def encode_captions(clip_model, captions, batch_size=TEXT_BATCH_SIZE):
    """Encode *captions* with the CLIP text encoder, *batch_size* at a time.

    Returns one tensor holding the text features of all captions, in the
    same order as *captions*.
    """
    features = []
    for start in range(0, len(captions), batch_size):
        batch = captions[start:start + batch_size]
        tokens = clip.tokenize(batch).to(device)
        features.append(clip_model.encode_text(tokens))
    return torch.cat(features)


def nearest_neighbors(features, k=NUM_NEIGHBORS, chunk_size=256):
    """Find, for every row of *features*, its *k* most similar other rows.

    Similarity is the dot product of L2-normalised rows (cosine similarity).
    A row is never reported as its own neighbour.

    Args:
        features: 2-D tensor with one feature vector per row.
        k: number of neighbours to return for each row.
        chunk_size: number of rows compared against all rows at once; this
            bounds the size of the temporary similarity matrix.

    Returns:
        Integer NumPy array of shape ``(rows, k)``; each row lists neighbour
        indices from most to least similar.

    Raises:
        ValueError: if there are not more rows than *k*, in which case *k*
            distinct neighbours do not exist.
    """
    count = features.shape[0]
    if k >= count:
        raise ValueError(
            f"need more than {k} feature rows to find {k} neighbours, got {count}"
        )

    normalized = features / features.norm(dim=-1, keepdim=True)
    neighbors = []
    for start in range(0, count, chunk_size):
        stop = min(start + chunk_size, count)
        similarity = normalized[start:stop] @ normalized.T
        # Exclude each row's similarity with itself. -inf (rather than 0) is
        # used so that the masked entry can never win, even when every real
        # similarity is negative.
        rows = torch.arange(stop - start, device=similarity.device)
        similarity[rows, rows + start] = float("-inf")
        neighbors.append(similarity.topk(k, dim=1).indices.cpu())
    return torch.cat(neighbors).numpy()


def encode_images(clip_model, preprocess, image_files, image_dir=IMAGE_DIR):
    """Encode each file of *image_files* (relative to *image_dir*) with CLIP.

    Returns one tensor holding the image features in the order of
    *image_files*.
    """
    features = []
    for image_file in image_files:
        # The context manager closes the underlying file once the image has
        # been preprocessed, so handles do not pile up over a long list.
        with Image.open(os.path.join(image_dir, image_file)) as ori_image:
            image = preprocess(ori_image).unsqueeze(0).to(device)
        features.append(clip_model.encode_image(image))
    return torch.cat(features)


def main():
    """Compute and save caption features, caption neighbours and image features."""
    # Create the output directory up front so that a missing directory does
    # not make the first save fail after the captions were already encoded.
    os.makedirs(FEATURE_DIR, exist_ok=True)

    clip_model, preprocess = clip.load(CLIP_MODEL_NAME, device=device)

    with torch.no_grad():
        # --- training captions -------------------------------------------
        annotations = load_json(TRAIN_CAPTIONS_JSON)
        captions = [
            annotation["caption"] for annotation in annotations[:MAX_CAPTIONS]
        ]

        caption_features = encode_captions(clip_model, captions)
        # The features are saved before normalisation.
        torch.save(caption_features, CAPTION_FEATURES_FILE)
        np.save(CAPTIONS_FILE, np.array(captions))

        # --- nearest neighbours among the captions -----------------------
        np.save(NEIGHBORS_FILE, nearest_neighbors(caption_features))
        del caption_features  # no longer needed; release the memory

        # --- test images -------------------------------------------------
        image_files = read_image_list(TEST_IMAGE_LIST)

        # Every listed file must appear in the validation annotations; check
        # before encoding so an unknown name is reported immediately.
        known_files = {
            image["file_name"] for image in load_json(VAL_ANNOTATIONS_JSON)["images"]
        }
        unknown = [name for name in image_files if name not in known_files]
        if unknown:
            raise KeyError(
                f"{len(unknown)} file(s) from {TEST_IMAGE_LIST} are missing in "
                f"{VAL_ANNOTATIONS_JSON}, e.g. {unknown[0]!r}"
            )

        image_features = encode_images(clip_model, preprocess, image_files)
        torch.save(image_features, IMAGE_FEATURES_FILE)


if __name__ == "__main__":
    main()