Skip to content

Commit e6ff7fe

Browse files
committed
minimal doc for clip batch and some fixes
1 parent 73e8429 commit e6ff7fe

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.env

Diff for: README.md

+14
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,16 @@
11
# clip-retrieval
22
Easily compute CLIP embeddings and build a CLIP retrieval system with them.
3+
4+
## clip batch
5+
6+
First install it by running:
7+
```
8+
python3 -m venv .env
9+
source .env/bin/activate
10+
pip install -U pip
11+
pip install clip-by-openai faiss-cpu fire
12+
```
13+
14+
Then put some images in an `example_folder`, together with text files that have the same names as the images (or pass `--enable_text=False` to skip the text), and run:
15+
* `python clip_batch.py --dataset_path example_folder --output_folder output_folder`
16+

Diff for: clip_batch.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!pip install clip-by-openai torch faiss
1+
#!pip install clip-by-openai faiss-cpu fire
22
import torch
33
import clip
44
from PIL import Image
@@ -29,7 +29,6 @@ def __init__(self,
2929
):
3030
super().__init__()
3131
path = Path(folder)
32-
self.model = model
3332
self.enable_text = enable_text
3433
self.enable_image = enable_image
3534

@@ -76,13 +75,14 @@ def __getitem__(self, ind):
7675
description = descriptions[self.description_index]
7776
tokenized_text = self.tokenizer([description[:255]])[0]
7877

79-
return {"image_tensor": image_tensor, "text_tokens": tokenized_text, "image_path": str(image_file), "text": description}
78+
return {"image_tensor": image_tensor, "text_tokens": tokenized_text, "image_filename": str(image_file), "text": description}
8079

8180

82-
def main(dataset_path, output_folder, batch_size=256, num_prepro_workers=32, description_index=0, enable_text=True, enable_image=True):
81+
def main(dataset_path, output_folder, batch_size=256, num_prepro_workers=8, description_index=0, enable_text=True, enable_image=True):
8382
device = "cuda" if torch.cuda.is_available() else "cpu"
84-
model, preprocess = clip.load("ViT-B/32", device=device)
85-
os.mkdir(output_folder)
83+
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
84+
if not os.path.exists(output_folder):
85+
os.mkdir(output_folder)
8686
data = DataLoader(ImageDataset(preprocess, dataset_path, description_index=description_index, enable_text=enable_text, enable_image=enable_image), \
8787
batch_size=batch_size, shuffle=False, num_workers=num_prepro_workers, pin_memory=True, prefetch_factor=2)
8888
if enable_image:
@@ -101,7 +101,7 @@ def main(dataset_path, output_folder, batch_size=256, num_prepro_workers=32, des
101101
if enable_text:
102102
text_features = model.encode_text(item["text_tokens"].cuda())
103103
text_embeddings.append(text_features.cpu().numpy())
104-
descriptions.extend(item["description"])
104+
descriptions.extend(item["text"])
105105

106106
if enable_image:
107107
img_emb_mat = np.concatenate(image_embeddings)

0 commit comments

Comments
 (0)