
Commit 210c3f9: "package it"
1 parent: a46fb35

13 files changed: +336 -86 lines

Diff for: .github/python-publish.yml (+31)

@@ -0,0 +1,31 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*

Diff for: .gitignore (+11 -1)

@@ -1 +1,11 @@
-.env
+.env
+*.egg-info
+.vscode
+.env
+__pycache__
+myimglist.txt
+.ipynb_checkpoints
+output_folder
+indice_folder
+image_folder
+cat

Diff for: .gitpod.DockerFile (+3)

@@ -0,0 +1,3 @@
+FROM gitpod/workspace-full:latest
+
+RUN apt-get update && apt-get install -y python3-opencv

Diff for: .gitpod.yml (+2)

@@ -0,0 +1,2 @@
+image:
+  file: .gitpod.DockerFile

Diff for: HISTORY.md (+3)

@@ -0,0 +1,3 @@
+## 1.0.0
+
+* it works

Diff for: README.md (+32 -18)

@@ -1,4 +1,8 @@
 # clip-retrieval
+[![pypi](https://img.shields.io/pypi/v/clip-retrieval.svg)](https://pypi.python.org/pypi/clip-retrieval)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rom1504/clip-retrieval/blob/master/notebook/clip-retrieval-getting-started.ipynb)
+[![Try it on gitpod](https://img.shields.io/badge/try-on%20gitpod-brightgreen.svg)](https://gitpod.io/#https://github.com/rom1504/clip-retrieval)
+
 Easily compute clip embeddings and build a clip retrieval system with them.

 * clip batch allows you to quickly (1500 samples/s on a 3080) compute image and text embeddings and indices
@@ -9,18 +13,23 @@ Easily compute clip embeddings and build a clip retrieval system with them.
 End to end, this makes it possible to build a simple semantic search system.
 Interested in learning about semantic search in general? You can read my [medium post](https://rom1504.medium.com/semantic-search-with-embeddings-index-anything-8fb18556443c) on the topic.

+## Install
+
+pip install clip-retrieval
+
 ## clip batch

-First install it by running:
+Get some images in an `example_folder`, for example by doing:
 ```
-python3 -m venv .env
-source .env/bin/activate
-pip install -U pip
-pip install clip-anytorch faiss-cpu fire
+pip install img2dataset
+echo 'https://placekitten.com/200/305' >> myimglist.txt
+echo 'https://placekitten.com/200/304' >> myimglist.txt
+echo 'https://placekitten.com/200/303' >> myimglist.txt
+img2dataset --url_list=myimglist.txt --output_folder=image_folder --thread_count=64 --image_size=256
 ```
+You can also put text files with the same names as the images in that folder, to get the text embeddings.

-Then put some images in a `example_folder` and some text with the same name (or use --enable_text=False) then
-* `python clip_batch.py --dataset_path example_folder --output_folder output_folder`
+Then run `clip-retrieval batch --dataset_path image_folder --output_folder indice_folder`

 Output folder will contain:
 * description_list containing the list of captions line by line
@@ -33,24 +42,16 @@
 ## Clip filter

 Once the embeddings are computed, you may want to filter out the data by a specific query.
-For that you can run `python clip_filter.py --query "dog" --output_folder "dog/" --indice_name "example_index"`
+For that you can run `clip-retrieval filter --query "cat" --output_folder "cat/" --indice_folder "indice_folder"`
 It will copy the 100 best images for this query into the output folder.
 Using the `--num_results` or `--threshold` options may be helpful to refine the filter.

 ## Clip back

-First install it by running:
-```bash
-python3 -m venv .env
-source .env/bin/activate
-pip install -U pip
-pip install clip-anytorch faiss-cpu fire flask flask_cors flask_restful
-```
-
 Then run (output_folder is the output of clip batch)
 ```bash
 echo '{"example_index": "output_folder"}' > indices_paths.json
-python clip_back.py 1234
+clip-retrieval back --port 1234 --indices-paths indices_paths.json
 ```

 At this point you have a simple flask server running on port 1234 that can answer these queries:
@@ -78,4 +79,17 @@ and returns:
         "text": "some result text"
     }
 ]
-```
+```
+
+## For development
+
+Either locally, or in [gitpod](https://gitpod.io/#https://github.com/rom1504/clip-retrieval) (do `export PIP_USER=false` there)
+
+Set up a virtualenv:
+
+```
+python3 -m venv .env
+source .env/bin/activate
+pip install -U pip
+pip install -e .
+```
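
For reference, the `/knn-service` endpoint above can be queried with nothing but the standard library. This is a minimal sketch, assuming the server is running locally on port 1234 with an `example_index` loaded; only the `text` and `image` fields are visible in this commit's `clip_back.py`, so the `indice_name` field is an assumption:

```python
# Sketch: POST a text query to a running clip-back server and save the
# returned base64-encoded JPEGs. Not part of this commit.
import base64
import json
from urllib.request import Request, urlopen

payload = json.dumps({"text": "cat", "indice_name": "example_index"}).encode("utf-8")
req = Request(
    "http://localhost:1234/knn-service",
    data=payload,
    headers={"Content-Type": "application/json"},
)
results = json.loads(urlopen(req).read())
for i, result in enumerate(results):
    # Each result carries a base64-encoded JPEG and its caption.
    with open(f"result_{i}.jpg", "wb") as f:
        f.write(base64.b64decode(result["image"]))
    print(result["text"])
```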

Diff for: clip_retrieval/__init__.py

Whitespace-only changes.

Diff for: clip_retrieval/cli.py (+16)

@@ -0,0 +1,16 @@
+from clip_retrieval.clip_back import clip_back
+from clip_retrieval.clip_batch import clip_batch
+from clip_retrieval.clip_filter import clip_filter
+import fire
+import logging
+
+
+def main():
+    """Main entry point"""
+    fire.Fire(
+        {
+            "back": clip_back,
+            "batch": clip_batch,
+            "filter": clip_filter
+        }
+    )
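
Because `fire.Fire` turns each dictionary key into a subcommand, `clip-retrieval back`, `clip-retrieval batch` and `clip-retrieval filter` dispatch straight to the three imported functions. They can equally be called from Python; a sketch reusing the folder names from the README example:

```python
# Sketch: calling the packaged entry point directly, equivalent to
# `clip-retrieval batch --dataset_path image_folder --output_folder indice_folder`.
from clip_retrieval.clip_batch import clip_batch

clip_batch(
    dataset_path="image_folder",    # images downloaded with img2dataset above
    output_folder="indice_folder",  # receives embeddings, lists and faiss indices
)
```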

Diff for: clip_back.py renamed to clip_retrieval/clip_back.py (+52 -43)

@@ -11,49 +11,23 @@
 from PIL import Image
 import base64
 import os
+import fire


-device = "cuda" if torch.cuda.is_available() else "cpu"
-model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
-
-indices = json.load(open("indices_paths.json"))
-
-indices_loaded = {}
-
-for name, indice_folder in indices.items():
-    image_present = os.path.exists(indice_folder+"/image_list")
-    text_present = os.path.exists(indice_folder+"/description_list")
-    if image_present:
-        with open(indice_folder+"/image_list") as f:
-            image_list = f.read().split("\n")
-        image_index = faiss.read_index(indice_folder+"/image.index")
-    else:
-        image_list = None
-        image_index = None
-    if text_present:
-        with open(indice_folder+"/description_list") as f:
-            description_list = f.read().split("\n")
-        text_index = faiss.read_index(indice_folder+"/text.index")
-    else:
-        description_list = None
-        text_index = None
-    indices_loaded[name]={
-        'image_list': image_list,
-        'description_list': description_list,
-        'image_index': image_index,
-        'text_index': text_index
-    }
-
 class Health(Resource):
     def get(self):
         return "ok"

 class IndicesList(Resource):
-    def get(self):
-        return list(indices.keys())
+    def get(self, **kwargs):
+        return list(kwargs['indices'].keys())

 class KnnService(Resource):
-    def post(self):
+    def post(self, **kwargs):
+        indices_loaded = kwargs['indices_loaded']
+        device = kwargs['device']
+        model = kwargs['model']
+        preprocess = kwargs['preprocess']
         json_data = request.get_json(force=True)
         text_input = json_data["text"] if "text" in json_data else None
         image_input = json_data["image"] if "image" in json_data else None
@@ -91,15 +65,50 @@ def post(self):
             img.save(buffered, format="JPEG")
             img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
             results.append({"image": img_str, "text": description})
-        return results
-
+        return results


-app = Flask(__name__)
-api = Api(app)
-api.add_resource(IndicesList, '/indices-list')
-api.add_resource(KnnService, '/knn-service')
-api.add_resource(Health, '/')

-if __name__ == '__main__':
+def clip_back(indices_paths="indices_paths.json", port=1234):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
+
+    indices = json.load(open(indices_paths))
+
+    indices_loaded = {}
+
+    for name, indice_folder in indices.items():
+        image_present = os.path.exists(indice_folder+"/image_list")
+        text_present = os.path.exists(indice_folder+"/description_list")
+        if image_present:
+            with open(indice_folder+"/image_list") as f:
+                image_list = [x for x in f.read().split("\n") if x !=""]
+            image_index = faiss.read_index(indice_folder+"/image.index")
+        else:
+            image_list = None
+            image_index = None
+        if text_present:
+            with open(indice_folder+"/description_list") as f:
+                description_list = [x for x in f.read().split("\n") if x !=""]
+            text_index = faiss.read_index(indice_folder+"/text.index")
+        else:
+            description_list = None
+            text_index = None
+        indices_loaded[name]={
+            'image_list': image_list,
+            'description_list': description_list,
+            'image_index': image_index,
+            'text_index': text_index
+        }
+
+    app = Flask(__name__)
+    api = Api(app)
+    api.add_resource(IndicesList, '/indices-list', resource_class_kwargs={'indices': indices})
+    api.add_resource(KnnService, '/knn-service', resource_class_kwargs={'indices_loaded': indices_loaded, 'device': device, \
+        'model': model, 'preprocess': preprocess})
+    api.add_resource(Health, '/')
     CORS(app)
-    app.run(host="0.0.0.0", port=int(sys.argv[1]), debug=False)
+    app.run(host="0.0.0.0", port=port, debug=False)
+
+
+if __name__ == '__main__':
+    fire.Fire(clip_back)
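
A caveat on the handler signatures above: flask_restful passes `resource_class_kwargs` to the Resource constructor, while the keyword arguments of `get`/`post` are filled from URL route variables, so reading the injected objects inside the handlers may not work as written. A constructor-based sketch of the same injection (not part of this commit):

```python
# Sketch: receive resource_class_kwargs in __init__, where
# flask_restful actually delivers them.
from flask_restful import Resource

class KnnService(Resource):
    def __init__(self, **kwargs):
        super().__init__()
        # Injected via api.add_resource(..., resource_class_kwargs={...})
        self.indices_loaded = kwargs["indices_loaded"]
        self.device = kwargs["device"]
        self.model = kwargs["model"]
        self.preprocess = kwargs["preprocess"]

    def post(self):
        # ...same body as above, reading from self.* instead of kwargs...
        pass
```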

Diff for: clip_batch.py renamed to clip_retrieval/clip_batch.py (+22 -10)

@@ -34,18 +34,22 @@ def __init__(self,
         if self.enable_text:
             text_files = [*path.glob('**/*.txt')]
             text_files = {text_file.stem: text_file for text_file in text_files}
+            if len(text_files) == 0:
+                self.enable_text = False
         if self.enable_image:
             image_files = [
                 *path.glob('**/*.png'), *path.glob('**/*.jpg'),
                 *path.glob('**/*.jpeg'), *path.glob('**/*.bmp')
             ]
             image_files = {image_file.stem: image_file for image_file in image_files}
+            if len(image_files) == 0:
+                self.enable_image = False

-        if enable_text and enable_image:
+        if self.enable_text and self.enable_image:
             keys = (image_files.keys() & text_files.keys())
-        elif enable_text:
+        elif self.enable_text:
             keys = text_files.keys()
-        elif enable_image:
+        elif self.enable_image:
             keys = image_files.keys()

         self.keys = list(keys)
@@ -63,27 +67,35 @@ def __len__(self):
     def __getitem__(self, ind):
         key = self.keys[ind]

+        output = {}
+
         if self.enable_image:
             image_file = self.image_files[key]
             image_tensor = self.image_transform(PIL.Image.open(image_file))
+            output["image_filename"] = str(image_file)
+            output["image_tensor"] = image_tensor


         if self.enable_text:
             text_file = self.text_files[key]
             descriptions = text_file.read_text().split('\n')
             description = descriptions[self.description_index]
             tokenized_text = self.tokenizer(description)
+            output["text_tokens"] = tokenized_text
+            output["text"] = description

-        return {"image_tensor": image_tensor, "text_tokens": tokenized_text, "image_filename": str(image_file), "text": description}
+        return output


-def main(dataset_path, output_folder, batch_size=256, num_prepro_workers=8, description_index=0, enable_text=True, enable_image=True):
+def clip_batch(dataset_path, output_folder, batch_size=256, num_prepro_workers=8, description_index=0, enable_text=True, enable_image=True):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
     if not os.path.exists(output_folder):
         os.mkdir(output_folder)
-    data = DataLoader(ImageDataset(preprocess, dataset_path, description_index=description_index, enable_text=enable_text, enable_image=enable_image), \
-        batch_size=batch_size, shuffle=False, num_workers=num_prepro_workers, pin_memory=True, prefetch_factor=2)
+    dataset = ImageDataset(preprocess, dataset_path, description_index=description_index, enable_text=enable_text, enable_image=enable_image)
+    enable_text = dataset.enable_text
+    enable_image = dataset.enable_image
+    data = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_prepro_workers, pin_memory=True, prefetch_factor=2)
     if enable_image:
         image_embeddings = []
         image_names = []
@@ -94,12 +106,12 @@ def main(dataset_path, output_folder, batch_size=256, num_prepro_workers=8, desc
     for i, item in enumerate(tqdm(data)):
         with torch.no_grad():
             if enable_image:
-                image_features = model.encode_image(item["image_tensor"].cuda())
+                image_features = model.encode_image(item["image_tensor"].to(device))
                 image_features /= image_features.norm(dim=-1, keepdim=True)
                 image_embeddings.append(image_features.cpu().numpy())
                 image_names.extend(item["image_filename"])
             if enable_text:
-                text_features = model.encode_text(item["text_tokens"].cuda())
+                text_features = model.encode_text(item["text_tokens"].to(device))
                 text_features /= text_features.norm(dim=-1, keepdim=True)
                 text_embeddings.append(text_features.cpu().numpy())
                 descriptions.extend(item["text"])
@@ -125,4 +137,4 @@
         faiss.write_index(text_index, output_folder +"/text.index")

 if __name__ == '__main__':
-    fire.Fire(main)
+    fire.Fire(clip_batch)
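
To make the batch output concrete, here is a sketch of loading the `image.index` and `image_list` files written by `clip-retrieval batch` and searching them with a text embedding, roughly what `clip_filter` (not shown in this diff) does; folder and file names follow the diffs above:

```python
# Sketch: search an image index produced by clip-retrieval batch with a
# text query. Not part of this commit.
import clip
import faiss
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model, _ = clip.load("ViT-B/32", device=device, jit=False)

image_index = faiss.read_index("indice_folder/image.index")
with open("indice_folder/image_list") as f:
    image_list = [x for x in f.read().split("\n") if x != ""]

with torch.no_grad():
    text_features = model.encode_text(clip.tokenize(["cat"]).to(device))
    text_features /= text_features.norm(dim=-1, keepdim=True)

# faiss expects float32; on GPU the CLIP model returns float16.
query = text_features.cpu().float().numpy()
distances, neighbors = image_index.search(query, 5)
for distance, neighbor in zip(distances[0], neighbors[0]):
    print(distance, image_list[neighbor])
```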
