Skip to content

Commit d6e0bcc

Browse files
committed
add some benchmark info of clip back in the readme
1 parent c0cc06d commit d6e0bcc

File tree

4 files changed

+354
-3
lines changed

4 files changed

+354
-3
lines changed

Diff for: README.md

+26-3
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Easily compute clip embeddings and build a clip retrieval system with them. 100M
1515
End to end, this makes it possible to build a simple semantic search system.
1616
Interested in learning about semantic search in general? You can read my [medium post](https://rom1504.medium.com/semantic-search-with-embeddings-index-anything-8fb18556443c) on the topic.
1717

18-
[<img src="./clip-front-pic.png" alt="viewer" width="500">](https://rom1504.github.io/clip-retrieval/)
18+
[<img src="https://github.com/rom1504/clip-retrieval/raw/main/doc_assets/clip-front-pic.png" alt="clip front" width="500">](https://rom1504.github.io/clip-retrieval/)
1919

2020
## Who is using clip retrieval ?
2121

@@ -125,11 +125,13 @@ At this point you have a simple flask server running on port 1234 and that can a
125125
{
126126
"text": "a text query",
127127
"image": "a base64 image",
128+
"image_url": "http://some-url.com/a.jpg",
128129
"modality": "image", // image or text index to use
129130
"num_images": 4, // number of output images
130131
"indice_name": "example_index"
131132
}
132133
```
134+
text, image and image_url are mutually exclusive
133135
and returns:
134136
```js
135137
[
@@ -143,10 +145,31 @@ and returns:
143145
}
144146
]
145147
```
148+
Each object may also contain a `url` field if the metadata provides it.
146149

147-
This achieve low latency status (10ms). Throughput is about 100 query/s. For high throughput, using a grpc server is required.
150+
### Clip back: Benchmark and monitoring
148151

149-
This backends also exposes a prometheus `/metrics` endpoint as well as an human readable summary at `/metrics-summary`
152+
This backend has a latency of about 50ms when using memory-mapped indices and metadata. Throughput is about 20 queries/s. For high throughput, a grpc server is required, as well as a GPU for fast clip inference. Turning off the memory mapping options can also speed up requests, at the cost of high RAM usage.
153+
154+
This backend also exposes a prometheus `/metrics` endpoint as well as a human-readable summary at `/metrics-summary`.
155+
This can (optionally) be used to set up a [grafana dashboard](doc_assets/grafana_dashboard.json) for monitoring:
156+
157+
[<img src="https://github.com/rom1504/clip-retrieval/raw/main/doc_assets/clip-back-grafana.png" alt="grafana" width="1200">](https://github.com/rom1504/clip-retrieval/raw/main/doc_assets/clip-back-grafana.png)
158+
159+
This dashboard shows that the slowest part of any call is fetching the image from its url (in the case of an image url search), which can take up to 300ms.
160+
For text queries or image queries, the latency is about 50ms.
161+
Here is an example of output in the metrics summary:
162+
```
163+
Among 20.0 calls to the knn end point with an average latency of 0.1889s per request, the step costs are (in order):
164+
name description calls average proportion
165+
0 download_time Time spent downloading an url 6 0.3215s 170.2%
166+
1 metadata_get_time Time spent retrieving metadata 20 0.0415s 21.9%
167+
2 knn_index_time Time spent doing a knn on the index 20 0.0267s 14.1%
168+
3 image_clip_inference_time Time spent doing a image clip inference 6 0.0206s 10.9%
169+
4 text_clip_inference_time Time spent doing a text clip inference 14 0.0186s 9.8%
170+
5 image_prepro_time Time spent doing the image preprocessing 6 0.0097s 5.2%
171+
6 text_prepro_time Time spent doing the text preprocessing 14 0.0020s 1.0%
172+
```
150173

151174
## clip-front
152175

Diff for: doc_assets/clip-back-grafana.png

25.5 KB
Loading
File renamed without changes.

Diff for: doc_assets/grafana_dashboard.json

+328
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
{
2+
"__inputs": [],
3+
"__requires": [
4+
{
5+
"type": "grafana",
6+
"id": "grafana",
7+
"name": "Grafana",
8+
"version": "8.0.6"
9+
},
10+
{
11+
"type": "panel",
12+
"id": "timeseries",
13+
"name": "Time series",
14+
"version": ""
15+
}
16+
],
17+
"annotations": {
18+
"list": [
19+
{
20+
"builtIn": 1,
21+
"datasource": "-- Grafana --",
22+
"enable": true,
23+
"hide": true,
24+
"iconColor": "rgba(0, 211, 255, 1)",
25+
"name": "Annotations & Alerts",
26+
"type": "dashboard"
27+
}
28+
]
29+
},
30+
"editable": true,
31+
"gnetId": null,
32+
"graphTooltip": 0,
33+
"id": null,
34+
"links": [],
35+
"panels": [
36+
{
37+
"datasource": null,
38+
"fieldConfig": {
39+
"defaults": {
40+
"color": {
41+
"mode": "palette-classic"
42+
},
43+
"custom": {
44+
"axisLabel": "",
45+
"axisPlacement": "auto",
46+
"barAlignment": 0,
47+
"drawStyle": "line",
48+
"fillOpacity": 0,
49+
"gradientMode": "none",
50+
"hideFrom": {
51+
"legend": false,
52+
"tooltip": false,
53+
"viz": false
54+
},
55+
"lineInterpolation": "linear",
56+
"lineWidth": 1,
57+
"pointSize": 5,
58+
"scaleDistribution": {
59+
"type": "linear"
60+
},
61+
"showPoints": "auto",
62+
"spanNulls": false,
63+
"stacking": {
64+
"group": "A",
65+
"mode": "none"
66+
},
67+
"thresholdsStyle": {
68+
"mode": "off"
69+
}
70+
},
71+
"mappings": [],
72+
"thresholds": {
73+
"mode": "absolute",
74+
"steps": [
75+
{
76+
"color": "green",
77+
"value": null
78+
},
79+
{
80+
"color": "red",
81+
"value": 80
82+
}
83+
]
84+
},
85+
"unit": "s"
86+
},
87+
"overrides": []
88+
},
89+
"gridPos": {
90+
"h": 9,
91+
"w": 12,
92+
"x": 0,
93+
"y": 0
94+
},
95+
"id": 2,
96+
"options": {
97+
"legend": {
98+
"calcs": [],
99+
"displayMode": "list",
100+
"placement": "bottom"
101+
},
102+
"tooltip": {
103+
"mode": "single"
104+
}
105+
},
106+
"targets": [
107+
{
108+
"exemplar": true,
109+
"expr": "full_knn_request_time_sum / full_knn_request_time_count",
110+
"interval": "",
111+
"legendFormat": "full",
112+
"refId": "A"
113+
},
114+
{
115+
"exemplar": true,
116+
"expr": "metadata_get_time_sum / metadata_get_time_count",
117+
"hide": false,
118+
"interval": "",
119+
"legendFormat": "metadata",
120+
"refId": "B"
121+
},
122+
{
123+
"exemplar": true,
124+
"expr": "image_clip_inference_time_sum / image_clip_inference_time_count",
125+
"hide": false,
126+
"interval": "",
127+
"legendFormat": "image clip inference",
128+
"refId": "C"
129+
},
130+
{
131+
"exemplar": true,
132+
"expr": "text_clip_inference_time_sum / text_clip_inference_time_count",
133+
"hide": false,
134+
"interval": "",
135+
"legendFormat": "text clip inference",
136+
"refId": "D"
137+
},
138+
{
139+
"exemplar": true,
140+
"expr": "download_time_sum / download_time_count",
141+
"hide": false,
142+
"interval": "",
143+
"legendFormat": "download time",
144+
"refId": "E"
145+
},
146+
{
147+
"exemplar": true,
148+
"expr": "knn_index_time_sum / knn_index_time_count",
149+
"hide": false,
150+
"interval": "",
151+
"legendFormat": "knn index time",
152+
"refId": "F"
153+
},
154+
{
155+
"exemplar": true,
156+
"expr": "image_prepro_time_sum / image_prepro_time_count",
157+
"hide": false,
158+
"interval": "",
159+
"legendFormat": "image prepro",
160+
"refId": "G"
161+
},
162+
{
163+
"exemplar": true,
164+
"expr": "text_prepro_time_sum / text_prepro_time_count",
165+
"hide": false,
166+
"interval": "",
167+
"legendFormat": "text prepro",
168+
"refId": "H"
169+
}
170+
],
171+
"title": "Average latencies",
172+
"type": "timeseries"
173+
},
174+
{
175+
"datasource": null,
176+
"fieldConfig": {
177+
"defaults": {
178+
"color": {
179+
"mode": "palette-classic"
180+
},
181+
"custom": {
182+
"axisLabel": "",
183+
"axisPlacement": "auto",
184+
"barAlignment": 0,
185+
"drawStyle": "line",
186+
"fillOpacity": 0,
187+
"gradientMode": "none",
188+
"hideFrom": {
189+
"legend": false,
190+
"tooltip": false,
191+
"viz": false
192+
},
193+
"lineInterpolation": "linear",
194+
"lineWidth": 1,
195+
"pointSize": 5,
196+
"scaleDistribution": {
197+
"type": "linear"
198+
},
199+
"showPoints": "auto",
200+
"spanNulls": false,
201+
"stacking": {
202+
"group": "A",
203+
"mode": "none"
204+
},
205+
"thresholdsStyle": {
206+
"mode": "off"
207+
}
208+
},
209+
"mappings": [],
210+
"thresholds": {
211+
"mode": "absolute",
212+
"steps": [
213+
{
214+
"color": "green",
215+
"value": null
216+
},
217+
{
218+
"color": "red",
219+
"value": 80
220+
}
221+
]
222+
},
223+
"unit": "none"
224+
},
225+
"overrides": []
226+
},
227+
"gridPos": {
228+
"h": 9,
229+
"w": 12,
230+
"x": 12,
231+
"y": 0
232+
},
233+
"id": 3,
234+
"options": {
235+
"legend": {
236+
"calcs": [],
237+
"displayMode": "list",
238+
"placement": "bottom"
239+
},
240+
"tooltip": {
241+
"mode": "single"
242+
}
243+
},
244+
"targets": [
245+
{
246+
"exemplar": true,
247+
"expr": "increase(full_knn_request_time_count[$__range])\n",
248+
"interval": "",
249+
"legendFormat": "full",
250+
"refId": "A"
251+
},
252+
{
253+
"exemplar": true,
254+
"expr": "increase(metadata_get_time_count[$__range])\n",
255+
"hide": false,
256+
"interval": "",
257+
"legendFormat": "metadata",
258+
"refId": "B"
259+
},
260+
{
261+
"exemplar": true,
262+
"expr": "increase(image_clip_inference_time_count[$__range])",
263+
"hide": false,
264+
"interval": "",
265+
"legendFormat": "image clip inference",
266+
"refId": "C"
267+
},
268+
{
269+
"exemplar": true,
270+
"expr": "increase(text_clip_inference_time_count[$__range])",
271+
"hide": false,
272+
"interval": "",
273+
"legendFormat": "text clip inference",
274+
"refId": "D"
275+
},
276+
{
277+
"exemplar": true,
278+
"expr": "increase(download_time_count[$__range])",
279+
"hide": false,
280+
"interval": "",
281+
"legendFormat": "download time",
282+
"refId": "E"
283+
},
284+
{
285+
"exemplar": true,
286+
"expr": "increase(knn_index_time_count[$__range])",
287+
"hide": false,
288+
"interval": "",
289+
"legendFormat": "knn index time",
290+
"refId": "F"
291+
},
292+
{
293+
"exemplar": true,
294+
"expr": "increase(image_prepro_time_count[$__range])",
295+
"hide": false,
296+
"interval": "",
297+
"legendFormat": "image prepro",
298+
"refId": "G"
299+
},
300+
{
301+
"exemplar": true,
302+
"expr": "increase(text_prepro_time_count[$__range])",
303+
"hide": false,
304+
"interval": "",
305+
"legendFormat": "text prepro",
306+
"refId": "H"
307+
}
308+
],
309+
"title": "Request count",
310+
"type": "timeseries"
311+
}
312+
],
313+
"schemaVersion": 30,
314+
"style": "dark",
315+
"tags": [],
316+
"templating": {
317+
"list": []
318+
},
319+
"time": {
320+
"from": "now-1h",
321+
"to": "now"
322+
},
323+
"timepicker": {},
324+
"timezone": "",
325+
"title": "Clip",
326+
"uid": "zF8DzpI7z",
327+
"version": 2
328+
}

0 commit comments

Comments
 (0)