Skip to content

Commit 6e4cb89

Browse files
committed
resolve conflict
2 parents c5cbed1 + d137b2b commit 6e4cb89

File tree

4 files changed

+3096
-3
lines changed

4 files changed

+3096
-3
lines changed

README.md

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -573,18 +573,55 @@ Sign up for free to be a beta tester and get early access. Drop us an email at i
573573
<img src="./gallery/colab_logo.png" height="28">
574574
</a>
575575
</td>
576-
</tr>
576+
</tr>
577577
<tr>
578578
<td align="center">
579579
<a href="https://kaggle.com/kernels/welcome?src=https://github.com/visual-layer/fastdup/blob/main/examples/image-search.ipynb">
580580
<img src="./gallery/kaggle_logo.png" height="32">
581581
</a>
582582
</td>
583583
</tr>
584-
</table>
585-
584+
<!-- ------------------------------------------------------------------- -->
586585

586+
<tr>
587+
<td rowspan="4" width="160">
588+
<a href="https://visual-layer.readme.io/docs/running-over-extracted-features">
589+
<img src="gallery/feature_vector.png" width="256">
590+
</a>
591+
</td>
592+
<td rowspan="4">
593+
<b>Feature vectors:</b> In this tutorial, learn how to read fastdup-generated feature vectors in Python and use them for downstream processing, or how to run fastdup on feature vectors you computed yourself.
594+
</td>
595+
<td align="center" width="80">
596+
<a href="https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb">
597+
<img src="./gallery/nbviewer_logo.svg" height="34">
598+
</a>
599+
</td>
600+
</tr>
601+
<tr>
602+
<td align="center">
603+
<a href="https://github.com/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb">
604+
<img src="./gallery/github_logo.png" height="32">
605+
</a>
606+
</td>
607+
</tr>
608+
<tr>
609+
<td align="center">
610+
<a href="https://colab.research.google.com/github/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb">
611+
<img src="./gallery/colab_logo.png" height="28">
612+
</a>
613+
</td>
614+
</tr>
615+
<tr>
616+
<td align="center">
617+
<a href="https://kaggle.com/kernels/welcome?src=https://github.com/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb">
618+
<img src="./gallery/kaggle_logo.png" height="32">
619+
</a>
620+
</td>
621+
</tr>
587622

623+
624+
</table>
588625

589626

590627
## Getting Help

examples/feature_vectors.ipynb

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "a6445d60-47e5-4f91-93cb-3da1a37bc205",
6+
"metadata": {},
7+
"source": [
8+
"# Tutorial for working directly with feature vectors"
9+
]
10+
},
11+
{
12+
"cell_type": "markdown",
13+
"id": "9ece0f6e-518a-49b6-b06a-959d50bef991",
14+
"metadata": {},
15+
"source": [
16+
"## Use case 1: compute feature vectors with fastdup and load then with numpy for further processing"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 4,
22+
"id": "fd4a0e45-55d9-4544-877f-9dfcce74a0ad",
23+
"metadata": {},
24+
"outputs": [
25+
{
26+
"name": "stdout",
27+
"output_type": "stream",
28+
"text": [
29+
"\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
30+
"\u001b[0mRequirement already satisfied: fastdup in /Users/dannybickson/homebrew/lib/python3.8/site-packages (0.926)\n",
31+
"Collecting fastdup\n",
32+
" Downloading fastdup-1.0-cp38-cp38-macosx_11_0_arm64.whl (32.8 MB)\n",
33+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32.8/32.8 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
34+
"\u001b[?25hRequirement already satisfied: numpy in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (1.24.3)\n",
35+
"Requirement already satisfied: tqdm in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (4.65.0)\n",
36+
"Requirement already satisfied: pillow in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (9.5.0)\n",
37+
"Requirement already satisfied: pyyaml in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (6.0)\n",
38+
"Requirement already satisfied: pandas in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (2.0.1)\n",
39+
"Requirement already satisfied: requests==2.28.1 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (2.28.1)\n",
40+
"Requirement already satisfied: sentry-sdk in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (1.21.1)\n",
41+
"Requirement already satisfied: packaging in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (23.1)\n",
42+
"Requirement already satisfied: certifi in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (2022.12.7)\n",
43+
"Requirement already satisfied: opencv-python-headless<=4.5.5.64 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from fastdup) (4.5.5.64)\n",
44+
"Requirement already satisfied: charset-normalizer<3,>=2 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from requests==2.28.1->fastdup) (2.1.1)\n",
45+
"Requirement already satisfied: idna<4,>=2.5 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from requests==2.28.1->fastdup) (3.4)\n",
46+
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from requests==2.28.1->fastdup) (1.26.15)\n",
47+
"Requirement already satisfied: tzdata>=2022.1 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from pandas->fastdup) (2023.3)\n",
48+
"Requirement already satisfied: pytz>=2020.1 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from pandas->fastdup) (2023.3)\n",
49+
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from pandas->fastdup) (2.8.2)\n",
50+
"Requirement already satisfied: six>=1.5 in /Users/dannybickson/homebrew/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->fastdup) (1.16.0)\n",
51+
"Installing collected packages: fastdup\n",
52+
" Attempting uninstall: fastdup\n",
53+
" Found existing installation: fastdup 0.926\n",
54+
" Uninstalling fastdup-0.926:\n",
55+
" Successfully uninstalled fastdup-0.926\n",
56+
"\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
57+
"\u001b[0m\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
58+
"\u001b[0mSuccessfully installed fastdup-1.0\n",
59+
"Note: you may need to restart the kernel to use updated packages.\n"
60+
]
61+
}
62+
],
63+
"source": [
64+
"%pip install -U fastdup"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": 1,
70+
"id": "d6f3966d-e596-494f-91c6-282a3222919e",
71+
"metadata": {},
72+
"outputs": [
73+
{
74+
"name": "stdout",
75+
"output_type": "stream",
76+
"text": [
77+
"FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n",
78+
"2023-05-17 15:11:02 [INFO] Going to loop over dir /Users/dannybickson/visual_database/cxx/unittests/two_images\n",
79+
"2023-05-17 15:11:02 [INFO] Found total 2 images to run on, 2 train, 0 test, name list 2, counter 2 \n",
80+
"2023-05-17 15:11:03 [INFO] Found total 2 images to run onEstimated: 0 Minutess\n",
81+
"2023-05-17 15:11:03 [INFO] 19) Finished write_index() NN model\n",
82+
"2023-05-17 15:11:03 [INFO] Stored nn model index file out/nnf.index\n",
83+
"2023-05-17 15:11:03 [INFO] Total time took 1030 ms\n",
84+
"2023-05-17 15:11:03 [INFO] Found a total of 0 fully identical images (d>0.990), which are 0.00 %\n",
85+
"2023-05-17 15:11:03 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n",
86+
"2023-05-17 15:11:03 [INFO] Found a total of 0 above threshold images (d>0.900), which are 0.00 %\n",
87+
"2023-05-17 15:11:03 [INFO] Found a total of 1 outlier images (d<0.050), which are 50.00 %\n",
88+
"2023-05-17 15:11:03 [INFO] Min distance found 0.805 max distance 0.805\n",
89+
"2023-05-17 15:11:03 [INFO] Running connected components for ccthreshold 0.960000 \n",
90+
".0Read a total of 2 images\n",
91+
"Read embedding matrix of shape (2, 576)\n",
92+
"Image filenames are\n",
93+
"['/Users/dannybickson/visual_database/cxx/unittests/two_images/test_1234.jpg', '/Users/dannybickson/visual_database/cxx/unittests/two_images/train_1274.jpg']\n"
94+
]
95+
}
96+
],
97+
"source": [
98+
"import fastdup\n",
99+
"import numpy as np\n",
100+
"# Run fastup on an input image folder to create embeddings\n",
101+
"fd = fastdup.create(input_dir='/Users/dannybickson/visual_database/cxx/unittests/two_images/', work_dir='out')\n",
102+
"fd.run(overwrite=True, print_summary=False)\n",
103+
"\n",
104+
"# Read the embeddings to use them in python\n",
105+
"# There are two images in the input_dir, so the embedding matrix is 2x 576. \n",
106+
"# Each row in the embedding matrix is an image.\n",
107+
"flist, embedding_matrix = fastdup.load_binary_feature(filename='./out/atrain_features.dat')\n",
108+
"print('Read embedding matrix of shape', embedding_matrix.shape)\n",
109+
"print('Image filenames are')\n",
110+
"print(flist)\n"
111+
]
112+
},
113+
{
114+
"cell_type": "markdown",
115+
"id": "4c22f533-a30b-404b-abff-9812a9dedba6",
116+
"metadata": {},
117+
"source": [
118+
"# Use case 2: Save your own binary features to work with fastdup"
119+
]
120+
},
121+
{
122+
"cell_type": "markdown",
123+
"id": "dea40128-eac2-4e3c-8002-f89d3528fd1b",
124+
"metadata": {},
125+
"source": [
126+
"## Version 0.2"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": 2,
132+
"id": "d2617508-a261-41d7-919f-41f7e533891b",
133+
"metadata": {},
134+
"outputs": [
135+
{
136+
"name": "stdout",
137+
"output_type": "stream",
138+
"text": [
139+
"FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n",
140+
"2023-05-17 15:11:12 [INFO] Found total 2 images to run on\n",
141+
"2023-05-17 15:11:12 [INFO] 0) Finished write_index() NN model\n",
142+
"2023-05-17 15:11:12 [INFO] Stored nn model index file embedding_input/nnf.index\n",
143+
"2023-05-17 15:11:12 [INFO] Total time took 64 ms\n",
144+
"2023-05-17 15:11:12 [INFO] Found a total of 0 fully identical images (d>0.990), which are 0.00 %\n",
145+
"2023-05-17 15:11:12 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n",
146+
"2023-05-17 15:11:12 [INFO] Found a total of 0 above threshold images (d>0.900), which are 0.00 %\n",
147+
"2023-05-17 15:11:12 [INFO] Found a total of 1 outlier images (d<0.050), which are 50.00 %\n",
148+
"2023-05-17 15:11:12 [INFO] Min distance found 0.733 max distance 0.733\n",
149+
"2023-05-17 15:11:12 [INFO] Running connected components for ccthreshold 0.960000 \n",
150+
".0"
151+
]
152+
},
153+
{
154+
"data": {
155+
"text/plain": [
156+
"0"
157+
]
158+
},
159+
"execution_count": 2,
160+
"metadata": {},
161+
"output_type": "execute_result"
162+
}
163+
],
164+
"source": [
165+
"# replace the below code with computation of your own features\n",
166+
"matrix = np.random.rand(2, 576).astype('float32')\n",
167+
"\n",
168+
"# save the embedding along the filenames into a working folder\n",
169+
"!mkdir -p embedding_input\n",
170+
"fastdup.save_binary_feature('embedding_input', flist, matrix)\n",
171+
"\n",
172+
"\n",
173+
"fastdup.run('~/visual_database/cxx/unittests/two_images/', run_mode=2, work_dir='embedding_input')"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"id": "b04faace-8fdd-4d4b-8c6b-24f55ac89f89",
179+
"metadata": {},
180+
"source": [
181+
"## Version 1.0"
182+
]
183+
},
184+
{
185+
"cell_type": "code",
186+
"execution_count": 7,
187+
"id": "2ce1e4aa-c57f-4a26-bef2-0b398da19d1e",
188+
"metadata": {},
189+
"outputs": [
190+
{
191+
"name": "stdout",
192+
"output_type": "stream",
193+
"text": [
194+
"FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n",
195+
"2023-05-17 15:13:40 [INFO] Found total 2 images to run on\n",
196+
"2023-05-17 15:13:40 [INFO] 0) Finished write_index() NN model\n",
197+
"2023-05-17 15:13:40 [INFO] Stored nn model index file out3/nnf.index\n",
198+
"2023-05-17 15:13:40 [INFO] Total time took 65 ms\n",
199+
"2023-05-17 15:13:40 [INFO] Found a total of 0 fully identical images (d>0.990), which are 0.00 %\n",
200+
"2023-05-17 15:13:40 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n",
201+
"2023-05-17 15:13:40 [INFO] Found a total of 0 above threshold images (d>0.900), which are 0.00 %\n",
202+
"2023-05-17 15:13:40 [INFO] Found a total of 1 outlier images (d<0.050), which are 50.00 %\n",
203+
"2023-05-17 15:13:40 [INFO] Min distance found 0.733 max distance 0.733\n",
204+
"2023-05-17 15:13:40 [INFO] Running connected components for ccthreshold 0.960000 \n",
205+
".0"
206+
]
207+
}
208+
],
209+
"source": [
210+
"# Note: files should contain absolute path and not relative path\n",
211+
"import pandas as pd\n",
212+
"df = pd.DataFrame({'filename':flist})\n",
213+
"fd2 = fastdup.create(input_dir='/Users/dannybickson/visual_database/cxx/unittests/two_images/', work_dir='out3')\n",
214+
"fd2.run(annotations=df, embeddings=matrix, print_summary=False, overwrite=True)\n",
215+
"\n"
216+
]
217+
},
218+
{
219+
"cell_type": "code",
220+
"execution_count": null,
221+
"id": "44acb813-730b-4513-9266-17e0348f8584",
222+
"metadata": {},
223+
"outputs": [],
224+
"source": []
225+
}
226+
],
227+
"metadata": {
228+
"kernelspec": {
229+
"display_name": "Python 3 (ipykernel)",
230+
"language": "python",
231+
"name": "python3"
232+
},
233+
"language_info": {
234+
"codemirror_mode": {
235+
"name": "ipython",
236+
"version": 3
237+
},
238+
"file_extension": ".py",
239+
"mimetype": "text/x-python",
240+
"name": "python",
241+
"nbconvert_exporter": "python",
242+
"pygments_lexer": "ipython3",
243+
"version": "3.8.16"
244+
}
245+
},
246+
"nbformat": 4,
247+
"nbformat_minor": 5
248+
}

0 commit comments

Comments
 (0)