dhchenx
diff --git a/‎LICENSE.txt renamed to ‎LICENSE
+10-8 b/‎LICENSE.txt renamed to ‎LICENSE
+10-8
diff --git a/‎MANIFEST.in
+1-3 b/‎MANIFEST.in
+1-3
diff --git a/‎README.md
+3-3 b/‎README.md
+3-3
diff --git a/‎examples/birds_features_lib/evaluate/step3_use_index.py
+82 b/‎examples/birds_features_lib/evaluate/step3_use_index.py
+82
diff --git a/‎examples/birds_features_lib/evaluate/step4_create_image_index.py
+38 b/‎examples/birds_features_lib/evaluate/step4_create_image_index.py
+38
diff --git a/‎examples/birds_features_lib/evaluate/step5_search_image_index.py
+50 b/‎examples/birds_features_lib/evaluate/step5_search_image_index.py
+50
diff --git a/‎examples/birds_features_lib/evaluate/step6_search_image_index_simplified.py
+35 b/‎examples/birds_features_lib/evaluate/step6_search_image_index_simplified.py
+35
diff --git a/‎examples/birds_features_lib/evaluate/test_inverted_index.py
+35 b/‎examples/birds_features_lib/evaluate/test_inverted_index.py
+35
diff --git a/‎examples/birds_features_lib/evaluate/test_positional_index.py
+46 b/‎examples/birds_features_lib/evaluate/test_positional_index.py
+46
diff --git a/‎examples/birds_features_lib/step1_create_bird_lib.py
-3 b/‎examples/birds_features_lib/step1_create_bird_lib.py
-3
diff --git a/‎examples/birds_features_lib/step5_search_image_index.py
+3-3 b/‎examples/birds_features_lib/step5_search_image_index.py
+3-3
@@ -1,11 +1,13 @@
-Copyright (c) 2016 The Python Packaging Authority (PyPA)
+MIT License
 
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
+Copyright (c) 2022 The Python Packaging Authority
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
@@ -16,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
@@ -1,10 +1,8 @@
-include pyproject.toml
-
 # Include the README
 include *.md
 
 # Include the license file
-include LICENSE.txt
+include LICENSE
 
 # Include setup.py
 include setup.py
 
@@ -3,7 +3,7 @@
 A light-weight Python library to extract, fuse and store multimodal features for deep learning.
 
 ## Objectives
-1. To extract and fuse various features from multimodal datasets in a rapid and easy manner;
+1. To extract, store and fuse various features from multimodal datasets in a rapid and easy manner;
 2. To provide a common foundation framework for storage and retrieving of multimodal data. 
 
 ## Modalities
@@ -15,7 +15,7 @@ The modalities to support include:
 5. Cross-modality between above
 
 ## Usage
-A toy example showing how to build a multimodal features library is here:
+A toy example showing how to build a multimodal feature (MMF) library is here:
 
 ```python
 from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
@@ -47,7 +47,7 @@ if __name__ == "__main__":
 ```
 
 ## Credits
-The project's source codes come from various open-source projects, we will include a list of their contribution and our improvement.
+The project's source code comes from various open-source projects; we will include a list of their contributions and our improvements.
 
 1. [A2Zadeh/CMU-MultimodalSDK](https://github.com/A2Zadeh/CMU-MultimodalSDK)
 2. [aishoot/Speech_Feature_Extraction](https://github.com/aishoot/Speech_Feature_Extraction)
 
@@ -0,0 +1,82 @@
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+import time
+
+mmf_file=f"../datasets/birds.mmf"
+
+def get_exact_match_ratio(lib,result):
+    """Return the share of search hits whose text contains the query verbatim.
+
+    Args:
+        lib: feature library; ``lib.get_content_by_id(key)`` must return a
+            mapping whose ``"text"`` entry yields bytes via ``[()]``.
+        result: collection of content ids returned by an index search; it is
+            iterated and must support ``len()``.
+
+    Returns:
+        The ratio rounded to 4 decimals, or 0.0 when ``result`` is empty.
+
+    Note:
+        Reads the module-level ``query_str``, so that name must be assigned
+        before the first call.
+    """
+    # An empty hit list used to raise ZeroDivisionError; report 0.0 instead.
+    if len(result) == 0:
+        print("p = ", 0.0)
+        return 0.0
+    num_match = 0
+    for key in result:
+        content = lib.get_content_by_id(key)
+        # Undecodable bytes are dropped rather than aborting the evaluation.
+        text = content["text"][()].decode("utf-8", "ignore")
+        if query_str in text:
+            num_match += 1
+    # Compute once so the printed and returned values cannot diverge.
+    ratio = round(num_match / len(result), 4)
+    print("p = ", ratio)
+    return ratio
+
+# Timing log: 2-tuples are (step, seconds); 3-tuples add the exact-match ratio.
+list_result=[]
+
+# The timer starts before the progress message, so the load time includes it.
+t0=time.time()
+print("loading mmf files...")
+birds_lib=MMFeaturesLib(file_path=mmf_file)
+list_result.append(("load",time.time()-t0))
+
+# One row per index build: (progress message, log label, index file, index type).
+index_jobs=[
+    ("creating plain text index...","brutal force indexing","../datasets/text.index","brutal_force"),
+    ("creating inverted index....","inverted indexing","../datasets/text_inverted.index","inverted_index"),
+    ("creating positional text...","positional indexing","../datasets/text_positional.index","positional_index"),
+]
+for message,label,index_path,index_type in index_jobs:
+    print(message)
+    t0=time.time()
+    birds_lib.to_index_file("text",index_path,index_type=index_type)
+    list_result.append((label,time.time()-t0))
+
+# start to perform search test
+# query_str stays a module-level name because get_exact_match_ratio reads it.
+query_str="large brown wings"
+
+# One row per search run: (progress message, log label, index file, search type).
+search_jobs=[
+    ("searching plain index test....","brutal force searching","../datasets/text.index","brutal_force"),
+    ("searching inverted index test....","inverted index searching","../datasets/text_inverted.index","inverted_index"),
+    ("searching positional index test....","positional index searching","../datasets/text_positional.index","positional_index"),
+]
+for message,label,index_path,search_type in search_jobs:
+    print(message)
+    t0=time.time()
+    hits=birds_lib.search_index(index_file_path=index_path,query=query_str,search_type=search_type)
+    print(hits)
+    # The elapsed time is taken after printing the hits, as in each run above.
+    elapsed=time.time()-t0
+    list_result.append((label,elapsed,get_exact_match_ratio(birds_lib,hits)))
+
+# Search runs are the 3-tuples in the log.
+print()
+print("Indexing method\tTime cost\tExact match ratio")
+for name,seconds,ratio in (row for row in list_result if len(row)==3):
+    print(f"{name}\t{seconds}\t{ratio}")
+
+# Load and index-build runs are the 2-tuples in the log.
+print()
+print("Loading method\tTime cost")
+for name,seconds in (row for row in list_result if len(row)==2):
+    print(f"{name}\t{seconds}")
+
+
@@ -0,0 +1,38 @@
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+from mmkfeatures.image.color_descriptor import ColorDescriptor
+import numpy as  np
+import cv2
+import pickle
+from tqdm import tqdm
+import time
+
+# Build a flat-file image index: one line per image, "<content id>,<f1>,<f2>,...".
+mmf_file="../datasets/birds_raw.mmf"
+
+feature_lib=MMFeaturesLib(file_path=mmf_file)
+
+data=feature_lib.get_data()
+
+cd = ColorDescriptor((8, 12, 3))
+
+print(data.keys())
+start_time=time.time()
+# "with" closes the index file even if describing an image raises midway.
+with open("../datasets/image.index","w") as f_out:
+    for cid in tqdm(data.keys()):
+        imgs=data[cid]["objects"]
+        for img_id in imgs:
+            # [()] reads the stored image out of the library entry.
+            img=imgs[img_id][()]
+            features=[str(f) for f in cd.describe(img)]
+            # Every image of an entry is written under the entry's own id.
+            f_out.write(cid+","+",".join(features)+"\n")
+time_cost=time.time()-start_time
+print("creating image index: ",time_cost)
@@ -0,0 +1,50 @@
+import time
+
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+from mmkfeatures.image.color_descriptor import ColorDescriptor
+import numpy as  np
+import cv2
+import pickle
+from tqdm import tqdm
+from mmkfeatures.image.image_searcher import Searcher
+
+# Query the flat-file image index with one test image and show the hits.
+mmf_file="../datasets/birds_raw.mmf"
+
+feature_lib=MMFeaturesLib(file_path=mmf_file)
+
+data=feature_lib.get_data()
+
+
+# initialize the image descriptor
+cd = ColorDescriptor((8, 12, 3))
+
+img_path="../datasets/CUB_200_2011/images/005.Crested_Auklet/Crested_Auklet_0001_794941.jpg"
+index_path="../datasets/image.index"
+
+print("Searching....")
+
+
+# load the query image and describe it
+query = cv2.imread(img_path)
+# cv2.imread reports a missing/unreadable file by returning None, not by raising.
+if query is None:
+    raise FileNotFoundError(f"cannot read query image: {img_path}")
+features = cd.describe(query)
+# perform the search (timed: constructing the Searcher plus the search call)
+start_time=time.time()
+searcher = Searcher(index_path)
+results = searcher.search(features)
+end_time=time.time()
+
+print("query time cost: ",end_time-start_time)
+# display the query; waitKey(0) blocks until a key is pressed
+cv2.imshow("Query", query)
+cv2.waitKey(0)
+# loop over the results
+for (score, resultID) in results:
+    print(resultID,score)
+    # Look the entry up once and reuse it for both the image and its label.
+    entry=data[str(resultID)]
+    image=entry["objects"]["0"][()]
+    title=entry["labels"][()][0]
+    # The window title must be a string, hence str(title).
+    cv2.imshow(str(title), image)
+    cv2.waitKey(0)
@@ -0,0 +1,35 @@
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+from mmkfeatures.image.color_descriptor import ColorDescriptor
+import cv2
+import time
+# load an existing multimodal feature lib
+mmf_file="../datasets/birds_raw.mmf"
+feature_lib=MMFeaturesLib(file_path=mmf_file)
+data=feature_lib.get_data()
+
+# set test image and index file's path
+test_img_path="../datasets/CUB_200_2011/images/005.Crested_Auklet/Crested_Auklet_0001_794941.jpg"
+index_path="../datasets/image.index"
+
+# create index
+feature_lib.to_obj_index(index_file=index_path,obj_field="objects",index_type="color_descriptor")
+
+# query index by color_descriptor
+cd = ColorDescriptor((8, 12, 3))
+query_image = cv2.imread(test_img_path)
+# cv2.imread reports a missing/unreadable file by returning None, not by raising.
+if query_image is None:
+    raise FileNotFoundError(f"cannot read query image: {test_img_path}")
+query_features = cd.describe(query_image)
+# Only the index lookup is timed, not the descriptor computation above.
+start_time=time.time()
+search_results=feature_lib.search_obj_index(index_file=index_path,features=query_features)
+end_time=time.time()
+print("simplified time cost: ",end_time-start_time)
+
+# loop over the results
+for (score, resultID) in search_results:
+    print(resultID,score)
+    content=feature_lib.get_content_by_id(resultID)
+    image=content["objects"]["0"][()]
+    title=content["labels"][()][0]
+    # The window title must be a string; waitKey(0) blocks until a key press.
+    cv2.imshow(str(title), image)
+    cv2.waitKey(0)
+
@@ -0,0 +1,35 @@
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+import time
+
+# Build an inverted text index, run one query and report exact-match precision.
+mmf_file="../datasets/birds.mmf"
+list_result=[]
+
+print("loading mmf files...")
+start_time=time.time()
+birds_lib=MMFeaturesLib(file_path=mmf_file)
+print("loading ",time.time()-start_time)
+
+# creating inverted index
+# Restart the timer so library loading is not counted as indexing time.
+start_time=time.time()
+birds_lib.to_index_file("text","../datasets/text_inverted.index",index_type="inverted_index")
+time_inverted=time.time()-start_time
+list_result.append(("inverted indexing",time_inverted))
+
+# Print the recorded value so it matches the one stored in list_result.
+print("time cost of creating inverted: ",time_inverted)
+
+# start to perform search test
+query_str="large brown wings"
+
+print("searching inverted index test....")
+start_time=time.time()
+result_bf=birds_lib.search_index(index_file_path="../datasets/text_inverted.index",query=query_str,search_type="inverted_index")
+print(result_bf)
+
+print("time cost of search inverted: ",time.time()-start_time)
+
+# Count hits whose text contains the query as one contiguous substring.
+num_match=0
+for key in result_bf:
+    content=birds_lib.get_content_by_id(key)
+    text=content["text"][()].decode("utf-8","ignore")
+    print(key,text)
+    if query_str in text:
+        num_match+=1
+# An empty hit list would otherwise raise ZeroDivisionError.
+precision=round(num_match*1.0/len(result_bf),4) if len(result_bf) else 0.0
+print("p = ",precision)
@@ -0,0 +1,46 @@
+from mmkfeatures.fusion.mm_features_lib import MMFeaturesLib
+import time
+
+# Build a positional text index, run one query and report exact-match precision.
+mmf_file="../datasets/birds.mmf"
+list_result=[]
+start_time=time.time()
+
+print("loading mmf files...")
+birds_lib=MMFeaturesLib(file_path=mmf_file)
+print("loading ",time.time()-start_time)
+# creating positional index
+print("creating positional text...")
+start_time=time.time()
+birds_lib.to_index_file("text","../datasets/text_positional.index",index_type="positional_index")
+
+time_positional=time.time()-start_time
+list_result.append(("positional indexing",time_positional))
+
+# start to perform search test
+query_str="large brown wings"
+
+
+print("searching positional index test....")
+start_time=time.time()
+result_bf=birds_lib.search_index(index_file_path="../datasets/text_positional.index",query=query_str,search_type="positional_index")
+print(result_bf)
+
+# The measured search time includes printing the hits above.
+search_time_positional=time.time()-start_time
+
+print("search time: ",search_time_positional)
+
+# show results
+# Count hits whose text contains the query as one contiguous substring.
+num_match=0
+for key in result_bf:
+    content=birds_lib.get_content_by_id(key)
+    text=content["text"][()].decode("utf-8","ignore")
+    print(key,text)
+    if query_str in text:
+        num_match+=1
+
+# An empty hit list would otherwise raise ZeroDivisionError.
+precision=round(num_match*1.0/len(result_bf),4) if len(result_bf) else 0.0
+print("p = ",precision)
+
+# export key values
+
+# birds_lib.export_key_values("text",save_path="text.csv")
+
@@ -1,6 +1,3 @@
-import os
-import sys
-import csv
 from tqdm import tqdm
 import cv2
 import numpy as np
 
@@ -27,15 +27,15 @@
 searcher = Searcher(index_path)
 results = searcher.search(features)
 # display the query
-# cv2.imshow("Query", query)
-# cv2.waitKey(0)
+cv2.imshow("Query", query)
+cv2.waitKey(0)
 
 # loop over the results
 for (score, resultID) in results:
     print(resultID,score)
     image=data[str(resultID)]["objects"]["0"][()]
     title=data[str(resultID)]["labels"][()][0]
-    cv2.imshow(title, image)
+    cv2.imshow(str(title), image)
     cv2.waitKey(0)
     # load the result image and display it
     # result = cv2.imread("datasets/CUB_200_2011/" + resultID)