diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index a585dd06d..b52a37702 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -45,10 +45,8 @@ jobs:
           cd jni/external/faiss
           git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch
           rm ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch
-          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
-          rm ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
-          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch
-          rm ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch
+          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
+          rm ../../patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
         working-directory: ${{ github.workspace }}
 
       - name: Setup Java ${{ matrix.java }}
diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml
index cf83185f6..643552512 100644
--- a/.github/workflows/test_security.yml
+++ b/.github/workflows/test_security.yml
@@ -45,10 +45,8 @@ jobs:
           cd jni/external/faiss
           git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch
           rm ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch
-          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
-          rm ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
-          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch
-          rm ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch
+          git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
+          rm ../../patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
         working-directory: ${{ github.workspace }}
 
       - name: Setup Java ${{ matrix.java }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6420f56b1..65e3c8869 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,4 +23,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Maintenance
 * Bump faiss lib commit to 32f0e8cf92cd2275b60364517bb1cce67aa29a55 [#1443](https://github.com/opensearch-project/k-NN/pull/1443)
 * Fix FieldInfo Parameters Mismatch [#1489](https://github.com/opensearch-project/k-NN/pull/1489)
+* Upgrade faiss to 12b92e9 [#1509](https://github.com/opensearch-project/k-NN/pull/1509)
 ### Refactoring
diff --git a/jni/CMakeLists.txt b/jni/CMakeLists.txt
index 0f0b58738..c06337338 100644
--- a/jni/CMakeLists.txt
+++ b/jni/CMakeLists.txt
@@ -154,20 +154,12 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S
     endif ()
 
     # Check if patch exist, this is to skip git apply during CI build. See CI.yml with ubuntu.
-    find_path(PATCH_FILE NAMES 0001-Custom-patch-to-support-multi-vector.patch 0002-Custom-patch-to-support-sqfp16-neon.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss NO_DEFAULT_PATH)
+    find_path(PATCH_FILE NAMES 0001-Custom-patch-to-support-multi-vector.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss NO_DEFAULT_PATH)
 
     # If it exists, apply patches
     if (EXISTS ${PATCH_FILE})
         message(STATUS "Applying custom patches.")
         execute_process(COMMAND git apply --ignore-space-change --ignore-whitespace --3way ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0001-Custom-patch-to-support-multi-vector.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-
-        # 0002-Custom-patch-to-support-sqfp16-neon.patch is a temporary patch to add NEON support to SQ.
-        # Once the commit conflict issues wrt to Multi vector are resolved, this patch can be removed by updating the faiss submodule with corresponding commit.
-        # Apply the patch if the OS is not Windows and Processor is aarch64.
-        if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL Windows AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" AND ${SIMD_ENABLED})
-            execute_process(COMMAND git apply --ignore-space-change --ignore-whitespace --3way ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-        endif()
-
         if(RESULT_CODE)
             message(FATAL_ERROR "Failed to apply patch:\n${ERROR_MSG}")
         endif()
diff --git a/jni/external/faiss b/jni/external/faiss
index 32f0e8cf9..12b92e9fa 160000
--- a/jni/external/faiss
+++ b/jni/external/faiss
@@ -1 +1 @@
-Subproject commit 32f0e8cf92cd2275b60364517bb1cce67aa29a55
+Subproject commit 12b92e9fa5d8e8fb3da53c57af9ff007c826b1ee
diff --git a/jni/patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch b/jni/patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
similarity index 100%
rename from jni/patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch
rename to jni/patches/faiss/0002-Custom-patch-to-support-AVX2-Linux-CI.patch
diff --git a/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch b/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
deleted file mode 100644
index d743d0a97..000000000
--- a/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch
+++ /dev/null
@@ -1,495 +0,0 @@
-diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt
-index db5133d6..22dac7cb 100644
---- a/faiss/CMakeLists.txt
-+++ b/faiss/CMakeLists.txt
-@@ -189,6 +189,7 @@ set(FAISS_HEADERS
-   utils/extra_distances.h
-   utils/fp16-fp16c.h
-   utils/fp16-inl.h
-+  utils/fp16-arm.h
-   utils/fp16.h
-   utils/hamming-inl.h
-   utils/hamming.h
-diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
-index fc7b28ef..07d77d56 100644
---- a/faiss/impl/ScalarQuantizer.cpp
-+++ b/faiss/impl/ScalarQuantizer.cpp
-@@ -91,6 +91,20 @@ struct Codec8bit {
-         return _mm256_fmadd_ps(f8, one_255, half_one_255);
-     }
- #endif
-+
-+#ifdef __aarch64__
-+    static FAISS_ALWAYS_INLINE float32x4x2_t
-+    decode_8_components(const uint8_t* code, int i) {
-+        float32_t result[8] = {};
-+        for (size_t j = 0; j < 8; j++) {
-+            result[j] = decode_component(code, i + j);
-+        }
-+        float32x4_t res1 = vld1q_f32(result);
-+        float32x4_t res2 = vld1q_f32(result + 4);
-+        float32x4x2_t res = vzipq_f32(res1, res2);
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+#endif
- };
- 
- struct Codec4bit {
-@@ -129,6 +143,20 @@ struct Codec4bit {
-         return _mm256_mul_ps(f8, one_255);
-     }
- #endif
-+
-+#ifdef __aarch64__
-+    static FAISS_ALWAYS_INLINE float32x4x2_t
-+    decode_8_components(const uint8_t* code, int i) {
-+        float32_t result[8] = {};
-+        for (size_t j = 0; j < 8; j++) {
-+            result[j] = decode_component(code, i + j);
-+        }
-+        float32x4_t res1 = vld1q_f32(result);
-+        float32x4_t res2 = vld1q_f32(result + 4);
-+        float32x4x2_t res = vzipq_f32(res1, res2);
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+#endif
- };
- 
- struct Codec6bit {
-@@ -228,6 +256,20 @@ struct Codec6bit {
-     }
- 
- #endif
-+
-+#ifdef __aarch64__
-+    static FAISS_ALWAYS_INLINE float32x4x2_t
-+    decode_8_components(const uint8_t* code, int i) {
-+        float32_t result[8] = {};
-+        for (size_t j = 0; j < 8; j++) {
-+            result[j] = decode_component(code, i + j);
-+        }
-+        float32x4_t res1 = vld1q_f32(result);
-+        float32x4_t res2 = vld1q_f32(result + 4);
-+        float32x4x2_t res = vzipq_f32(res1, res2);
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+#endif
- };
- 
- /*******************************************************************
-@@ -293,6 +335,31 @@ struct QuantizerTemplate<Codec, true, 8> : QuantizerTemplate<Codec, true, 1> {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <class Codec>
-+struct QuantizerTemplate<Codec, true, 8> : QuantizerTemplate<Codec, true, 1> {
-+    QuantizerTemplate(size_t d, const std::vector<float>& trained)
-+            : QuantizerTemplate<Codec, true, 1>(d, trained) {}
-+
-+    FAISS_ALWAYS_INLINE float32x4x2_t
-+    reconstruct_8_components(const uint8_t* code, int i) const {
-+        float32x4x2_t xi = Codec::decode_8_components(code, i);
-+        float32x4x2_t res = vzipq_f32(
-+                vfmaq_f32(
-+                        vdupq_n_f32(this->vmin),
-+                        xi.val[0],
-+                        vdupq_n_f32(this->vdiff)),
-+                vfmaq_f32(
-+                        vdupq_n_f32(this->vmin),
-+                        xi.val[1],
-+                        vdupq_n_f32(this->vdiff)));
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+};
-+
-+#endif
-+
- template <class Codec>
- struct QuantizerTemplate<Codec, false, 1> : ScalarQuantizer::SQuantizer {
-     const size_t d;
-@@ -350,6 +417,29 @@ struct QuantizerTemplate<Codec, false, 8> : QuantizerTemplate<Codec, false, 1> {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <class Codec>
-+struct QuantizerTemplate<Codec, false, 8> : QuantizerTemplate<Codec, false, 1> {
-+    QuantizerTemplate(size_t d, const std::vector<float>& trained)
-+            : QuantizerTemplate<Codec, false, 1>(d, trained) {}
-+
-+    FAISS_ALWAYS_INLINE float32x4x2_t
-+    reconstruct_8_components(const uint8_t* code, int i) const {
-+        float32x4x2_t xi = Codec::decode_8_components(code, i);
-+
-+        float32x4x2_t vmin_8 = vld1q_f32_x2(this->vmin + i);
-+        float32x4x2_t vdiff_8 = vld1q_f32_x2(this->vdiff + i);
-+
-+        float32x4x2_t res = vzipq_f32(
-+                vfmaq_f32(vmin_8.val[0], xi.val[0], vdiff_8.val[0]),
-+                vfmaq_f32(vmin_8.val[1], xi.val[1], vdiff_8.val[1]));
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+};
-+
-+#endif
-+
- /*******************************************************************
-  * FP16 quantizer
-  *******************************************************************/
-@@ -397,6 +487,23 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <>
-+struct QuantizerFP16<8> : QuantizerFP16<1> {
-+    QuantizerFP16(size_t d, const std::vector<float>& trained)
-+            : QuantizerFP16<1>(d, trained) {}
-+
-+    FAISS_ALWAYS_INLINE float32x4x2_t
-+    reconstruct_8_components(const uint8_t* code, int i) const {
-+        uint16x4x2_t codei = vld2_u16((const uint16_t*)(code + 2 * i));
-+        return vzipq_f32(
-+                vcvt_f32_f16(vreinterpret_f16_u16(codei.val[0])),
-+                vcvt_f32_f16(vreinterpret_f16_u16(codei.val[1])));
-+    }
-+};
-+#endif
-+
- /*******************************************************************
-  * 8bit_direct quantizer
-  *******************************************************************/
-@@ -446,6 +553,28 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <>
-+struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
-+    Quantizer8bitDirect(size_t d, const std::vector<float>& trained)
-+            : Quantizer8bitDirect<1>(d, trained) {}
-+
-+    FAISS_ALWAYS_INLINE float32x4x2_t
-+    reconstruct_8_components(const uint8_t* code, int i) const {
-+        float32_t result[8] = {};
-+        for (size_t j = 0; j < 8; j++) {
-+            result[j] = code[i + j];
-+        }
-+        float32x4_t res1 = vld1q_f32(result);
-+        float32x4_t res2 = vld1q_f32(result + 4);
-+        float32x4x2_t res = vzipq_f32(res1, res2);
-+        return vuzpq_f32(res.val[0], res.val[1]);
-+    }
-+};
-+
-+#endif
-+
- template <int SIMDWIDTH>
- ScalarQuantizer::SQuantizer* select_quantizer_1(
-         QuantizerType qtype,
-@@ -728,6 +857,59 @@ struct SimilarityL2<8> {
- 
- #endif
- 
-+#ifdef __aarch64__
-+template <>
-+struct SimilarityL2<8> {
-+    static constexpr int simdwidth = 8;
-+    static constexpr MetricType metric_type = METRIC_L2;
-+
-+    const float *y, *yi;
-+    explicit SimilarityL2(const float* y) : y(y) {}
-+    float32x4x2_t accu8;
-+
-+    FAISS_ALWAYS_INLINE void begin_8() {
-+        accu8 = vzipq_f32(vdupq_n_f32(0.0f), vdupq_n_f32(0.0f));
-+        yi = y;
-+    }
-+
-+    FAISS_ALWAYS_INLINE void add_8_components(float32x4x2_t x) {
-+        float32x4x2_t yiv = vld1q_f32_x2(yi);
-+        yi += 8;
-+
-+        float32x4_t sub0 = vsubq_f32(yiv.val[0], x.val[0]);
-+        float32x4_t sub1 = vsubq_f32(yiv.val[1], x.val[1]);
-+
-+        float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0);
-+        float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1);
-+
-+        float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
-+        accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
-+    }
-+
-+    FAISS_ALWAYS_INLINE void add_8_components_2(
-+            float32x4x2_t x,
-+            float32x4x2_t y) {
-+        float32x4_t sub0 = vsubq_f32(y.val[0], x.val[0]);
-+        float32x4_t sub1 = vsubq_f32(y.val[1], x.val[1]);
-+
-+        float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0);
-+        float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1);
-+
-+        float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
-+        accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
-+    }
-+
-+    FAISS_ALWAYS_INLINE float result_8() {
-+        float32x4_t sum_0 = vpaddq_f32(accu8.val[0], accu8.val[0]);
-+        float32x4_t sum_1 = vpaddq_f32(accu8.val[1], accu8.val[1]);
-+
-+        float32x4_t sum2_0 = vpaddq_f32(sum_0, sum_0);
-+        float32x4_t sum2_1 = vpaddq_f32(sum_1, sum_1);
-+        return vgetq_lane_f32(sum2_0, 0) + vgetq_lane_f32(sum2_1, 0);
-+    }
-+};
-+#endif
-+
- template <int SIMDWIDTH>
- struct SimilarityIP {};
- 
-@@ -801,6 +983,56 @@ struct SimilarityIP<8> {
- };
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <>
-+struct SimilarityIP<8> {
-+    static constexpr int simdwidth = 8;
-+    static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
-+
-+    const float *y, *yi;
-+
-+    explicit SimilarityIP(const float* y) : y(y) {}
-+    float32x4x2_t accu8;
-+
-+    FAISS_ALWAYS_INLINE void begin_8() {
-+        accu8 = vzipq_f32(vdupq_n_f32(0.0f), vdupq_n_f32(0.0f));
-+        yi = y;
-+    }
-+
-+    FAISS_ALWAYS_INLINE void add_8_components(float32x4x2_t x) {
-+        float32x4x2_t yiv = vld1q_f32_x2(yi);
-+        yi += 8;
-+
-+        float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], yiv.val[0], x.val[0]);
-+        float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], yiv.val[1], x.val[1]);
-+        float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
-+        accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
-+    }
-+
-+    FAISS_ALWAYS_INLINE void add_8_components_2(
-+            float32x4x2_t x1,
-+            float32x4x2_t x2) {
-+        float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], x1.val[0], x2.val[0]);
-+        float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], x1.val[1], x2.val[1]);
-+        float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
-+        accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
-+    }
-+
-+    FAISS_ALWAYS_INLINE float result_8() {
-+        float32x4x2_t sum_tmp = vzipq_f32(
-+                vpaddq_f32(accu8.val[0], accu8.val[0]),
-+                vpaddq_f32(accu8.val[1], accu8.val[1]));
-+        float32x4x2_t sum = vuzpq_f32(sum_tmp.val[0], sum_tmp.val[1]);
-+        float32x4x2_t sum2_tmp = vzipq_f32(
-+                vpaddq_f32(sum.val[0], sum.val[0]),
-+                vpaddq_f32(sum.val[1], sum.val[1]));
-+        float32x4x2_t sum2 = vuzpq_f32(sum2_tmp.val[0], sum2_tmp.val[1]);
-+        return vgetq_lane_f32(sum2.val[0], 0) + vgetq_lane_f32(sum2.val[1], 0);
-+    }
-+};
-+#endif
-+
- /*******************************************************************
-  * DistanceComputer: combines a similarity and a quantizer to do
-  * code-to-vector or code-to-code comparisons
-@@ -903,6 +1135,53 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <class Quantizer, class Similarity>
-+struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
-+    using Sim = Similarity;
-+
-+    Quantizer quant;
-+
-+    DCTemplate(size_t d, const std::vector<float>& trained)
-+            : quant(d, trained) {}
-+    float compute_distance(const float* x, const uint8_t* code) const {
-+        Similarity sim(x);
-+        sim.begin_8();
-+        for (size_t i = 0; i < quant.d; i += 8) {
-+            float32x4x2_t xi = quant.reconstruct_8_components(code, i);
-+            sim.add_8_components(xi);
-+        }
-+        return sim.result_8();
-+    }
-+
-+    float compute_code_distance(const uint8_t* code1, const uint8_t* code2)
-+            const {
-+        Similarity sim(nullptr);
-+        sim.begin_8();
-+        for (size_t i = 0; i < quant.d; i += 8) {
-+            float32x4x2_t x1 = quant.reconstruct_8_components(code1, i);
-+            float32x4x2_t x2 = quant.reconstruct_8_components(code2, i);
-+            sim.add_8_components_2(x1, x2);
-+        }
-+        return sim.result_8();
-+    }
-+
-+    void set_query(const float* x) final {
-+        q = x;
-+    }
-+
-+    float symmetric_dis(idx_t i, idx_t j) override {
-+        return compute_code_distance(
-+                codes + i * code_size, codes + j * code_size);
-+    }
-+
-+    float query_to_code(const uint8_t* code) const final {
-+        return compute_distance(q, code);
-+    }
-+};
-+#endif
-+
- /*******************************************************************
-  * DistanceComputerByte: computes distances in the integer domain
-  *******************************************************************/
-@@ -1019,6 +1298,54 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
- 
- #endif
- 
-+#ifdef __aarch64__
-+
-+template <class Similarity>
-+struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
-+    using Sim = Similarity;
-+
-+    int d;
-+    std::vector<uint8_t> tmp;
-+
-+    DistanceComputerByte(int d, const std::vector<float>&) : d(d), tmp(d) {}
-+
-+    int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
-+            const {
-+        int accu = 0;
-+        for (int i = 0; i < d; i++) {
-+            if (Sim::metric_type == METRIC_INNER_PRODUCT) {
-+                accu += int(code1[i]) * code2[i];
-+            } else {
-+                int diff = int(code1[i]) - code2[i];
-+                accu += diff * diff;
-+            }
-+        }
-+        return accu;
-+    }
-+
-+    void set_query(const float* x) final {
-+        for (int i = 0; i < d; i++) {
-+            tmp[i] = int(x[i]);
-+        }
-+    }
-+
-+    int compute_distance(const float* x, const uint8_t* code) {
-+        set_query(x);
-+        return compute_code_distance(tmp.data(), code);
-+    }
-+
-+    float symmetric_dis(idx_t i, idx_t j) override {
-+        return compute_code_distance(
-+                codes + i * code_size, codes + j * code_size);
-+    }
-+
-+    float query_to_code(const uint8_t* code) const final {
-+        return compute_code_distance(tmp.data(), code);
-+    }
-+};
-+
-+#endif
-+
- /*******************************************************************
-  * select_distance_computer: runtime selection of template
-  * specialization
-@@ -1155,7 +1482,7 @@ void ScalarQuantizer::train(size_t n, const float* x) {
- }
- 
- ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
--#ifdef USE_F16C
-+#if defined(USE_F16C) || defined(__aarch64__)
-     if (d % 8 == 0) {
-         return select_quantizer_1<8>(qtype, d, trained);
-     } else
-@@ -1186,7 +1513,7 @@ void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
- SQDistanceComputer* ScalarQuantizer::get_distance_computer(
-         MetricType metric) const {
-     FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
--#ifdef USE_F16C
-+#if defined(USE_F16C) || defined(__aarch64__)
-     if (d % 8 == 0) {
-         if (metric == METRIC_L2) {
-             return select_distance_computer<SimilarityL2<8>>(qtype, d, trained);
-@@ -1522,7 +1849,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
-         bool store_pairs,
-         const IDSelector* sel,
-         bool by_residual) const {
--#ifdef USE_F16C
-+#if defined(USE_F16C) || defined(__aarch64__)
-     if (d % 8 == 0) {
-         return sel0_InvertedListScanner<8>(
-                 mt, this, quantizer, store_pairs, sel, by_residual);
-diff --git a/faiss/utils/fp16-arm.h b/faiss/utils/fp16-arm.h
-new file mode 100644
-index 00000000..79c885b0
---- /dev/null
-+++ b/faiss/utils/fp16-arm.h
-@@ -0,0 +1,29 @@
-+/**
-+ * Copyright (c) Facebook, Inc. and its affiliates.
-+ *
-+ * This source code is licensed under the MIT license found in the
-+ * LICENSE file in the root directory of this source tree.
-+ */
-+
-+#pragma once
-+
-+#include <arm_neon.h>
-+#include <cstdint>
-+
-+namespace faiss {
-+
-+inline uint16_t encode_fp16(float x) {
-+    float32x4_t fx4 = vdupq_n_f32(x);
-+    float16x4_t f16x4 = vcvt_f16_f32(fx4);
-+    uint16x4_t ui16x4 = vreinterpret_u16_f16(f16x4);
-+    return vduph_lane_u16(ui16x4, 3);
-+}
-+
-+inline float decode_fp16(uint16_t x) {
-+    uint16x4_t ui16x4 = vdup_n_u16(x);
-+    float16x4_t f16x4 = vreinterpret_f16_u16(ui16x4);
-+    float32x4_t fx4 = vcvt_f32_f16(f16x4);
-+    return vdups_laneq_f32(fx4, 3);
-+}
-+
-+} // namespace faiss
-diff --git a/faiss/utils/fp16.h b/faiss/utils/fp16.h
-index 90691d8f..43e05dc3 100644
---- a/faiss/utils/fp16.h
-+++ b/faiss/utils/fp16.h
-@@ -13,6 +13,8 @@
- 
- #if defined(__F16C__)
- #include <faiss/utils/fp16-fp16c.h>
-+#elif defined(__aarch64__)
-+#include <faiss/utils/fp16-arm.h>
- #else
- #include <faiss/utils/fp16-inl.h>
- #endif
diff --git a/src/main/java/org/opensearch/knn/index/util/Faiss.java b/src/main/java/org/opensearch/knn/index/util/Faiss.java
index 420288033..3b21488b9 100644
--- a/src/main/java/org/opensearch/knn/index/util/Faiss.java
+++ b/src/main/java/org/opensearch/knn/index/util/Faiss.java
@@ -56,6 +56,9 @@
  */
 class Faiss extends NativeLibrary {
 
+    // TODO: Current version is not really current version. Instead, it encodes information in the file name
+    // about the compatibility version the file is created with. In the future, we should refactor this so that it
+    // makes sense. See https://github.com/opensearch-project/k-NN/issues/1515 for more details.
     private final static String CURRENT_VERSION = "165";
 
     // Map that overrides OpenSearch score translation by space type of scores returned by faiss