/*
   Copyright (c) Alexander Zaitsev, 2016

   Distributed under the Boost Software License, Version 1.0. (See
   accompanying file LICENSE_1_0.txt or copy at
   http://www.boost.org/LICENSE_1_0.txt)

   See http://www.boost.org/ for latest version.
*/

// Exact string-matching algorithms that report EVERY occurrence of a pattern
// in a corpus.  Each guarded section below corresponds to one header of the
// original patch (its path is given in the section banner).  Sections are
// ordered so that every helper is declared before its first use; when a
// section is split back into its own file it must include the sections named
// in its "needs" note.
//
// Common contract of all *_search functions:
//   * both iterator types must be random access;
//   * elements are treated as bytes: every lookup table has 256 entries and
//     is indexed with the element converted to unsigned char;
//   * the result holds one corpus iterator per match, in increasing order,
//     overlapping matches included;
//   * an empty pattern, or a pattern longer than the corpus, yields no match.

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>

// ===== include/boost/algorithm/searching/search_util.hpp =====
#ifndef BOOST_ALGORITHM_SEARCH_UTIL_HPP
#define BOOST_ALGORITHM_SEARCH_UTIL_HPP

namespace boost { namespace algorithm {

namespace detail {

    // Number of entries in every per-character table (byte alphabet).
    const int search_alphabet_size = 256;
    // Marks a missing transition in the EBOM factor oracle.
    const int search_no_state = -1;

    // Table index of an element; going through unsigned char keeps the index
    // non-negative on platforms where plain char is signed.
    template <typename T>
    inline int search_byte(const T& value)
    {
        return static_cast<unsigned char>(value);
    }

    // Length of [first, last) as int, the index type of all shift tables.
    template <typename Iter>
    inline int search_length(Iter first, Iter last)
    {
        return static_cast<int>(std::distance(first, last));
    }

    // Grows a caller-supplied table when it is too small; never shrinks it.
    inline void search_reserve_table(std::vector<int>& table, int entries)
    {
        if (table.size() < static_cast<std::size_t>(entries))
            table.resize(static_cast<std::size_t>(entries));
    }

    // Iterator type produced by std::begin on a const Range.
    template <typename Range>
    using search_range_iterator_t = decltype(std::begin(std::declval<const Range&>()));

} // namespace detail


/// Computes suff[i] = length of the longest common suffix of pattern[0..i]
/// and the whole pattern.  suff is grown to the pattern length if needed;
/// nothing is written for an empty pattern.
template <typename patIter>
void suffixes(patIter pat_begin, patIter pat_end, std::vector<int>& suff)
{
    const int m = detail::search_length(pat_begin, pat_end);
    if (m <= 0)
        return;
    detail::search_reserve_table(suff, m);

    int f = m - 1;   // always overwritten before its first read
    int g = m - 1;
    suff[m - 1] = m;
    for (int i = m - 2; i >= 0; --i)
    {
        if (i > g && suff[i + m - 1 - f] < i - g)
        {
            suff[i] = suff[i + m - 1 - f];
        }
        else
        {
            if (i < g)
                g = i;
            f = i;
            while (g >= 0 && pat_begin[g] == pat_begin[g + m - 1 - f])
                --g;
            suff[i] = f - g;
        }
    }
}


/// Fills bmGs[0..m) with the Boyer-Moore good-suffix shifts of the pattern.
/// bmGs is grown to the pattern length if needed.
template <typename patIter>
void preBmGs(patIter pat_begin, patIter pat_end, std::vector<int>& bmGs)
{
    const int m = detail::search_length(pat_begin, pat_end);
    if (m <= 0)
        return;
    detail::search_reserve_table(bmGs, m);

    std::vector<int> suff(static_cast<std::size_t>(m));
    suffixes(pat_begin, pat_end, suff);

    std::fill_n(bmGs.begin(), m, m);
    int j = 0;
    for (int i = m - 1; i >= 0; --i)
    {
        if (suff[i] == i + 1)
        {
            for (; j < m - 1 - i; ++j)
            {
                if (bmGs[j] == m)
                    bmGs[j] = m - 1 - i;
            }
        }
    }
    for (int i = 0; i <= m - 2; ++i)
        bmGs[m - 1 - suff[i]] = m - 1 - i;
}


/// Fills bmBc[0..256) with the Boyer-Moore bad-character shifts: the distance
/// from the last occurrence of a byte in pattern[0..m-2] to the pattern end,
/// or m when the byte does not occur there.
template <typename patIter>
void preBmBc(patIter pat_begin, patIter pat_end, std::vector<int>& bmBc)
{
    const int m = detail::search_length(pat_begin, pat_end);
    detail::search_reserve_table(bmBc, detail::search_alphabet_size);
    std::fill_n(bmBc.begin(), detail::search_alphabet_size, m);
    for (int i = 0; i < m - 1; ++i)
        bmBc[detail::search_byte(pat_begin[i])] = m - i - 1;
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_SEARCH_UTIL_HPP


// ===== include/boost/algorithm/searching/quick_search.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_QUICK_SEARCH_HPP
#define BOOST_ALGORITHM_QUICK_SEARCH_HPP

namespace boost { namespace algorithm {

/// Fills qbc[0..256) with the Quick Search shifts: m minus the index of the
/// last occurrence of a byte in the pattern, or m + 1 when it never occurs.
template <typename patIter>
void preQsBc(patIter pat_begin, patIter pat_end, std::vector<int>& qbc)
{
    const int m = detail::search_length(pat_begin, pat_end);
    detail::search_reserve_table(qbc, detail::search_alphabet_size);
    std::fill_n(qbc.begin(), detail::search_alphabet_size, m + 1);
    for (int i = 0; i < m; ++i)
        qbc[detail::search_byte(pat_begin[i])] = m - i;
}


/// Quick Search (Sunday): compares the window left to right, then shifts by
/// the table entry of the corpus element just behind the window.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> quick_search(corpusIter corp_begin, corpusIter corp_end,
                                     patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> qsbc(static_cast<std::size_t>(detail::search_alphabet_size));
    preQsBc(pat_begin, pat_end, qsbc);

    int s = 0;
    while (s <= n - m)
    {
        int i = 0;
        while (i < m && pat_begin[i] == corp_begin[s + i])
            ++i;
        if (i == m)
            result.push_back(corp_begin + s);
        // The last window has no element behind it to drive the shift.
        if (s + m >= n)
            break;
        s += qsbc[detail::search_byte(corp_begin[s + m])];
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
quick_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return quick_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> quick_search(corpusIter corp_begin, corpusIter corp_end,
                                     const patRange& pat_range)
{
    return quick_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
quick_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return quick_search(std::begin(corp_range), std::end(corp_range),
                        std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_QUICK_SEARCH_HPP


// ===== include/boost/algorithm/searching/apostolico_crochemore.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_APOSTOLICO_CROCHEMORE_HPP
#define BOOST_ALGORITHM_APOSTOLICO_CROCHEMORE_HPP

namespace boost { namespace algorithm {

/// Computes the (optimised) Knuth-Morris-Pratt shift table of a pattern.
/// t receives m + 1 entries, t[0] == -1, and is grown when it is smaller.
/// p decides whether two pattern elements are equal.
template <typename patIter,
          typename Predicate = std::equal_to<typename std::iterator_traits<patIter>::value_type> >
void calcShiftTable(patIter pat_begin, patIter pat_end, std::vector<int>& t, Predicate p = Predicate())
{
    const int m = detail::search_length(pat_begin, pat_end);
    detail::search_reserve_table(t, m + 1);

    int i = 0;
    int j = t[0] = -1;
    while (i < m)
    {
        while (j > -1 && !p(pat_begin[i], pat_begin[j]))
            j = t[j];
        ++i;
        ++j;
        // Position m is one past the pattern: it never equals anything.
        if (i < m && p(pat_begin[i], pat_begin[j]))
            t[i] = t[j];
        else
            t[i] = j;
    }
}


/// Apostolico-Crochemore search.  The state is the triple (i, j, k): the
/// window starts at j, pattern[l..i) and pattern[0..k) are known to match,
/// where l is the length of the leading run of equal pattern elements.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> apostolico_crochemore_search(corpusIter corp_begin, corpusIter corp_end,
                                                     patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> v;
    const int size_x = detail::search_length(pat_begin, pat_end);
    const int size_y = detail::search_length(corp_begin, corp_end);
    if (size_x == 0 || size_x > size_y)
        return v;

    // Preprocessing: the shift table is read at index size_x as well.
    std::vector<int> t(static_cast<std::size_t>(size_x) + 1);
    calcShiftTable(pat_begin, pat_end, t);

    // l = length of the leading run x[0] == x[1] == ...; 0 when the whole
    // pattern is a single run.
    int l = 1;
    while (l < size_x && pat_begin[l - 1] == pat_begin[l])
        ++l;
    if (l == size_x)
        l = 0;

    int i = l, j = 0, k = 0;
    while (j <= size_y - size_x)
    {
        // If x[i] == y[i + j], the next triple is (i + 1, j, k).
        while (i < size_x && pat_begin[i] == corp_begin[i + j])
            ++i;
        if (i >= size_x)
        {
            // If k < l and x[k] == y[j + k], the next triple is (i, j, k + 1).
            while (k < l && pat_begin[k] == corp_begin[j + k])
                ++k;
            // k == l: the pattern occurs at j.
            if (k >= l)
                v.push_back(corp_begin + j);
        }
        j += i - t[i];
        if (i == l)
        {
            // Mismatch right at l: next triple is (l, j + 1, max(0, k - 1)).
            k = std::max(0, k - 1);
        }
        else if (t[i] <= l)
        {
            // Next triple is (l, i + j - t[i], max(0, t[i])).
            k = std::max(0, t[i]);
            i = l;
        }
        else
        {
            // t[i] > l: next triple is (t[i], i + j - t[i], l).
            k = l;
            i = t[i];
        }
    }
    return v;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
apostolico_crochemore_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return apostolico_crochemore_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> apostolico_crochemore_search(corpusIter corp_begin, corpusIter corp_end,
                                                     const patRange& pat_range)
{
    return apostolico_crochemore_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
apostolico_crochemore_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return apostolico_crochemore_search(std::begin(corp_range), std::end(corp_range),
                                        std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_APOSTOLICO_CROCHEMORE_HPP


// ===== include/boost/algorithm/searching/ebom.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_EBOM_HPP
#define BOOST_ALGORITHM_EBOM_HPP

namespace boost { namespace algorithm {

/// Extended Backward Oracle Matching.  Builds the factor oracle of the
/// reversed pattern, plus a table that performs its first two transitions in
/// one lookup, and scans each window from its last element backwards.
/// The corpus is never written to: the classic sentinel copy of the pattern
/// behind the corpus is replaced by explicit bounds checks.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> ebom_search(corpusIter corp_begin, corpusIter corp_end,
                                    patIter pat_begin, patIter pat_end)
{
    const int sigma = detail::search_alphabet_size;
    const int none = detail::search_no_state;

    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    // The oracle scan always consumes two elements and skips by m - 1, so a
    // one-element pattern is handled by a plain scan.
    if (m == 1)
    {
        for (int pos = 0; pos < n; ++pos)
        {
            if (pat_begin[0] == corp_begin[pos])
                result.push_back(corp_begin + pos);
        }
        return result;
    }

    // Preprocessing: states are m (initial) down to 0 (whole pattern read);
    // S is the supply function, with m + 1 meaning "none".
    std::vector<int> S(static_cast<std::size_t>(m) + 1);
    std::vector<std::vector<int> > trans(static_cast<std::size_t>(m) + 1,
                                         std::vector<int>(static_cast<std::size_t>(sigma), none));
    int p = 0;
    int q = none;
    S[m] = m + 1;
    for (int i = m; i > 0; --i)
    {
        const int target = i - 1;
        const int c = detail::search_byte(pat_begin[target]);
        trans[i][c] = target;
        p = S[i];
        while (p <= m && (q = trans[p][c]) == none)
        {
            trans[p][c] = target;
            p = S[p];
        }
        S[target] = (p == m + 1 ? m : q);
    }

    // first_two[a * sigma + b]: state after reading a, then b, from state m.
    std::vector<int> first_two(static_cast<std::size_t>(sigma) * sigma, none);
    for (int a = 0; a < sigma; ++a)
    {
        const int after_a = trans[m][a];
        if (after_a >= 0)
            std::copy(trans[after_a].begin(), trans[after_a].end(), first_two.begin() + a * sigma);
    }

    // Searching.  The first window is compared directly; afterwards j is the
    // index of the LAST element of the current window and j >= m always holds.
    if (std::equal(pat_begin, pat_end, corp_begin))
        result.push_back(corp_begin);

    int j = m;
    while (j < n)
    {
        int state = first_two[detail::search_byte(corp_begin[j]) * sigma
                              + detail::search_byte(corp_begin[j - 1])];
        while (state == none)
        {
            // The last two elements are no factor of the pattern, so no
            // occurrence can cover both of them.
            j += m - 1;
            if (j >= n)
                return result;
            state = first_two[detail::search_byte(corp_begin[j]) * sigma
                              + detail::search_byte(corp_begin[j - 1])];
        }

        // Every transition lowers the state, so at most m elements are read
        // and i never drops below j - m >= 0.
        int i = j - 2;
        p = state;
        while ((p = trans[p][detail::search_byte(corp_begin[i])]) != none)
            --i;

        if (i < j - (m - 1))
        {
            // All m elements were accepted: the window is an occurrence.
            result.push_back(corp_begin + (j - (m - 1)));
            ++i;
        }
        j = i + m;
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
ebom_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return ebom_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> ebom_search(corpusIter corp_begin, corpusIter corp_end,
                                    const patRange& pat_range)
{
    return ebom_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
ebom_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return ebom_search(std::begin(corp_range), std::end(corp_range),
                       std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_EBOM_HPP


// ===== include/boost/algorithm/searching/fjs.hpp =====
// needs: search_util.hpp, quick_search.hpp (preQsBc),
//        apostolico_crochemore.hpp (calcShiftTable)
#ifndef BOOST_ALGORITHM_FRANEK_JENNINGS_SMYTH_HPP
#define BOOST_ALGORITHM_FRANEK_JENNINGS_SMYTH_HPP

namespace boost { namespace algorithm {

/// Knuth-Morris-Pratt shift table with m + 1 entries (kmpNexy[0] == -1);
/// kmpNexy is grown when it is smaller than that.
template <typename patIter>
void preKmp(patIter pat_begin, patIter pat_end, std::vector<int>& kmpNexy)
{
    calcShiftTable(pat_begin, pat_end, kmpNexy);
}


/// Franek-Jennings-Smyth search: Quick Search shifts while the last window
/// element mismatches, Knuth-Morris-Pratt shifts after a left-to-right scan.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> fjs_search(corpusIter corp_begin, corpusIter corp_end,
                                   patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> qsbc(static_cast<std::size_t>(detail::search_alphabet_size));
    std::vector<int> kmp(static_cast<std::size_t>(m) + 1);
    preQsBc(pat_begin, pat_end, qsbc);
    preKmp(pat_begin, pat_end, kmp);

    int s = 0;
    while (s <= n - m)
    {
        while (pat_begin[m - 1] != corp_begin[s + m - 1])
        {
            // No element behind the last window: nothing left to search.
            if (s + m >= n)
                return result;
            s += qsbc[detail::search_byte(corp_begin[s + m])];
            if (s > n - m)
                return result;
        }
        int i = 0;
        while (i < m && pat_begin[i] == corp_begin[s + i])
            ++i;
        if (i >= m)
            result.push_back(corp_begin + s);
        s += i - kmp[i];
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
fjs_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return fjs_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> fjs_search(corpusIter corp_begin, corpusIter corp_end,
                                   const patRange& pat_range)
{
    return fjs_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
fjs_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return fjs_search(std::begin(corp_range), std::end(corp_range),
                      std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_FRANEK_JENNINGS_SMYTH_HPP


// ===== include/boost/algorithm/searching/raita.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_RAITA_HPP
#define BOOST_ALGORITHM_RAITA_HPP

namespace boost { namespace algorithm {

/// Raita search: checks the last, middle and first window elements before
/// comparing the remaining interior, then shifts by the bad-character table
/// entry of the last window element.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> raita_search(corpusIter corp_begin, corpusIter corp_end,
                                     patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> bmBc(static_cast<std::size_t>(detail::search_alphabet_size));
    preBmBc(pat_begin, pat_end, bmBc);

    const int middle = m / 2;
    int j = 0;
    while (j <= n - m)
    {
        const int last_byte = detail::search_byte(corp_begin[j + m - 1]);
        // Pattern and corpus elements are compared directly, never through a
        // narrowed copy.  The interior pattern[1..m-1) is empty for m <= 2.
        if (pat_begin[m - 1] == corp_begin[j + m - 1] &&
            pat_begin[middle] == corp_begin[j + middle] &&
            pat_begin[0] == corp_begin[j] &&
            (m <= 2 || std::equal(pat_begin + 1, pat_begin + (m - 1), corp_begin + (j + 1))))
        {
            result.push_back(corp_begin + j);
        }
        j += bmBc[last_byte];
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
raita_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return raita_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> raita_search(corpusIter corp_begin, corpusIter corp_end,
                                     const patRange& pat_range)
{
    return raita_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
raita_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return raita_search(std::begin(corp_range), std::end(corp_range),
                        std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_RAITA_HPP


// ===== include/boost/algorithm/searching/tuned_boyer_moore.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_TUNED_BOYER_MOORE_HPP
#define BOOST_ALGORITHM_TUNED_BOYER_MOORE_HPP

namespace boost { namespace algorithm {

/// Tuned Boyer-Moore search: skips with the bad-character table until the
/// last window element equals the last pattern element (table entry 0), then
/// compares the first m - 1 elements.  The classic version unrolls the skip
/// loop and relies on a sentinel written behind the corpus; here every step
/// is bounds-checked instead, so the corpus is only read inside its range.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> tuned_boyer_moore_search(corpusIter corp_begin, corpusIter corp_end,
                                                 patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> bmBc(static_cast<std::size_t>(detail::search_alphabet_size));
    preBmBc(pat_begin, pat_end, bmBc);
    const int last_byte = detail::search_byte(pat_begin[m - 1]);
    const int shift = bmBc[last_byte];   // shift applied after a candidate
    bmBc[last_byte] = 0;                 // 0 marks "last element matches"

    int j = 0;
    while (j <= n - m)
    {
        const int k = bmBc[detail::search_byte(corp_begin[j + m - 1])];
        if (k != 0)
        {
            j += k;
            continue;
        }
        if (std::equal(pat_begin, pat_begin + (m - 1), corp_begin + j))
            result.push_back(corp_begin + j);
        j += shift;
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
tuned_boyer_moore_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return tuned_boyer_moore_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter>
tuned_boyer_moore_search(corpusIter corp_begin, corpusIter corp_end,
                         const patRange& pat_range)
{
    return tuned_boyer_moore_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
tuned_boyer_moore_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return tuned_boyer_moore_search(std::begin(corp_range), std::end(corp_range),
                                    std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_TUNED_BOYER_MOORE_HPP


// ===== include/boost/algorithm/searching/turbo_boyer_moore.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_TURBO_BOYER_MOORE_HPP
#define BOOST_ALGORITHM_TURBO_BOYER_MOORE_HPP

namespace boost { namespace algorithm {

/// Turbo Boyer-Moore search: Boyer-Moore with a memory of the suffix matched
/// in the previous window (length u), which lets the scan jump over that
/// factor and allows an additional "turbo" shift.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> turbo_boyer_moore_search(corpusIter corp_begin, corpusIter corp_end,
                                                 patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> bmGs(static_cast<std::size_t>(m));
    std::vector<int> bmBc(static_cast<std::size_t>(detail::search_alphabet_size));
    preBmGs(pat_begin, pat_end, bmGs);
    preBmBc(pat_begin, pat_end, bmBc);

    int j = 0;
    int u = 0;       // length of the remembered factor, 0 when none
    int shift = m;   // shift applied to reach the current window
    while (j <= n - m)
    {
        int i = m - 1;
        while (i >= 0 && pat_begin[i] == corp_begin[i + j])
        {
            --i;
            // Jump over the factor already known to match.
            if (u != 0 && i == m - 1 - shift)
                i -= u;
        }
        if (i < 0)
        {
            result.push_back(corp_begin + j);
            shift = bmGs[0];
            u = m - shift;
        }
        else
        {
            const int v = m - 1 - i;   // suffix length matched in this window
            const int turboShift = u - v;
            const int bcShift = bmBc[detail::search_byte(corp_begin[i + j])] - m + 1 + i;
            shift = std::max(std::max(turboShift, bcShift), bmGs[i]);
            if (shift == bmGs[i])
            {
                u = std::min(m - shift, v);
            }
            else
            {
                if (turboShift < bcShift)
                    shift = std::max(shift, u + 1);
                u = 0;
            }
        }
        j += shift;
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
turbo_boyer_moore_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return turbo_boyer_moore_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> turbo_boyer_moore_search(corpusIter corp_begin, corpusIter corp_end,
                                                 const patRange& pat_range)
{
    return turbo_boyer_moore_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
turbo_boyer_moore_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return turbo_boyer_moore_search(std::begin(corp_range), std::end(corp_range),
                                    std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_TURBO_BOYER_MOORE_HPP


// ===== include/boost/algorithm/searching/zhu_takaoka.hpp =====
// needs: search_util.hpp
#ifndef BOOST_ALGORITHM_ZHU_TAKAOKA_HPP
#define BOOST_ALGORITHM_ZHU_TAKAOKA_HPP

namespace boost { namespace algorithm {

/// Fills the 256 x 256 Zhu-Takaoka table: ztBc[a][b] is the shift that aligns
/// the rightmost occurrence of the pair (a, b) in pattern[0..m-2] with the
/// last two window elements; m - 1 when only b == pattern[0] can be aligned;
/// m otherwise.  For an empty pattern every entry is 0.
template <typename patIter>
void preZtBc(patIter pat_begin, patIter pat_end,
             int ztBc[detail::search_alphabet_size][detail::search_alphabet_size])
{
    const int sigma = detail::search_alphabet_size;
    const int m = detail::search_length(pat_begin, pat_end);
    for (int a = 0; a < sigma; ++a)
    {
        for (int b = 0; b < sigma; ++b)
            ztBc[a][b] = m;
    }
    if (m == 0)
        return;

    const int first = detail::search_byte(pat_begin[0]);
    for (int a = 0; a < sigma; ++a)
        ztBc[a][first] = m - 1;
    for (int i = 1; i < m - 1; ++i)
        ztBc[detail::search_byte(pat_begin[i - 1])][detail::search_byte(pat_begin[i])] = m - 1 - i;
}


/// Zhu-Takaoka search: Boyer-Moore whose bad-character shift is driven by
/// the last TWO window elements.
template <typename corpusIter, typename patIter>
std::vector<corpusIter> zhu_takaoka_search(corpusIter corp_begin, corpusIter corp_end,
                                           patIter pat_begin, patIter pat_end)
{
    std::vector<corpusIter> result;
    const int m = detail::search_length(pat_begin, pat_end);
    const int n = detail::search_length(corp_begin, corp_end);
    if (m == 0 || m > n)
        return result;

    std::vector<int> bmGs(static_cast<std::size_t>(m));
    preBmGs(pat_begin, pat_end, bmGs);

    // The pair table holds 65536 ints, so it lives on the heap rather than
    // the stack.  A one-element pattern has no "last two" elements and is
    // searched with the good-suffix table alone.
    std::unique_ptr<int[][detail::search_alphabet_size]> ztBc;
    if (m >= 2)
    {
        ztBc.reset(new int[detail::search_alphabet_size][detail::search_alphabet_size]);
        preZtBc(pat_begin, pat_end, ztBc.get());
    }

    int j = 0;
    while (j <= n - m)
    {
        int i = m - 1;
        while (i >= 0 && pat_begin[i] == corp_begin[i + j])
            --i;
        if (i < 0)
        {
            result.push_back(corp_begin + j);
            j += bmGs[0];
        }
        else
        {
            int shift = bmGs[i];
            if (m >= 2)
            {
                shift = std::max(shift, ztBc[detail::search_byte(corp_begin[j + m - 2])]
                                            [detail::search_byte(corp_begin[j + m - 1])]);
            }
            j += shift;
        }
    }
    return result;
}

template <typename corpusRange, typename patIter>
std::vector<detail::search_range_iterator_t<corpusRange> >
zhu_takaoka_search(const corpusRange& corp_range, patIter pat_begin, patIter pat_end)
{
    return zhu_takaoka_search(std::begin(corp_range), std::end(corp_range), pat_begin, pat_end);
}

template <typename corpusIter, typename patRange>
std::vector<corpusIter> zhu_takaoka_search(corpusIter corp_begin, corpusIter corp_end,
                                           const patRange& pat_range)
{
    return zhu_takaoka_search(corp_begin, corp_end, std::begin(pat_range), std::end(pat_range));
}

template <typename corpusRange, typename patRange>
std::vector<detail::search_range_iterator_t<corpusRange> >
zhu_takaoka_search(const corpusRange& corp_range, const patRange& pat_range)
{
    return zhu_takaoka_search(std::begin(corp_range), std::end(corp_range),
                              std::begin(pat_range), std::end(pat_range));
}

}} // namespace boost::algorithm
#endif //BOOST_ALGORITHM_ZHU_TAKAOKA_HPP