Skip to content

Commit 98a44ea

Browse files
committed
Added Polynomial Hash Function.
1 parent b3a503a commit 98a44ea

File tree

4 files changed

+217
-0
lines changed

4 files changed

+217
-0
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ a set of rules that precisely define a sequence of operations.
123123
* `A` [Hamiltonian Cycle](src/algorithms/graph/hamiltonian-cycle) - Visit every vertex exactly once
124124
* `A` [Strongly Connected Components](src/algorithms/graph/strongly-connected-components) - Kosaraju's algorithm
125125
* `A` [Travelling Salesman Problem](src/algorithms/graph/travelling-salesman) - shortest possible route that visits each city and returns to the origin city
126+
* **Cryptography**
127+
* `B` [Polynomial Hash](src/algorithms/cryptography/polynomial-hash) - rolling hash function based on a polynomial
126128
* **Uncategorized**
127129
* `B` [Tower of Hanoi](src/algorithms/uncategorized/hanoi-tower)
128130
* `B` [Square Matrix Rotation](src/algorithms/uncategorized/square-matrix-rotation) - in-place algorithm
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
const DEFAULT_PRIME = 37;

/**
 * Polynomial rolling hash over the UTF-16 code units of a string.
 *
 * The hash of a word is `s0 * p^0 + s1 * p^1 + ... + sk * p^k`, where `si` is
 * the code unit at index `i` and `p` is the configured prime base.
 *
 * NOTE: the value is accumulated in a plain Number and is NOT reduced by any
 * modulus, so results are exact (and `roll()` agrees with `hash()`) only while
 * every intermediate value stays within `Number.MAX_SAFE_INTEGER`.
 */
export default class PolynomialHash {
  /**
   * @param {number} [prime] - A prime number used as the base of the polynomial
   *   when creating the hash representation of a word.
   */
  constructor(prime = DEFAULT_PRIME) {
    this.prime = prime;
    // Kept so existing readers of this property keep working; neither hash()
    // nor roll() currently reduces its result by this value.
    this.primeModulus = 101;
  }

  /**
   * Function that creates hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} - Polynomial hash of the word (0 for an empty string).
   */
  hash(word) {
    let hash = 0;

    for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
      hash += word.charCodeAt(charIndex) * (this.prime ** charIndex);
    }

    return hash;
  }

  /**
   * Function that creates hash representation of the word
   * based on previous word (shifted by one character left) hash value.
   *
   * Recalculates the hash representation of a word so that it isn't
   * necessary to traverse the whole word again.
   *
   * Time complexity: O(1) when both words are non-empty and of equal length;
   * otherwise the word is hashed directly in O(newWord.length).
   *
   * @param {number} prevHash - Hash of `prevWord` as returned by `hash()`.
   * @param {string} prevWord - Previous window.
   * @param {string} newWord - Current window: `prevWord` without its first
   *   character and with one new character appended.
   * @return {number} - Hash of `newWord`.
   */
  roll(prevHash, prevWord, newWord) {
    // The O(1) update is only defined for two non-empty windows of the same
    // length. An empty window would feed NaN (charCodeAt on a missing index)
    // into the arithmetic, and mismatched lengths would drop or misplace
    // terms, so hash the new word directly in those cases.
    if (newWord.length === 0 || prevWord.length !== newWord.length) {
      return this.hash(newWord);
    }

    const newWordLastIndex = newWord.length - 1;

    // Drop the p^0 term contributed by the character leaving the window.
    let hash = prevHash - prevWord.charCodeAt(0);
    // Shift every remaining term one power down.
    hash /= this.prime;
    // Add the highest-order term for the character entering the window.
    hash += newWord.charCodeAt(newWordLastIndex) * (this.prime ** newWordLastIndex);

    return hash;
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Polynomial Rolling Hash
2+
3+
## Hash Function
4+
5+
**Hash functions** are used to map large data sets of elements of an arbitrary
6+
length (*the keys*) to smaller data sets of elements of a fixed length
7+
(*the fingerprints*).
8+
9+
The basic application of hashing is efficient testing of equality of keys by
10+
comparing their fingerprints.
11+
12+
A *collision* happens when two different keys have the same fingerprint. The way
13+
in which collisions are handled is crucial in most applications of hashing.
14+
Hashing is particularly useful in construction of efficient practical algorithms.
15+
16+
## Rolling Hash
17+
18+
A **rolling hash** (also known as recursive hashing or rolling checksum) is a hash
19+
function where the input is hashed in a window that moves through the input.
20+
21+
A few hash functions allow a rolling hash to be computed very quickly — the new
22+
hash value is rapidly calculated given only the following data:
23+
24+
- old hash value,
25+
- the old value removed from the window,
26+
- and the new value added to the window.
27+
28+
## Polynomial String Hashing
29+
30+
An ideal hash function for strings should obviously depend both on the *multiset* of
31+
the symbols present in the key and on the *order* of the symbols. The most common
32+
family of such hash functions treats the symbols of a string as coefficients of
33+
a *polynomial* with an integer variable `p` and computes its value modulo an
34+
integer constant `M`.
35+
36+
The *Rabin–Karp string search algorithm* is often explained using a very simple
37+
rolling hash function that only uses multiplications and
38+
additions - **polynomial rolling hash**:
39+
40+
> H(s<sub>0</sub>, s<sub>1</sub>, ..., s<sub>k</sub>) = (s<sub>0</sub> * p<sup>0</sup> + s<sub>1</sub> * p<sup>1</sup> + ... + s<sub>k</sub> * p<sup>k</sup>) mod M
41+
42+
where `p` is a constant, and *(s<sub>0</sub>, ... , s<sub>k</sub>)* are the input
43+
characters.
44+
45+
A careful choice of the parameters `M`, `p` is important to obtain “good”
46+
properties of the hash function, i.e., low collision rate.
47+
48+
In order to avoid manipulating huge `H` values, all math is done modulo `M`.
49+
50+
Removing and adding characters simply involves adding or subtracting the first or
51+
last term. Shifting all characters by one position to the right requires multiplying
52+
the entire sum `H` by `p`. Shifting all characters by one position to the left
53+
requires dividing the entire sum `H` by `p`.
54+
55+
## References
56+
57+
- [Where to Use Polynomial String Hashing](https://www.mii.lt/olympiads_in_informatics/pdf/INFOL119.pdf)
58+
- [Hash Function on Wikipedia](https://en.wikipedia.org/wiki/Hash_function)
59+
- [Rolling Hash on Wikipedia](https://en.wikipedia.org/wiki/Rolling_hash)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import PolynomialHash from '../PolynomialHash';
2+
3+
describe('PolynomialHash', () => {
  it('should calculate new hash based on previous one', () => {
    // NOTE(review): larger bases (79, 101, 3251, 13229, 122743, 3583213) and a
    // second frame size (5) were listed here but disabled. Presumably this is
    // because the hash is not reduced by a modulus, so big bases with a
    // 20-character frame leave the exactly-representable integer range and
    // roll() stops matching hash() — confirm before widening these lists.
    const primes = [3];
    const frameSizes = [20];

    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    primes.forEach((prime) => {
      const polynomialHash = new PolynomialHash(prime);

      // Check hashing for different word lengths.
      frameSizes.forEach((frameSize) => {
        let previousWord = text.slice(0, frameSize);
        let previousHash = polynomialHash.hash(previousWord);

        // Shift frame through the whole text, including the final full-size
        // window that starts at text.length - frameSize.
        for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
          const currentWord = text.slice(frameShift, frameShift + frameSize);
          const currentHash = polynomialHash.hash(currentWord);
          const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

          // Check that rolling hash is the same as directly calculated hash.
          expect(currentRollingHash).toBe(currentHash);

          previousWord = currentWord;
          previousHash = currentHash;
        }
      });
    });
  });

  it('should produce known hashes and roll between them with the default prime', () => {
    const polynomialHash = new PolynomialHash();

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(151661);
    expect(directHash2).toBe(151949);
    expect(directHash3).toBe(156063);
    expect(directHash4).toBe(48023);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });

  it('should produce known hashes and roll between them with 3 as a prime base', () => {
    const PRIME = 3;
    const polynomialHash = new PolynomialHash(PRIME);

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(1347);
    expect(directHash2).toBe(1397);
    expect(directHash3).toBe(1431);
    expect(directHash4).toBe(729);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });
});

0 commit comments

Comments
 (0)