Skip to content

Commit 34f3d96

Browse files
committed
Replace SHA function with something very fast
My estimate is that this should increase speed by ~2.5X. Fixes issue #618
1 parent b1c26b2 commit 34f3d96

File tree

1 file changed

+13
-8
lines changed

1 file changed

+13
-8
lines changed

lib/markdown2.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,10 @@
122122
from abc import ABC, abstractmethod
123123
import functools
124124
from collections.abc import Iterable
125-
from hashlib import sha256
126125
from random import random
127126
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union
128127
from collections.abc import Collection
129128
from enum import IntEnum, auto
130-
from os import urandom
131129

132130
# ---- type defs
133131
_safe_mode = Literal['replace', 'escape']
@@ -143,11 +141,18 @@
143141
DEFAULT_TAB_WIDTH = 4
144142

145143

146-
SECRET_SALT = urandom(16)
147-
# MD5 function was previously used for this; the "md5" prefix was kept for
148-
# backwards compatibility.
144+
# _hash_text is used to temporarily escape (replace) characters and HTML which
145+
# should be ignored by the processor.
146+
#
147+
# Afterwards, we find tokens of the form 'key32-' + 32 hex digits and convert
148+
# them back to the characters or HTML they replaced.
149149
def _hash_text(s: str) -> str:
150-
return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]
150+
h = hash(s) # Not cryptographically secure, and that's fine
151+
h = abs(h)*2 + int(h>0) # As a non-negative number
152+
h = hex(h)[2:34] # Convert to hex.
153+
h = '0'*(32-len(h)) + h # Pad if needed
154+
return 'key32-' + h
155+
HASH_REGEX = r'key32-[0-9a-f]{32}'
151156

152157
# Table of hash values for escaped characters:
153158
g_escape_table = {ch: _hash_text(ch)
@@ -1341,7 +1346,7 @@ def _is_code_span(index, token):
13411346
except IndexError:
13421347
return False
13431348

1344-
return re.match(r'<code>md5-[A-Fa-f0-9]{32}</code>', ''.join(peek_tokens))
1349+
return re.match('<code>' + HASH_REGEX + '</code>', ''.join(peek_tokens))
13451350

13461351
def _is_comment(token):
13471352
if self.safe_mode == 'replace':
@@ -2491,7 +2496,7 @@ def sub_hash(self, match: re.Match) -> str:
24912496
def test(self, text):
24922497
if self.md.order < Stage.ITALIC_AND_BOLD:
24932498
return '*' in text or '_' in text
2494-
return self.hash_table and re.search(r'md5-[0-9a-z]{32}', text)
2499+
return self.hash_table and re.search(HASH_REGEX, text)
24952500

24962501

24972502
class _LinkProcessorExtraOpts(TypedDict, total=False):

0 commit comments

Comments
 (0)