diff --git a/multibase.csv b/multibase.csv index d980df7..7ad33f2 100644 --- a/multibase.csv +++ b/multibase.csv @@ -14,6 +14,8 @@ base32upper, B, rfc4648 no padding, base32pad, c, rfc4648 with padding, candidate base32padupper, C, rfc4648 with padding, candidate base32z, h, z-base-32 (used by Tahoe-LAFS), draft +base32cx, x, base32 over [4-9A-Z] with [a-z] letter-case checksum, draft +base32ux, X, base32cx using only uppercase (no checksum), draft base58flickr, Z, base58 flicker, candidate base58btc, z, base58 bitcoin, default base64, m, rfc4648 no padding, default diff --git a/rfcs/Base32CaseCheck.md b/rfcs/Base32CaseCheck.md new file mode 100644 index 0000000..a22acf7 --- /dev/null +++ b/rfcs/Base32CaseCheck.md @@ -0,0 +1,67 @@ +### Intro + +`base32cx` is a base-32 encoding with letter-case checksums inspired by the success of Ethereum’s [EIP55](https://github.com/ethereum/EIPs/blob/master/EIPS/eip-55.md). + +It is designed for encoding relatively short byte strings presented as human-skimmable strings, like file hashes or cryptocurrency addresses. + +The alphabet maximizes the number of alpha characters to increase the average number of checksum bits per string. + +`base32cx` has a variant, `base32ux`, which is the same alphabet without a checksum. + +The unchecked variant `base32ux` has the property that a lexical sort of encoded data is a bitwise sort of decoded data, like `base32hex`. 
+ +### Alphabet + +``` + base32cx alphabet + + value: 0,1,2,3,4,5,[6..31] + encoding: 4,5,6,7,8,9,[A..Z] + lowered: 4,5,6,7,8,9,[a..z] + +value encoding value encoding value encoding value encoding +----- -------- ----- -------- ----- -------- ----- -------- + 0 4 8 C (c) 16 K (k) 24 S (s) + 1 5 9 D (d) 17 L (l) 25 T (t) + 2 6 10 E (e) 18 M (m) 26 U (u) + 3 7 11 F (f) 19 N (n) 27 V (v) + 4 8 12 G (g) 20 O (o) 28 W (w) + 5 9 13 H (h) 21 P (p) 29 X (x) + 6 A (a) 14 I (i) 22 Q (q) 30 Y (y) + 7 B (b) 15 J (j) 23 R (r) 31 Z (z) +``` + +### Checksum + +To checksum, take the sha256 of the data to be encoded. Call this hash `CHECK`. + +Encode the bytes using the alphabet above. (Hint: It's a drop-in replacement for `base32hex`). + +Lowercase the i'th character of the encoded string if the (i % 256)'th bit of `CHECK` is a 0. +Keep it uppercased if it is a 1. + +### Example + +``` +encode("Hello") + + encoding result note + + base32cx d5mQSv7j appears mixed / passes checksum + base32ux D5MQSV7J appears uniform / fails checksum + none d5mqsv7j appears mixed / fails checksum, uppercased might be base32ux +``` + +Uppercase letters are chosen for the unchecked variant because the numeric characters are “tall”. +This makes unchecked data appear uniform while checked data appears mixed-height. + +### Maximum size + +`base32cx` is only defined for byte sequences up to length 2^20 - 1, that is, one byte less than 1 MiB. +It is most likely not an appropriate choice of encoding for larger data. +For completeness, a standard method for hashing large data and applying the checksum in chunks will be specified in the future. +Until then, base32cx is simply not defined if the data to be encoded is longer than 2^20 - 1 bytes. + +### Source + +The spec was originally posted and is being maintained [here](https://word.site/2019/11/13/base32cx/). 
diff --git a/tests/test1.csv b/tests/test1.csv index 44d6003..5690fd3 100644 --- a/tests/test1.csv +++ b/tests/test1.csv @@ -13,6 +13,8 @@ base32padupper, "CIRSWGZLOORZGC3DJPJSSAZLWMVZHS5DINFXGOIJB" base32hexpad, "t8him6pbeehp62r39f9ii0pbmclp7it38d5n6e891" base32hexpadupper, "T8HIM6PBEEHP62R39F9II0PBMCLP7IT38D5N6E891" base32z, "het1sg3mqqt3gn5djxj11y3msci3817depfzgqejb" +base32cx, "xcLMQaTFIiltA6v7dJDmM4TfqGPtbmx7CH9Raicd5" +base32ux, "XCLMQATFIILTA6V7DJDMM4TFQGPTBMX7CH9RAICD5" base58flickr, "Ztwe7gVTeK8wswS1gf8hrgAua9fcw9reboD" base58btc, "zUXE7GvtEk8XTXs1GF8HSGbVA9FCX9SEBPe" base64, "mRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchIQ" diff --git a/tests/test2.csv b/tests/test2.csv index 97ecccb..b44c252 100644 --- a/tests/test2.csv +++ b/tests/test2.csv @@ -13,6 +13,8 @@ base32padupper, "CPFSXGIDNMFXGSIBB" base32hexpad, "tf5in683dc5n6i811" base32hexpadupper, "TF5IN683DC5N6I811" base32z, "hxf1zgedpcfzg1ebb" +base32cx, "xj9MRAC7hg9ramc55" +base32ux, "XJ9MRAC7HG9RAMC55" base58flickr, "Z7Pznk19XTTzBtx" base58btc, "z7paNL19xttacUY" base64, "meWVzIG1hbmkgIQ" diff --git a/tests/test3.csv b/tests/test3.csv index 4bfbc5e..39e5f65 100644 --- a/tests/test3.csv +++ b/tests/test3.csv @@ -13,6 +13,8 @@ base32padupper, "CNBSWY3DPEB3W64TMMQ======" base32hexpad, "td1imor3f41rmusjccg======" base32hexpadupper, "TD1IMOR3F41RMUSJCCG======" base32z, "hpb1sa5dxrb5s6hucco" +base32cx, "xH5MQSv7J85vqYWnGgk" +base32ux, "XH5MQSV7J85VQYWNGGK" base58flickr, "ZrTu1dk6cWsRYjYu" base58btc, "zStV1DL6CwTryKyV" base64, "maGVsbG8gd29ybGQ" diff --git a/tests/test4.csv b/tests/test4.csv index e02f128..01e631d 100644 --- a/tests/test4.csv +++ b/tests/test4.csv @@ -13,6 +13,8 @@ base32padupper, "CAB4WK4ZANVQW42JAEE======" base32hexpad, "t01smasp0dlgmsq9044======" base32hexpadupper, "T01SMASP0DLGMSQ9044======" base32z, "hybhskh3ypiosh4jyrr" +base32cx, "x45WQewt4HPKqwud488" +base32ux, "X45WQEWT4HPKQWUD488" base58flickr, "Z17Pznk19XTTzBtx" base58btc, "z17paNL19xttacUY" base64, "mAHllcyBtYW5pICE" diff --git 
a/tests/test5.csv b/tests/test5.csv index 9f70104..25ae41d 100644 --- a/tests/test5.csv +++ b/tests/test5.csv @@ -13,6 +13,8 @@ base32padupper, "CAAAHSZLTEBWWC3TJEAQQ====" base32hexpad, "t0007ipbj41mm2rj940gg====" base32hexpadupper, "T0007IPBJ41MM2RJ940GG====" base32z, "hyyy813murbssn5ujryoo" +base32cx, "x444BmtFN85qQ6vnD84Kk" +base32ux, "X444BMTFN85QQ6VND84KK" base58flickr, "Z117Pznk19XTTzBtx" base58btc, "z117paNL19xttacUY" base64, "mAAB5ZXMgbWFuaSAh"