|
| 1 | +#pragma warning disable IDE0018 // Inline variable declaration -- false positive |
| 2 | + |
| 3 | +namespace QRCoder; |
| 4 | + |
| 5 | +public partial class QRCodeGenerator |
| 6 | +{ |
| 7 | + /// <summary> |
| 8 | + /// Data segment that optimizes encoding by analyzing character patterns and switching between |
| 9 | + /// encoding modes (Numeric, Alphanumeric, Byte) to minimize the total bit length. |
| 10 | + /// This implements the QR Code optimization algorithm from ISO/IEC 18004:2015 Annex J.2. |
| 11 | + /// It does not support Kanji mode. |
| 12 | + /// </summary> |
| 13 | + private sealed class OptimizedLatin1DataSegment : DataSegment |
| 14 | + { |
/// <summary>
/// Reports whether the given text is eligible for this segment type.
/// The decision is delegated entirely to <c>IsValidISO</c>, which is expected to accept
/// only text whose characters all fall within ISO-8859-1 (0x00-0xFF).
/// </summary>
/// <param name="plainText">The candidate text.</param>
/// <returns>Whatever <c>IsValidISO</c> returns for <paramref name="plainText"/>.</returns>
public static bool CanEncode(string plainText)
{
    return IsValidISO(plainText);
}
| 22 | + |
/// <summary>
/// Gets the nominal encoding mode of this segment, which is always Byte.
/// The segment may switch modes internally; this value is used only for DataTooLongException.
/// </summary>
public override EncodingMode EncodingMode
{
    get { return EncodingMode.Byte; }
}
| 27 | + |
/// <summary>
/// Initializes a new instance of the <see cref="OptimizedLatin1DataSegment"/> class.
/// </summary>
/// <param name="plainText">The text to encode with optimized mode switching; forwarded to the base constructor.</param>
public OptimizedLatin1DataSegment(string plainText) : base(plainText)
{
}
| 36 | + |
/// <summary>
/// Computes how many bits this segment occupies for the given QR code version by walking the
/// text, splitting it into runs of a single encoding mode, and summing the size of every run.
/// </summary>
/// <param name="version">The QR code version (1-40, or -1 to -4 for Micro QR)</param>
/// <returns>The summed bit length of all runs, or 0 when the text is null or empty</returns>
/// <exception cref="InvalidOperationException">A mode other than Byte, Alphanumeric or Numeric was selected</exception>
public override int GetBitLength(int version)
{
    if (string.IsNullOrEmpty(Text))
        return 0;

    int bits = 0;
    int runStart = 0;
    EncodingMode runMode = SelectInitialMode(Text, runStart, version);

    // Text is non-empty here, so the loop body always runs at least once.
    while (runStart < Text.Length)
    {
        int runEnd;
        EncodingMode followingMode;

        // Determine where the current run stops, then account for its encoded size.
        switch (runMode)
        {
            case EncodingMode.Byte:
                runEnd = ProcessByteMode(Text, runStart, version, out followingMode);
                bits += GetByteBitLength(runEnd - runStart, version);
                break;

            case EncodingMode.Alphanumeric:
                runEnd = ProcessAlphanumericMode(Text, runStart, version, out followingMode);
                bits += AlphanumericDataSegment.GetBitLength(runEnd - runStart, version);
                break;

            case EncodingMode.Numeric:
                runEnd = ProcessNumericMode(Text, runStart, version, out followingMode);
                bits += NumericDataSegment.GetBitLength(runEnd - runStart, version);
                break;

            default:
                throw new InvalidOperationException("Unsupported encoding mode");
        }

        // Continue with the next run in the mode chosen by the scanner.
        runStart = runEnd;
        runMode = followingMode;
    }

    return bits;
}
| 80 | + |
/// <summary>
/// Computes the size in bits of a Byte mode run: a 4-bit mode indicator, the version-dependent
/// character count indicator, and 8 bits per character (ISO-8859-1).
/// </summary>
/// <param name="textLength">Number of characters in the run</param>
/// <param name="version">The QR code version used to look up the count indicator length</param>
/// <returns>The total bit length of the run</returns>
private static int GetByteBitLength(int textLength, int version)
{
    const int modeIndicatorBits = 4;
    const int bitsPerCharacter = 8;

    return modeIndicatorBits
        + GetCountIndicatorLength(version, EncodingMode.Byte)
        + (textLength * bitsPerCharacter);
}
| 91 | + |
/// <summary>
/// Encodes this segment into an existing BitArray, splitting the text into runs of a single
/// encoding mode and emitting each run with the writer that belongs to its mode.
/// </summary>
/// <param name="bitArray">The target BitArray to write to</param>
/// <param name="startIndex">The index in the BitArray at which writing begins</param>
/// <param name="version">The QR code version (1-40, or -1 to -4 for Micro QR)</param>
/// <returns>The index just past the last bit written; <paramref name="startIndex"/> when the text is null or empty</returns>
/// <exception cref="InvalidOperationException">A mode other than Byte, Alphanumeric or Numeric was selected</exception>
public override int WriteTo(BitArray bitArray, int startIndex, int version)
{
    if (string.IsNullOrEmpty(Text))
        return startIndex;

    int writePos = startIndex;
    int runStart = 0;
    EncodingMode runMode = SelectInitialMode(Text, runStart, version);

    // Text is non-empty here, so the loop body always runs at least once.
    while (runStart < Text.Length)
    {
        int runEnd;
        EncodingMode followingMode;

        // Determine where the current run stops, then emit it in its own mode.
        switch (runMode)
        {
            case EncodingMode.Byte:
                runEnd = ProcessByteMode(Text, runStart, version, out followingMode);
                writePos = WriteByteSegment(Text, runStart, runEnd - runStart, bitArray, writePos, version);
                break;

            case EncodingMode.Alphanumeric:
                runEnd = ProcessAlphanumericMode(Text, runStart, version, out followingMode);
                writePos = AlphanumericDataSegment.WriteTo(Text, runStart, runEnd - runStart, bitArray, writePos, version);
                break;

            case EncodingMode.Numeric:
                runEnd = ProcessNumericMode(Text, runStart, version, out followingMode);
                writePos = NumericDataSegment.WriteTo(Text, runStart, runEnd - runStart, bitArray, writePos, version);
                break;

            default:
                throw new InvalidOperationException("Unsupported encoding mode");
        }

        // Continue with the next run in the mode chosen by the scanner.
        runStart = runEnd;
        runMode = followingMode;
    }

    return writePos;
}
| 137 | + |
/// <summary>
/// Emits one Byte mode run: the 4-bit mode indicator, the character count indicator, and then
/// each character of the run as an 8-bit value (ISO-8859-1).
/// </summary>
/// <param name="text">The full source text</param>
/// <param name="offset">Index of the first character of the run within <paramref name="text"/></param>
/// <param name="length">Number of characters in the run</param>
/// <param name="bitArray">The target BitArray to write to</param>
/// <param name="bitIndex">The index in the BitArray at which writing begins</param>
/// <param name="version">The QR code version used to look up the count indicator length</param>
/// <returns>The index just past the last bit written</returns>
private static int WriteByteSegment(string text, int offset, int length, BitArray bitArray, int bitIndex, int version)
{
    // Header: mode indicator followed by the character count.
    var cursor = DecToBin((int)EncodingMode.Byte, 4, bitArray, bitIndex);
    cursor = DecToBin(length, GetCountIndicatorLength(version, EncodingMode.Byte), bitArray, cursor);

    // Payload: one byte per character.
    var end = offset + length;
    for (var pos = offset; pos < end; pos++)
        cursor = DecToBin(text[pos], 8, bitArray, cursor);

    return cursor;
}
| 158 | + |
// Chooses the encoding mode for the run that begins at startPos, looking only at the leading
// characters. Follows ISO/IEC 18004:2015 Annex J.2 section a (Kanji is not considered).
// The caller must pass a startPos that is a valid index into text.
private static EncodingMode SelectInitialMode(string text, int startPos, int version)
{
    var first = text[startPos];

    // Rule a.1: data from the exclusive subset of the Byte character set starts in Byte mode.
    if (!IsAlphanumeric(first))
        return EncodingMode.Byte;

    if (!IsNumeric(first))
    {
        // Rule a.3: an alphanumeric run shorter than [6,7,8] characters that is followed by data from
        // the remainder of the Byte character set is cheaper in Byte mode; otherwise use Alphanumeric mode.
        var alphanumericRun = CountConsecutive(text, startPos, IsAlphanumeric);
        var afterRun = startPos + alphanumericRun;

        if (alphanumericRun < GetBreakpoint(version, 6, 7, 8)
            && afterRun < text.Length
            && !IsAlphanumeric(text[afterRun]))
        {
            return EncodingMode.Byte;
        }

        return EncodingMode.Alphanumeric;
    }

    // Rule a.4: the data starts with digits.
    var digitRun = CountConsecutive(text, startPos, IsNumeric);
    var afterDigits = startPos + digitRun;
    var hasFollower = afterDigits < text.Length;

    // Fewer than [4,4,5] digits followed by data from the exclusive subset of the Byte character set: Byte mode.
    // The follower is never a digit, so "not an alphanumeric non-digit" means it is Byte-only data.
    if (hasFollower
        && digitRun < GetBreakpoint(version, 4, 4, 5)
        && !IsAlphanumericNonDigit(text[afterDigits]))
    {
        return EncodingMode.Byte;
    }

    // Fewer than [7,8,9] digits followed by data from the exclusive subset of the Alphanumeric character set:
    // Alphanumeric mode.
    if (hasFollower
        && digitRun < GetBreakpoint(version, 7, 8, 9)
        && IsAlphanumericNonDigit(text[afterDigits]))
    {
        return EncodingMode.Alphanumeric;
    }

    // Otherwise the digits are encoded in Numeric mode.
    return EncodingMode.Numeric;
}
| 205 | + |
// Scans forward from startPos while the text should stay in Byte mode and reports where the run
// ends and which mode follows. Implements rules from ISO/IEC 18004:2015 Annex J.2 section b.
//
// Returns the index of the first character that no longer belongs to the Byte run (text.Length
// when the run reaches the end of the text). nextMode receives the mode for the following run;
// it is Byte when the end of the text was reached.
private static int ProcessByteMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    var numericThreshold = GetBreakpoint(version, 6, 8, 9);
    var alphaThreshold = GetBreakpoint(version, 11, 15, 16);

    var pos = startPos;
    while (pos < text.Length)
    {
        // Rule b.3: If a sequence of at least [6,8,9] Numeric characters occurs before more data from the
        // exclusive subset of the Byte character set, switch to Numeric mode
        var numericCount = CountConsecutive(text, pos, IsNumeric);
        if (numericCount >= numericThreshold)
        {
            nextMode = EncodingMode.Numeric;
            return pos;
        }

        // Rule b.2: If a sequence of at least [11,15,16] characters from the exclusive subset of the
        // Alphanumeric character set occurs before more data from the exclusive subset of the Byte
        // character set, switch to Alphanumeric mode.
        // Every digit is also alphanumeric, so the alphanumeric run starting at pos is the digit prefix
        // already counted plus whatever alphanumeric characters follow it; resume scanning after the
        // digits instead of counting them a second time.
        var alphanumericCount = numericCount + CountConsecutive(text, pos + numericCount, IsAlphanumeric);
        if (alphanumericCount >= alphaThreshold)
        {
            nextMode = EncodingMode.Alphanumeric;
            return pos;
        }

        // Neither run is long enough to pay for a mode switch: keep this character in Byte mode
        pos++;
    }

    nextMode = EncodingMode.Byte;
    return pos;
}
| 241 | + |
// Scans forward from startPos while the text should stay in Alphanumeric mode and reports where
// the run ends and which mode follows. Implements rules from ISO/IEC 18004:2015 Annex J.2 section c.
// Returns text.Length with nextMode = Alphanumeric when the run reaches the end of the text.
private static int ProcessAlphanumericMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    var digitRunLimit = GetBreakpoint(version, 13, 15, 17);

    var index = startPos;
    for (; index < text.Length; index++)
    {
        // Rule c.2: a character from the exclusive subset of the Byte character set forces Byte mode
        if (!IsAlphanumeric(text[index]))
        {
            nextMode = EncodingMode.Byte;
            return index;
        }

        // Rule c.3: a run of at least [13,15,17] digits is worth a switch to Numeric mode
        if (CountConsecutive(text, index, IsNumeric) >= digitRunLimit)
        {
            nextMode = EncodingMode.Numeric;
            return index;
        }
    }

    // Reached the end of the text without leaving Alphanumeric mode
    nextMode = EncodingMode.Alphanumeric;
    return index;
}
| 275 | + |
// Scans forward from startPos while the text stays numeric and reports where the run ends and
// which mode follows. Relates to ISO/IEC 18004:2015 Annex J.2 section d.
//
// Rules d.2 (switch to Byte mode on Byte-only data) and d.3 (switch to Alphanumeric mode on
// Alphanumeric-only data) are replaced by the more intelligent initial mode logic: the mode
// after the digits is whatever SelectInitialMode picks for the remaining text.
private static int ProcessNumericMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    // The Numeric run is exactly the unbroken sequence of digits beginning at startPos.
    var runEnd = startPos + CountConsecutive(text, startPos, IsNumeric);

    // At the end of the text the mode stays Numeric; otherwise re-evaluate from the first non-digit.
    nextMode = runEnd < text.Length
        ? SelectInitialMode(text, runEnd, version)
        : EncodingMode.Numeric;

    return runEnd;
}
| 303 | + |
// Picks the breakpoint that applies to the given QR code version.
// ISO/IEC 18004:2015 Annex J.2 specifies different thresholds for different version ranges:
// - version below 10 (versions 1-9):  v1_9
// - version 10 through 26:            v10_26
// - version 27 and above:             v27_40
private static int GetBreakpoint(int version, int v1_9, int v10_26, int v27_40) => version switch
{
    < 10 => v1_9,
    < 27 => v10_26,
    _ => v27_40,
};
| 318 | + |
// Returns the length of the unbroken run of characters, beginning at startPos, for which the
// predicate holds. The result is 0 when startPos is at or past the end of the text or when the
// character at startPos does not match.
private static int CountConsecutive(string text, int startPos, Func<char, bool> predicate)
{
    var end = startPos;
    while (end < text.Length && predicate(text[end]))
        end++;

    return end - startPos;
}
| 327 | + |
// Tells whether a character is a decimal digit, i.e. lies between '0' and '9' according to IsInRange.
private static bool IsNumeric(char c)
{
    return IsInRange(c, '0', '9');
}
| 330 | + |
// Tells whether a character can be encoded in alphanumeric mode: either a digit or one of the
// non-digit characters accepted by IsAlphanumericNonDigit.
private static bool IsAlphanumeric(char c)
{
    if (IsNumeric(c))
        return true;

    return IsAlphanumericNonDigit(c);
}
| 333 | + |
// Tells whether a non-digit character can be encoded in alphanumeric mode.
// The answer comes straight from AlphanumericEncoder.CanEncodeNonDigit.
private static bool IsAlphanumericNonDigit(char c)
{
    return AlphanumericEncoder.CanEncodeNonDigit(c);
}
| 336 | + } |
| 337 | +} |
0 commit comments