Skip to content

Commit 359ad78

Browse files
authored
Add multi mode encoding (#676)
* Rename AlphanumericEncoder.CanEncode * update * Add multi mode encoding * update * fix formatting * Memory optimization * Update function name * Add test * Eliminate recursive code * Add overloads to AlphanumericEncoder * Update * Add test to ensure the compressed text is not longer than the original * Update QRCoder/QRCodeGenerator.cs * update comment
1 parent fd548fb commit 359ad78

File tree

4 files changed

+414
-0
lines changed

4 files changed

+414
-0
lines changed

QRCoder/QRCodeGenerator.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ public static QRCodeData GenerateQrCode(string plainText, ECCLevel eccLevel, boo
125125
/// </summary>
126126
private static DataSegment CreateDataSegment(string plainText, bool forceUtf8, bool utf8BOM, EciMode eciMode)
127127
{
128+
// Fast path: Use optimized Latin1 segment if conditions allow
129+
if (!forceUtf8 && !utf8BOM && eciMode == EciMode.Default && OptimizedLatin1DataSegment.CanEncode(plainText))
130+
return new OptimizedLatin1DataSegment(plainText);
131+
128132
var encoding = GetEncodingFromPlaintext(plainText, forceUtf8);
129133

130134
// Use specialized segment classes based on encoding mode
Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
#pragma warning disable IDE0018 // Inline variable declaration -- false positive
2+
3+
namespace QRCoder;
4+
5+
public partial class QRCodeGenerator
6+
{
7+
/// <summary>
8+
/// Data segment that optimizes encoding by analyzing character patterns and switching between
9+
/// encoding modes (Numeric, Alphanumeric, Byte) to minimize the total bit length.
10+
/// This implements the QR Code optimization algorithm from ISO/IEC 18004:2015 Annex J.2.
11+
/// It does not support Kanji mode.
12+
/// </summary>
13+
private sealed class OptimizedLatin1DataSegment : DataSegment
14+
{
15+
/// <summary>
/// Reports whether the given string is eligible for optimized Latin-1 encoding.
/// The decision is delegated entirely to <c>IsValidISO</c>, which is expected to accept
/// text whose characters all fall inside the ISO-8859-1 range (0x00-0xFF).
/// </summary>
/// <param name="plainText">The text to check</param>
/// <returns>True if the text can be encoded as ISO-8859-1, false otherwise</returns>
public static bool CanEncode(string plainText)
{
    return IsValidISO(plainText);
}
22+
23+
/// <summary>
/// Gets the encoding mode reported for the segment as a whole. This is always
/// <c>EncodingMode.Byte</c> and is used only for DataTooLongException.
/// </summary>
public override EncodingMode EncodingMode
{
    get { return EncodingMode.Byte; }
}
27+
28+
/// <summary>
/// Initializes a new instance of the <see cref="OptimizedLatin1DataSegment"/> class.
/// The text is handed unchanged to the base constructor; no other work is done here.
/// </summary>
/// <param name="plainText">The text to encode with optimized mode switching</param>
public OptimizedLatin1DataSegment(string plainText) : base(plainText) { }
36+
37+
/// <summary>
/// Calculates the total bit length for this segment when encoded for a specific QR code version.
/// The text is walked as a chain of Numeric, Alphanumeric and Byte runs and the size of every
/// run (as reported by the matching bit-length helper) is added up.
/// </summary>
/// <param name="version">The QR code version (1-40, or -1 to -4 for Micro QR)</param>
/// <returns>The total number of bits required for this segment; 0 when the text is null or empty</returns>
/// <exception cref="InvalidOperationException">A mode other than Numeric, Alphanumeric or Byte was selected</exception>
public override int GetBitLength(int version)
{
    var text = Text;
    if (string.IsNullOrEmpty(text))
        return 0;

    var bitCount = 0;
    var cursor = 0;
    var currentMode = SelectInitialMode(text, cursor, version);

    // The text is non-empty at this point, so the body runs at least once.
    while (cursor < text.Length)
    {
        EncodingMode followingMode;
        int runEnd;
        int runBits;

        // Find where the current run stops, then measure it with the helper for its mode.
        switch (currentMode)
        {
            case EncodingMode.Byte:
                runEnd = ProcessByteMode(text, cursor, version, out followingMode);
                runBits = GetByteBitLength(runEnd - cursor, version);
                break;

            case EncodingMode.Alphanumeric:
                runEnd = ProcessAlphanumericMode(text, cursor, version, out followingMode);
                runBits = AlphanumericDataSegment.GetBitLength(runEnd - cursor, version);
                break;

            case EncodingMode.Numeric:
                runEnd = ProcessNumericMode(text, cursor, version, out followingMode);
                runBits = NumericDataSegment.GetBitLength(runEnd - cursor, version);
                break;

            default:
                throw new InvalidOperationException("Unsupported encoding mode");
        }

        bitCount += runBits;

        // Continue with the next run, in the mode chosen by the scan above.
        cursor = runEnd;
        currentMode = followingMode;
    }

    return bitCount;
}
80+
81+
/// <summary>
/// Calculates the bit length for a byte mode segment: a 4-bit mode indicator, a character count
/// indicator whose width comes from <c>GetCountIndicatorLength</c> for the version, and
/// 8 bits for every character.
/// </summary>
private static int GetByteBitLength(int textLength, int version)
{
    const int modeIndicatorBits = 4;
    const int bitsPerCharacter = 8; // ISO-8859-1 encoding: one byte per character

    return modeIndicatorBits
        + GetCountIndicatorLength(version, EncodingMode.Byte)
        + (textLength * bitsPerCharacter);
}
91+
92+
/// <summary>
/// Writes this data segment to an existing BitArray at the specified index.
/// The text is split into Numeric, Alphanumeric and Byte runs and every run is written
/// with the writer for its mode, one after another.
/// </summary>
/// <param name="bitArray">The target BitArray to write to</param>
/// <param name="startIndex">The starting index in the BitArray</param>
/// <param name="version">The QR code version (1-40, or -1 to -4 for Micro QR)</param>
/// <returns>The next index in the BitArray after the last bit written; <paramref name="startIndex"/> when the text is null or empty</returns>
/// <exception cref="InvalidOperationException">A mode other than Numeric, Alphanumeric or Byte was selected</exception>
public override int WriteTo(BitArray bitArray, int startIndex, int version)
{
    var text = Text;
    if (string.IsNullOrEmpty(text))
        return startIndex;

    var writeIndex = startIndex;
    var cursor = 0;
    var currentMode = SelectInitialMode(text, cursor, version);

    // The text is non-empty at this point, so the body runs at least once.
    while (cursor < text.Length)
    {
        EncodingMode followingMode;
        int runEnd;

        // Find where the current run stops, then emit it with the writer for its mode.
        switch (currentMode)
        {
            case EncodingMode.Byte:
                runEnd = ProcessByteMode(text, cursor, version, out followingMode);
                writeIndex = WriteByteSegment(text, cursor, runEnd - cursor, bitArray, writeIndex, version);
                break;

            case EncodingMode.Alphanumeric:
                runEnd = ProcessAlphanumericMode(text, cursor, version, out followingMode);
                writeIndex = AlphanumericDataSegment.WriteTo(text, cursor, runEnd - cursor, bitArray, writeIndex, version);
                break;

            case EncodingMode.Numeric:
                runEnd = ProcessNumericMode(text, cursor, version, out followingMode);
                writeIndex = NumericDataSegment.WriteTo(text, cursor, runEnd - cursor, bitArray, writeIndex, version);
                break;

            default:
                throw new InvalidOperationException("Unsupported encoding mode");
        }

        // Continue with the next run, in the mode chosen by the scan above.
        cursor = runEnd;
        currentMode = followingMode;
    }

    return writeIndex;
}
137+
138+
/// <summary>
/// Writes a byte mode segment to the BitArray: the 4-bit mode indicator, the character count
/// indicator, and then each character of the slice as an 8-bit value.
/// Returns the index following the last bit written.
/// </summary>
private static int WriteByteSegment(string text, int offset, int length, BitArray bitArray, int bitIndex, int version)
{
    // Mode indicator
    int cursor = DecToBin((int)EncodingMode.Byte, 4, bitArray, bitIndex);

    // Character count indicator; its width depends on the version
    cursor = DecToBin(length, GetCountIndicatorLength(version, EncodingMode.Byte), bitArray, cursor);

    // Payload: each character is emitted as one ISO-8859-1 byte
    var end = offset + length;
    for (var index = offset; index < end; index++)
        cursor = DecToBin(text[index], 8, bitArray, cursor);

    return cursor;
}
158+
159+
// Chooses the encoding mode to start with, judging by the character at startPos and the run it begins.
// Follows the rules of ISO/IEC 18004:2015 Annex J.2 section a. Bracketed values such as [4,4,5]
// are the breakpoints for versions 1-9, 10-26 and 27-40 respectively.
private static EncodingMode SelectInitialMode(string text, int startPos, int version)
{
    var first = text[startPos];

    // Rule a.1: data from the exclusive subset of the Byte character set starts in Byte mode.
    if (!IsAlphanumeric(first))
        return EncodingMode.Byte;

    if (!IsNumeric(first))
    {
        // Rule a.3: an alphanumeric run shorter than [6,7,8] characters that is followed by
        // a Byte-only character is cheaper in Byte mode; otherwise use Alphanumeric mode.
        var alnumRun = CountConsecutive(text, startPos, IsAlphanumeric);
        var afterAlnum = startPos + alnumRun;
        if (alnumRun < GetBreakpoint(version, 6, 7, 8)
            && afterAlnum < text.Length
            && !IsAlphanumeric(text[afterAlnum]))
        {
            return EncodingMode.Byte;
        }

        return EncodingMode.Alphanumeric;
    }

    // Rule a.4: the data starts with digits.
    var digitRun = CountConsecutive(text, startPos, IsNumeric);
    var afterDigits = startPos + digitRun;
    var hasFollower = afterDigits < text.Length;

    // Fewer than [4,4,5] digits followed by a Byte-only character: select Byte mode.
    // (The follower is never a digit, so "not an alphanumeric non-digit" means Byte-only.)
    if (digitRun < GetBreakpoint(version, 4, 4, 5)
        && hasFollower
        && !IsAlphanumericNonDigit(text[afterDigits]))
    {
        return EncodingMode.Byte;
    }

    // Fewer than [7,8,9] digits followed by an Alphanumeric-only character: select Alphanumeric mode.
    if (digitRun < GetBreakpoint(version, 7, 8, 9)
        && hasFollower
        && IsAlphanumericNonDigit(text[afterDigits]))
    {
        return EncodingMode.Alphanumeric;
    }

    // Everything else starts in Numeric mode.
    return EncodingMode.Numeric;
}
205+
206+
// Processes text in Byte mode and determines when to switch to another mode.
// Implements rules from ISO/IEC 18004:2015 Annex J.2 section b.
//
// Scans forward from startPos. Returns the position at which the Byte run ends and sets nextMode
// to the mode that should take over there. If the end of the text is reached without a switch,
// the returned position is past the last character and nextMode is Byte.
private static int ProcessByteMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    var pos = startPos;

    // Breakpoints for versions 1-9, 10-26 and 27-40 respectively.
    var numericThreshold = GetBreakpoint(version, 6, 8, 9);
    var alphaThreshold = GetBreakpoint(version, 11, 15, 16);

    while (pos < text.Length)
    {
        // Rule b.3: a run of at least [6,8,9] digits starting here is worth a switch to Numeric mode.
        var numericCount = CountConsecutive(text, pos, IsNumeric);
        if (numericCount >= numericThreshold)
        {
            nextMode = EncodingMode.Numeric;
            return pos;
        }

        // Rule b.2: a run of at least [11,15,16] alphanumeric characters starting here is worth
        // a switch to Alphanumeric mode. Checked after b.3, so a long digit run goes to Numeric mode.
        var alphanumericCount = CountConsecutive(text, pos, IsAlphanumeric);
        if (alphanumericCount >= alphaThreshold)
        {
            nextMode = EncodingMode.Alphanumeric;
            return pos;
        }

        // Neither run is long enough: this character stays in the Byte run.
        pos++;
    }

    nextMode = EncodingMode.Byte;
    return pos;
}
241+
242+
// Scans an Alphanumeric run beginning at startPos and reports where it ends and which mode follows.
// Follows the rules of ISO/IEC 18004:2015 Annex J.2 section c.
// When the text runs out without a switch, the returned position is past the last character
// and nextMode remains Alphanumeric.
private static int ProcessAlphanumericMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    // Minimum digit-run length ([13,15,17] by version range) that justifies leaving this mode.
    var digitRunLimit = GetBreakpoint(version, 13, 15, 17);

    var index = startPos;
    for (; index < text.Length; index++)
    {
        // Rule c.2: a character that cannot be encoded in Alphanumeric mode forces Byte mode.
        if (!IsAlphanumeric(text[index]))
        {
            nextMode = EncodingMode.Byte;
            return index;
        }

        // Rule c.3: a sufficiently long run of digits starting here switches to Numeric mode.
        if (CountConsecutive(text, index, IsNumeric) >= digitRunLimit)
        {
            nextMode = EncodingMode.Numeric;
            return index;
        }
    }

    nextMode = EncodingMode.Alphanumeric;
    return index;
}
275+
276+
// Scans a Numeric run beginning at startPos and reports where it ends and which mode follows.
// Relates to ISO/IEC 18004:2015 Annex J.2 section d. Rules d.2 (switch to Byte mode) and
// d.3 (switch to Alphanumeric mode) are not applied directly: at the first non-digit the
// follow-up mode is picked by the more intelligent initial mode logic in SelectInitialMode.
private static int ProcessNumericMode(string text, int startPos, int version, out EncodingMode nextMode)
{
    // Consume digits for as long as they last.
    var index = startPos;
    while (index < text.Length && IsNumeric(text[index]))
        index++;

    // Stopped on a non-digit: let SelectInitialMode decide what comes next.
    // Ran off the end of the text: the run simply stays Numeric.
    nextMode = index < text.Length
        ? SelectInitialMode(text, index, version)
        : EncodingMode.Numeric;

    return index;
}
303+
304+
// Picks the breakpoint that applies to the given QR code version.
// ISO/IEC 18004:2015 Annex J.2 specifies different thresholds for different version ranges:
// - version 27 and above: v27_40
// - version 10 through 26: v10_26
// - anything below 10:     v1_9
private static int GetBreakpoint(int version, int v1_9, int v10_26, int v27_40)
{
    return version >= 27 ? v27_40
        : version >= 10 ? v10_26
        : v1_9;
}
318+
319+
// Returns the length of the unbroken run of characters that satisfy the predicate,
// beginning at startPos. The run ends at the first non-matching character or at the end of the text.
private static int CountConsecutive(string text, int startPos, Func<char, bool> predicate)
{
    var index = startPos;
    while (index < text.Length && predicate(text[index]))
        index++;

    return index - startPos;
}
327+
328+
// True when the character is one of the decimal digits '0' through '9'.
private static bool IsNumeric(char c)
{
    return IsInRange(c, '0', '9');
}
330+
331+
// True when the character can be encoded in alphanumeric mode: either a digit
// or a non-digit character accepted by IsAlphanumericNonDigit.
private static bool IsAlphanumeric(char c)
{
    if (IsNumeric(c))
        return true;

    return IsAlphanumericNonDigit(c);
}
333+
334+
// True when a non-digit character can be encoded in alphanumeric mode.
// The check is delegated to AlphanumericEncoder.CanEncodeNonDigit.
private static bool IsAlphanumericNonDigit(char c)
{
    return AlphanumericEncoder.CanEncodeNonDigit(c);
}
336+
}
337+
}

0 commit comments

Comments
 (0)