Skip to content

Commit 70d135b

Browse files
authored
[Xamarin.Android.Build.Tasks] Add support for Unicode strings (#9764)
Context: #9572

Add support for Unicode strings to the LLVM IR generator, together with de-duplication and support for outputting the same string encoded both as UTF-8 and Unicode (UTF-16LE).

Additionally, since all the files are generated from separate tasks, we don't have a global LLVM IR state which can keep track of strings and ensure that there are no duplicate symbol names. To prevent potential clashes, each generator now sets a default string group name which is unique for each module:

    ; From marshal_methods.arm64-v8a.ll
    @.mm.0 = dso_local constant [102 x i8] c"Android.App.Activity, Mono.Android, Version=0.0.0.0, Culture=neutral, PublicKeyToken=84e04ff9cfb79065\00", align 1

    ; From environment.arm64-v8a.ll
    @.env.0 = dso_local constant [7 x i8] c"normal\00", align 1

    ; From typemaps.arm64-v8a.ll
    @.tmr.0 = dso_local constant [22 x i8] c"android/os/BaseBundle\00", align 1

`mm`, `env`, and `tmr` are the default string groups for each module.

In the future, we should try to manage strings globally (which would also result in more de-duplication).
1 parent 0fb7b45 commit 70d135b

16 files changed

+358
-65
lines changed

src/Xamarin.Android.Build.Tasks/Utilities/ApplicationConfigNativeAssemblyGenerator.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ public ApplicationConfigNativeAssemblyGenerator (IDictionary<string, string> env
203203

204204
protected override void Construct (LlvmIrModule module)
205205
{
206+
module.DefaultStringGroup = "env";
207+
206208
MapStructures (module);
207209

208210
module.AddGlobalVariable ("format_tag", FORMAT_TAG, comment: $" 0x{FORMAT_TAG:x}");
@@ -211,7 +213,7 @@ protected override void Construct (LlvmIrModule module)
211213
var envVars = new LlvmIrGlobalVariable (environmentVariables, "app_environment_variables") {
212214
Comment = " Application environment variables array, name:value",
213215
};
214-
module.Add (envVars, stringGroupName: "env", stringGroupComment: " Application environment variables name:value pairs");
216+
module.Add (envVars, stringGroupName: "env.var", stringGroupComment: " Application environment variables name:value pairs");
215217

216218
var sysProps = new LlvmIrGlobalVariable (systemProperties, "app_system_properties") {
217219
Comment = " System properties defined by the application",

src/Xamarin.Android.Build.Tasks/Utilities/CompressedAssembliesNativeAssemblyGenerator.cs

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ void InitCompressedAssemblies (out List<LlvmIrGlobalVariable>? compressedAssembl
149149

150150
protected override void Construct (LlvmIrModule module)
151151
{
152+
module.DefaultStringGroup = "cas";
153+
152154
MapStructures (module);
153155

154156
InitCompressedAssemblies (

src/Xamarin.Android.Build.Tasks/Utilities/JniRemappingAssemblyGenerator.cs

+2
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ uint GetLength (string str)
284284

285285
protected override void Construct (LlvmIrModule module)
286286
{
287+
module.DefaultStringGroup = "jremap";
288+
287289
MapStructures (module);
288290
List<StructureInstance<JniRemappingTypeReplacementEntry>>? typeReplacements;
289291
List<StructureInstance<JniRemappingIndexTypeEntry>>? methodIndexTypes;

src/Xamarin.Android.Build.Tasks/Utilities/LlvmIrGenerator/LlvmIrComposer.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ protected LlvmIrComposer (TaskLoggingHelper log)
2323

2424
public LlvmIrModule Construct ()
2525
{
26-
var module = new LlvmIrModule (cache);
26+
var module = new LlvmIrModule (cache, Log);
2727
Construct (module);
2828
module.AfterConstruction ();
2929
constructed = true;

src/Xamarin.Android.Build.Tasks/Utilities/LlvmIrGenerator/LlvmIrGenerator.cs

+102-31
Original file line numberDiff line numberDiff line change
@@ -199,12 +199,22 @@ void WriteStrings (GeneratorWriteContext context)
199199
}
200200

201201
foreach (LlvmIrStringVariable info in group.Strings) {
202-
string s = QuoteString ((string)info.Value, out ulong size);
202+
string s = QuoteString (info, out ulong size);
203203

204-
WriteGlobalVariableStart (context, info);
204+
if (!info.IsConstantStringLiteral) {
205+
WriteCommentLine (context, $" '{info.Value}'");
206+
}
207+
208+
WriteGlobalVariableName (context, info);
209+
210+
// Strings must always be local symbols, global variables will point to them
211+
WriteVariableOptions (context, LlvmIrVariableOptions.LocalString);
205212
context.Output.Write ('[');
206213
context.Output.Write (size.ToString (CultureInfo.InvariantCulture));
207-
context.Output.Write (" x i8] c");
214+
context.Output.Write ($" x {info.IrType}] ");
215+
if (info.IsConstantStringLiteral) {
216+
context.Output.Write ('c');
217+
}
208218
context.Output.Write (s);
209219
context.Output.Write (", align ");
210220
context.Output.WriteLine (target.GetAggregateAlignment (1, size).ToString (CultureInfo.InvariantCulture));
@@ -246,23 +256,37 @@ void WriteGlobalVariables (GeneratorWriteContext context)
246256
}
247257
}
248258

249-
void WriteGlobalVariableStart (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
259+
void WriteGlobalVariableName (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
250260
{
251261
if (!String.IsNullOrEmpty (variable.Comment)) {
252262
WriteCommentLine (context, variable.Comment);
253263
}
254264
context.Output.Write ('@');
255265
context.Output.Write (variable.Name);
256266
context.Output.Write (" = ");
267+
}
257268

258-
LlvmIrVariableOptions options = variable.Options ?? LlvmIrGlobalVariable.DefaultOptions;
269+
void WriteVariableOptions (GeneratorWriteContext context, LlvmIrVariableOptions options)
270+
{
259271
WriteLinkage (context, options.Linkage);
260272
WritePreemptionSpecifier (context, options.RuntimePreemption);
261273
WriteVisibility (context, options.Visibility);
262274
WriteAddressSignificance (context, options.AddressSignificance);
263275
WriteWritability (context, options.Writability);
264276
}
265277

278+
void WriteVariableOptions (GeneratorWriteContext context, LlvmIrGlobalVariable variable, LlvmIrVariableOptions? defaultOptions = null)
279+
{
280+
LlvmIrVariableOptions options = variable.Options ?? defaultOptions ?? LlvmIrGlobalVariable.DefaultOptions;
281+
WriteVariableOptions (context, options);
282+
}
283+
284+
void WriteGlobalVariableStart (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
285+
{
286+
WriteGlobalVariableName (context, variable);
287+
WriteVariableOptions (context, variable, LlvmIrGlobalVariable.DefaultOptions);
288+
}
289+
266290
void WriteGlobalVariable (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
267291
{
268292
if (!context.InVariableGroup) {
@@ -319,13 +343,22 @@ void WriteTypeAndValue (GeneratorWriteContext context, LlvmIrVariable variable,
319343
throw new InvalidOperationException ($"Internal error: variable '{variable.Name}'' of type {variable.Type} must not have a null value");
320344
}
321345

322-
if (valueType != variable.Type && !LlvmIrModule.NameValueArrayType.IsAssignableFrom (variable.Type)) {
346+
if (!IsValueAssignableFrom (valueType, variable) && !IsValueAssignableFrom (LlvmIrModule.NameValueArrayType, variable)) {
323347
throw new InvalidOperationException ($"Internal error: variable type '{variable.Type}' is different to its value type, '{valueType}'");
324348
}
325349

326350
WriteValue (context, valueType, variable);
327351
}
328352

353+
bool IsValueAssignableFrom (Type valueType, LlvmIrVariable variable)
354+
{
355+
if (valueType != typeof(string) && valueType != typeof(StringHolder)) {
356+
return valueType.IsAssignableFrom (variable.Type);
357+
}
358+
359+
return variable.Type == typeof(string) || variable.Type == typeof(StringHolder);
360+
}
361+
329362
ulong GetAggregateValueElementCount (GeneratorWriteContext context, LlvmIrVariable variable) => GetAggregateValueElementCount (context, variable.Type, variable.Value, variable as LlvmIrGlobalVariable);
330363

331364
ulong GetAggregateValueElementCount (GeneratorWriteContext context, Type type, object? value, LlvmIrGlobalVariable? globalVariable = null)
@@ -560,7 +593,7 @@ void WriteInlineArray (GeneratorWriteContext context, byte[] bytes, bool encodeA
560593
{
561594
if (encodeAsASCII) {
562595
context.Output.Write ('c');
563-
context.Output.Write (QuoteString (bytes, bytes.Length, out _, nullTerminated: false));
596+
context.Output.Write (QuoteUtf8String (bytes, bytes.Length, out _, nullTerminated: false));
564597
return;
565598
}
566599

@@ -616,7 +649,7 @@ void WriteValue (GeneratorWriteContext context, StructureInstance structInstance
616649
return;
617650
}
618651

619-
WriteValue (context, smi.MemberType, value);
652+
WriteValue (context, smi.MemberType, value, smi.Info.GetStringEncoding (context.TypeCache));
620653
}
621654

622655
bool WriteNativePointerValue (GeneratorWriteContext context, StructureInstance si, StructureMemberInfo smi, object? value)
@@ -670,7 +703,7 @@ string ToHex (BasicType basicTypeDesc, Type type, object? value)
670703
return $"{(basicTypeDesc.IsUnsigned ? prefixUnsigned : prefixSigned)}0x{hex}";
671704
}
672705

673-
void WriteValue (GeneratorWriteContext context, Type type, object? value)
706+
void WriteValue (GeneratorWriteContext context, Type type, object? value, LlvmIrStringEncoding stringEncoding = LlvmIrStringEncoding.UTF8)
674707
{
675708
if (value is LlvmIrVariable variableRef) {
676709
context.Output.Write (variableRef.Reference);
@@ -710,13 +743,13 @@ void WriteValue (GeneratorWriteContext context, Type type, object? value)
710743
return;
711744
}
712745

713-
if (type == typeof(string)) {
746+
if (type == typeof(string) || type == typeof(StringHolder)) {
714747
if (value == null) {
715748
context.Output.Write ("null");
716749
return;
717750
}
718751

719-
LlvmIrStringVariable sv = context.Module.LookupRequiredVariableForString ((string)value);
752+
LlvmIrStringVariable sv = context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value, stringEncoding));
720753
context.Output.Write (sv.Reference);
721754
return;
722755
}
@@ -775,7 +808,7 @@ void WriteStructureValue (GeneratorWriteContext context, StructureInstance? inst
775808
string? comment = info.GetCommentFromProvider (smi, instance);
776809
if (String.IsNullOrEmpty (comment)) {
777810
var sb = new StringBuilder (" ");
778-
sb.Append (MapManagedTypeToNative (smi));
811+
sb.Append (MapManagedTypeToNative (context, smi));
779812
sb.Append (' ');
780813
sb.Append (smi.Info.Name);
781814
comment = sb.ToString ();
@@ -1460,8 +1493,12 @@ public static string MapManagedTypeToNative (Type type)
14601493
return type.GetShortName ();
14611494
}
14621495

1463-
static string MapManagedTypeToNative (StructureMemberInfo smi)
1496+
static string MapManagedTypeToNative (GeneratorWriteContext context, StructureMemberInfo smi)
14641497
{
1498+
if (smi.Info.IsUnicodeString (context.TypeCache)) {
1499+
return "char16_t*";
1500+
}
1501+
14651502
string nativeType = MapManagedTypeToNative (smi.MemberType);
14661503
// Silly, but effective
14671504
if (nativeType[nativeType.Length - 1] == '*') {
@@ -1487,8 +1524,9 @@ static string MapManagedTypeToNative (StructureMemberInfo smi)
14871524
throw new InvalidOperationException ($"Field '{smi.Info.Name}' of structure '{info.Name}' should have a value of '{expectedType}' type, instead it had a '{value.GetType ()}'");
14881525
}
14891526

1490-
if (valueType == typeof(string)) {
1491-
return context.Module.LookupRequiredVariableForString ((string)value);
1527+
if (valueType == typeof(string) || valueType == typeof(StringHolder)) {
1528+
var encoding = smi.Info.GetStringEncoding (context.TypeCache);
1529+
return context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value, encoding));
14921530
}
14931531

14941532
return value;
@@ -1555,30 +1593,63 @@ public static string QuoteStringNoEscape (string s)
15551593
return $"\"{s}\"";
15561594
}
15571595

1558-
public static string QuoteString (string value, bool nullTerminated = true)
1596+
public static string QuoteString (LlvmIrStringVariable variable, out ulong stringSize, bool nullTerminated = true)
15591597
{
1560-
return QuoteString (value, out _, nullTerminated);
1561-
}
1598+
if (variable.Encoding == LlvmIrStringEncoding.UTF8) {
1599+
var value = (StringHolder)variable.Value;
1600+
if (value.Data == null) {
1601+
throw new InvalidOperationException ("Internal error: null strings not supported here, they should be handled elsewhere.");
1602+
}
15621603

1563-
public static string QuoteString (byte[] bytes)
1564-
{
1565-
return QuoteString (bytes, bytes.Length, out _, nullTerminated: false);
1604+
int byteCount = Encoding.UTF8.GetByteCount (value.Data);
1605+
var bytes = ArrayPool<byte>.Shared.Rent (byteCount);
1606+
1607+
try {
1608+
Encoding.UTF8.GetBytes (value.Data, 0, value.Data.Length, bytes, 0);
1609+
return QuoteUtf8String (bytes, byteCount, out stringSize, nullTerminated);
1610+
} finally {
1611+
ArrayPool<byte>.Shared.Return (bytes);
1612+
}
1613+
}
1614+
1615+
if (variable.Encoding == LlvmIrStringEncoding.Unicode) {
1616+
return QuoteUnicodeString (variable, out stringSize, nullTerminated);
1617+
}
1618+
1619+
throw new InvalidOperationException ($"Internal error: unsupported string encoding {variable.Encoding}");
15661620
}
15671621

1568-
public static string QuoteString (string value, out ulong stringSize, bool nullTerminated = true)
1622+
static string QuoteUnicodeString (LlvmIrStringVariable variable, out ulong stringSize, bool nullTerminated = true)
15691623
{
1570-
var encoding = Encoding.UTF8;
1571-
int byteCount = encoding.GetByteCount (value);
1572-
var bytes = ArrayPool<byte>.Shared.Rent (byteCount);
1573-
try {
1574-
encoding.GetBytes (value, 0, value.Length, bytes, 0);
1575-
return QuoteString (bytes, byteCount, out stringSize, nullTerminated);
1576-
} finally {
1577-
ArrayPool<byte>.Shared.Return (bytes);
1624+
var value = (StringHolder)variable.Value;
1625+
if (value.Data == null) {
1626+
throw new InvalidOperationException ("Internal error: null strings not supported here, they should be handled elsewhere.");
15781627
}
1628+
1629+
// Each character/lexeme is encoded as iXY u0xVXYZ + comma and a space, and on top of that we have two square brackets and a trailing nul
1630+
var sb = new StringBuilder ((value.Data.Length * 13) + 3); // rough estimate of capacity
1631+
sb.Append ('[');
1632+
for (int i = 0; i < value.Data.Length; i++) {
1633+
var ch = (short)value.Data[i];
1634+
if (i > 0) {
1635+
sb.Append (", ");
1636+
}
1637+
sb.Append ($"{variable.IrType} u0x{ch:X2}");
1638+
}
1639+
1640+
if (nullTerminated) {
1641+
if (value.Data.Length > 0) {
1642+
sb.Append (", ");
1643+
}
1644+
sb.Append ($"{variable.IrType} 0");
1645+
}
1646+
sb.Append (']');
1647+
1648+
stringSize = (ulong)value.Data.Length + (nullTerminated ? 1u : 0u);
1649+
return sb.ToString ();
15791650
}
15801651

1581-
public static string QuoteString (byte[] bytes, int byteCount, out ulong stringSize, bool nullTerminated = true)
1652+
static string QuoteUtf8String (byte[] bytes, int byteCount, out ulong stringSize, bool nullTerminated = true)
15821653
{
15831654
var sb = new StringBuilder (byteCount * 2); // rough estimate of capacity
15841655

src/Xamarin.Android.Build.Tasks/Utilities/LlvmIrGenerator/LlvmIrInstructions.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,8 @@ void WriteArgument (GeneratorWriteContext context, LlvmIrFunctionParameter? para
366366
throw new InvalidOperationException ($"Internal error: value type '{value.GetType ()}' for argument {index} to function '{function.Signature.Name}' is invalid. Expected '{parameter.Type}' or compatible");
367367
}
368368

369-
if (value is string str) {
370-
context.Output.Write (context.Module.LookupRequiredVariableForString (str).Reference);
369+
if (value is string || value is StringHolder) {
370+
context.Output.Write (context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value)).Reference);
371371
return;
372372
}
373373

0 commit comments

Comments
 (0)