Skip to content

Commit c7da4f7

Browse files
authored
Merge branch 'main' into fix-pnc-try
2 parents 1aff18d + cf1cfef commit c7da4f7

File tree

52 files changed

+2317
-3161
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+2317
-3161
lines changed

crates/compiler/builtins/bitcode/src/main.zig

+2
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ comptime {
213213
exportStrFn(str.strAllocationPtr, "allocation_ptr");
214214
exportStrFn(str.strReleaseExcessCapacity, "release_excess_capacity");
215215
exportStrFn(str.strWithAsciiLowercased, "with_ascii_lowercased");
216+
exportStrFn(str.strWithAsciiUppercased, "with_ascii_uppercased");
217+
exportStrFn(str.strCaselessAsciiEquals, "caseless_ascii_equals");
216218

217219
for (INTEGERS) |T| {
218220
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");

crates/compiler/builtins/bitcode/src/str.zig

+129-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ pub const RocStr = extern struct {
179179

180180
pub fn eq(self: RocStr, other: RocStr) bool {
181181
// If they are byte-for-byte equal, they're definitely equal!
182-
if (self.bytes == other.bytes and self.length == other.length and self.capacity_or_alloc_ptr == other.capacity_or_alloc_ptr) {
182+
if (self.bytes == other.bytes and self.length == other.length) {
183183
return true;
184184
}
185185

@@ -2195,6 +2195,134 @@ test "withAsciiLowercased: seamless slice" {
21952195
try expect(str_result.eq(expected));
21962196
}
21972197

2198+
// Str.with_ascii_uppercased
2199+
// Returns `string` with every byte passed through `ascii.toUpper`.
//
// When `string.isUnique()` is true the input value itself is used as the result
// and its bytes are rewritten in place; otherwise the input is decref'd and a
// fresh copy made with `RocStr.fromSlice` is rewritten instead.
pub fn strWithAsciiUppercased(string: RocStr) callconv(.C) RocStr {
2200+
var new_str = if (string.isUnique())
2201+
string
2202+
else blk: {
2203+
// NOTE(review): `string.asSlice()` is read on the next statement, after this
// decref. Presumably safe because a non-unique string is not freed by a single
// decref — confirm.
string.decref();
2204+
break :blk RocStr.fromSlice(string.asSlice());
2205+
};
2206+
2207+
// Mutable view over the first `string.len()` bytes of the result.
const new_str_bytes = new_str.asU8ptrMut()[0..string.len()];
2208+
for (new_str_bytes) |*c| {
2209+
c.* = ascii.toUpper(c.*);
2210+
}
2211+
return new_str;
2212+
}
2213+
2214+
// Uppercases a small string and checks that the result is still a small string
// equal to "COFFé": the ASCII letters change while the non-ASCII `é` is kept.
test "withAsciiUppercased: small str" {
2215+
const original = RocStr.fromSlice("coffé");
2216+
try expect(original.isSmallStr());
2217+
2218+
const expected = RocStr.fromSlice("COFFé");
2219+
defer expected.decref();
2220+
2221+
const str_result = strWithAsciiUppercased(original);
2222+
defer str_result.decref();
2223+
2224+
try expect(str_result.isSmallStr());
2225+
try expect(str_result.eq(expected));
2226+
}
2227+
2228+
// Uppercases a string that is asserted not to be a small string and checks that
// the result is also not a small string and equals the expected uppercased text.
test "withAsciiUppercased: non small str" {
2229+
const original = RocStr.fromSlice("coffé coffé coffé coffé coffé coffé");
2230+
// NOTE(review): only `original` is decref'd in this test, not `str_result` —
// presumably because a unique input is handed back as the result; confirm.
defer original.decref();
2231+
try expect(!original.isSmallStr());
2232+
2233+
const expected = RocStr.fromSlice("COFFé COFFé COFFé COFFé COFFé COFFé");
2234+
defer expected.decref();
2235+
2236+
const str_result = strWithAsciiUppercased(original);
2237+
2238+
try expect(!str_result.isSmallStr());
2239+
try expect(str_result.eq(expected));
2240+
}
2241+
2242+
// Uppercases a string obtained from `substringUnsafeC` (asserted to be a seamless
// slice) and checks the result against the expected uppercased text.
test "withAsciiUppercased: seamless slice" {
2243+
const l = RocStr.fromSlice("coffé coffé coffé coffé coffé coffé");
2244+
// Called with start 1 and length `l.len() - 1`; the expected text below is the
// source string without its first character.
const original = substringUnsafeC(l, 1, l.len() - 1);
2245+
defer original.decref();
2246+
2247+
try expect(original.isSeamlessSlice());
2248+
2249+
const expected = RocStr.fromSlice("OFFé COFFé COFFé COFFé COFFé COFFé");
2250+
defer expected.decref();
2251+
2252+
const str_result = strWithAsciiUppercased(original);
2253+
2254+
try expect(!str_result.isSmallStr());
2255+
try expect(str_result.eq(expected));
2256+
}
2257+
2258+
// Compares two strings, ignoring the case of ASCII letters.
//
// Returns true immediately when both values have the same `bytes` and `length`
// fields; otherwise returns the result of `ascii.eqlIgnoreCase` on the two slices.
pub fn strCaselessAsciiEquals(self: RocStr, other: RocStr) callconv(.C) bool {
2259+
// Fast path: identical `bytes` and `length` fields, no need to scan the contents.
if (self.bytes == other.bytes and self.length == other.length) {
2260+
return true;
2261+
}
2262+
2263+
return ascii.eqlIgnoreCase(self.asSlice(), other.asSlice());
2264+
}
2265+
2266+
// Passing the same string value as both arguments must compare equal.
test "caselessAsciiEquals: same str" {
2267+
const str1 = RocStr.fromSlice("coFféÉ");
2268+
defer str1.decref();
2269+
2270+
const are_equal = strCaselessAsciiEquals(str1, str1);
2271+
try expect(are_equal);
2272+
}
2273+
2274+
// Strings that differ only in the case of a non-ASCII character (`é` vs `É`)
// must NOT compare equal.
test "caselessAsciiEquals: differently capitalized non-ascii char" {
2275+
const str1 = RocStr.fromSlice("coffé");
2276+
defer str1.decref();
2277+
try expect(str1.isSmallStr());
2278+
2279+
const str2 = RocStr.fromSlice("coffÉ");
2280+
defer str2.decref();
2281+
2282+
const are_equal = strCaselessAsciiEquals(str1, str2);
2283+
try expect(!are_equal);
2284+
}
2285+
2286+
// Small strings that differ only in the case of their ASCII letters compare equal.
test "caselessAsciiEquals: small str" {
2287+
const str1 = RocStr.fromSlice("coffé");
2288+
defer str1.decref();
2289+
try expect(str1.isSmallStr());
2290+
2291+
const str2 = RocStr.fromSlice("COFFé");
2292+
defer str2.decref();
2293+
2294+
const are_equal = strCaselessAsciiEquals(str1, str2);
2295+
try expect(are_equal);
2296+
}
2297+
2298+
// Same check as the small-string case, with an input asserted not to be a small
// string: strings differing only in ASCII letter case compare equal.
test "caselessAsciiEquals: non small str" {
2299+
const str1 = RocStr.fromSlice("coffé coffé coffé coffé coffé coffé");
2300+
defer str1.decref();
2301+
try expect(!str1.isSmallStr());
2302+
2303+
const str2 = RocStr.fromSlice("COFFé COFFé COFFé COFFé COFFé COFFé");
2304+
defer str2.decref();
2305+
2306+
const are_equal = strCaselessAsciiEquals(str1, str2);
2307+
2308+
try expect(are_equal);
2309+
}
2310+
2311+
// Compares a string obtained from `substringUnsafeC` (asserted to be a seamless
// slice) against a separately built string that differs only in ASCII letter case.
test "caselessAsciiEquals: seamless slice" {
2312+
const l = RocStr.fromSlice("coffé coffé coffé coffé coffé coffé");
2313+
const str1 = substringUnsafeC(l, 1, l.len() - 1);
2314+
defer str1.decref();
2315+
2316+
try expect(str1.isSeamlessSlice());
2317+
2318+
const str2 = RocStr.fromSlice("OFFé COFFé COFFé COFFé COFFé COFFé");
2319+
defer str2.decref();
2320+
2321+
const are_equal = strCaselessAsciiEquals(str1, str2);
2322+
2323+
try expect(are_equal);
2324+
}
2325+
21982326
// No-op with the C calling convention: accepts an optional byte pointer, ignores it, and returns nothing.
fn rcNone(_: ?[*]u8) callconv(.C) void {}
21992327

22002328
fn decStr(ptr: ?[*]u8) callconv(.C) void {

crates/compiler/builtins/roc/Str.roc

+68-2
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ module [
374374
drop_prefix,
375375
drop_suffix,
376376
with_ascii_lowercased,
377+
with_ascii_uppercased,
378+
caseless_ascii_equals,
377379
]
378380

379381
import Bool exposing [Bool]
@@ -1348,7 +1350,71 @@ drop_suffix = |haystack, suffix|
13481350
## for Unicode capitalization that can be upgraded independently from the language's builtins.
13491351
##
13501352
## To do a case-insensitive comparison of the ASCII characters in a string,
1351-
## use [`caseless_ascii_equals`](#caseless_ascii_equals).
1353+
## use [Str.caseless_ascii_equals].
13521354
with_ascii_lowercased : Str -> Str
13531355

1354-
expect Str.with_ascii_lowercased("cOFFÉ") == "coffÉ"
1356+
expect Str.with_ascii_lowercased("CAFÉ") == "cafÉ"
1357+
1358+
## Returns a version of the string with all [ASCII characters](https://en.wikipedia.org/wiki/ASCII) uppercased.
1359+
## Non-ASCII characters are left unmodified. For example:
1360+
##
1361+
## ```roc
1362+
## expect "café".with_ascii_uppercased() == "CAFé"
1363+
## ```
1364+
##
1365+
## This function is useful for things like
1366+
## [command-line options](https://en.wikipedia.org/wiki/Command-line_interface#Command-line_option)
1367+
## and [environment variables](https://en.wikipedia.org/wiki/Environment_variable)
1368+
## when you know in advance that you're dealing with a hardcoded string containing only ASCII characters.
1369+
## It has better performance than uppercasing operations which take Unicode into account.
1370+
##
1371+
## That said, strings received from user input can always contain
1372+
## non-ASCII Unicode characters, and uppercasing [Unicode](https://unicode.org)
1373+
## works differently in different languages.
1374+
## For example, the string `"i"` uppercases to `"I"` in English and to `"İ"`
1375+
## (a [dotted I](https://en.wikipedia.org/wiki/%C4%B0)) in Turkish.
1376+
## These rules can also change in each Unicode release,
1377+
## so we have a separate [`unicode` package](https://github.com/roc-lang/unicode) for Unicode capitalization
1378+
## that can be upgraded independently from the language's builtins.
1379+
##
1380+
## To do a case-insensitive comparison of the ASCII characters in a string,
1381+
## use [Str.caseless_ascii_equals].
1382+
with_ascii_uppercased : Str -> Str
1383+
1384+
expect Str.with_ascii_uppercased("café") == "CAFé"
1385+
1386+
## Returns `True` if all the [ASCII characters](https://en.wikipedia.org/wiki/ASCII) in the string are the same
1387+
## when ignoring differences in capitalization.
1388+
## Non-ASCII characters must all be exactly the same,
1389+
## including capitalization. For example:
1390+
##
1391+
## ```roc
1392+
## expect "café".caseless_ascii_equals("CAFé")
1393+
##
1394+
## expect !"café".caseless_ascii_equals("CAFÉ")
1395+
## ```
1396+
##
1397+
## The first call returns `True` because all the ASCII characters are the same
1398+
## when ignoring differences in capitalization, and the only non-ASCII character
1399+
## (`é`) is the same in both strings. The second call returns `False` because
1400+
## `é` and `É` are not ASCII characters, and they are different.
1401+
##
1402+
## This function is useful for things like [command-line options](https://en.wikipedia.org/wiki/Command-line_interface#Command-line_option)
1403+
## and [environment variables](https://en.wikipedia.org/wiki/Environment_variable)
1404+
## when you know in advance that you're dealing with a hardcoded string containing only ASCII characters.
1405+
## It has better performance than lowercasing operations which take Unicode into account.
1406+
##
1407+
## That said, strings received from user input can always contain
1408+
## non-ASCII Unicode characters, and lowercasing [Unicode](https://unicode.org) works
1409+
## differently in different languages. For example, the string `"I"` lowercases to `"i"`
1410+
## in English and to `"ı"` (a [dotless i](https://en.wikipedia.org/wiki/Dotless_I))
1411+
## in Turkish. These rules can also change in each [Unicode release](https://www.unicode.org/releases/),
1412+
## so we have a separate [`unicode` package](https://github.com/roc-lang/unicode)
1413+
## for Unicode capitalization that can be upgraded independently from the language's builtins.
1414+
##
1415+
## To convert a string's ASCII characters to uppercase or lowercase, use [Str.with_ascii_uppercased]
1416+
## and [Str.with_ascii_lowercased].
1417+
caseless_ascii_equals : Str, Str -> Bool
1418+
1419+
expect Str.caseless_ascii_equals("café", "CAFé")
1420+
expect !Str.caseless_ascii_equals("café", "CAFÉ")

crates/compiler/builtins/src/bitcode.rs

+2
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,8 @@ pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
360360
pub const STR_ALLOCATION_PTR: &str = "roc_builtins.str.allocation_ptr";
361361
pub const STR_RELEASE_EXCESS_CAPACITY: &str = "roc_builtins.str.release_excess_capacity";
362362
pub const STR_WITH_ASCII_LOWERCASED: &str = "roc_builtins.str.with_ascii_lowercased";
363+
pub const STR_WITH_ASCII_UPPERCASED: &str = "roc_builtins.str.with_ascii_uppercased";
364+
pub const STR_CASELESS_ASCII_EQUALS: &str = "roc_builtins.str.caseless_ascii_equals";
363365

364366
pub const LIST_MAP: &str = "roc_builtins.list.map";
365367
pub const LIST_MAP2: &str = "roc_builtins.list.map2";

crates/compiler/can/src/builtins.rs

+2
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ map_symbol_to_lowlevel_and_arity! {
132132
StrWithCapacity; STR_WITH_CAPACITY; 1,
133133
StrReleaseExcessCapacity; STR_RELEASE_EXCESS_CAPACITY; 1,
134134
StrWithAsciiLowercased; STR_WITH_ASCII_LOWERCASED; 1,
135+
StrWithAsciiUppercased; STR_WITH_ASCII_UPPERCASED; 1,
136+
StrCaselessAsciiEquals; STR_CASELESS_ASCII_EQUALS; 2,
135137

136138
ListLenUsize; LIST_LEN_USIZE; 1,
137139
ListLenU64; LIST_LEN_U64; 1,

crates/compiler/gen_dev/src/lib.rs

+14
Original file line numberDiff line numberDiff line change
@@ -1726,6 +1726,20 @@ trait Backend<'a> {
17261726
arg_layouts,
17271727
ret_layout,
17281728
),
1729+
LowLevel::StrWithAsciiUppercased => self.build_fn_call(
1730+
sym,
1731+
bitcode::STR_WITH_ASCII_UPPERCASED.to_string(),
1732+
args,
1733+
arg_layouts,
1734+
ret_layout,
1735+
),
1736+
LowLevel::StrCaselessAsciiEquals => self.build_fn_call(
1737+
sym,
1738+
bitcode::STR_CASELESS_ASCII_EQUALS.to_string(),
1739+
args,
1740+
arg_layouts,
1741+
ret_layout,
1742+
),
17291743
LowLevel::StrToNum => {
17301744
let number_layout = match self.interner().get_repr(*ret_layout) {
17311745
LayoutRepr::Struct(field_layouts) => field_layouts[0], // TODO: why is it sometimes a struct?

crates/compiler/gen_llvm/src/llvm/lowlevel.rs

+22
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,28 @@ pub(crate) fn run_low_level<'a, 'ctx>(
626626
bitcode::STR_WITH_ASCII_LOWERCASED,
627627
)
628628
}
629+
StrWithAsciiUppercased => {
630+
arguments!(string);
631+
632+
call_str_bitcode_fn(
633+
env,
634+
&[string],
635+
&[],
636+
BitcodeReturns::Str,
637+
bitcode::STR_WITH_ASCII_UPPERCASED,
638+
)
639+
}
640+
StrCaselessAsciiEquals => {
641+
arguments!(string1, string2);
642+
643+
call_str_bitcode_fn(
644+
env,
645+
&[string1, string2],
646+
&[],
647+
BitcodeReturns::Basic,
648+
bitcode::STR_CASELESS_ASCII_EQUALS,
649+
)
650+
}
629651
ListConcat => {
630652
debug_assert_eq!(args.len(), 2);
631653

crates/compiler/gen_wasm/src/low_level.rs

+6
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,12 @@ impl<'a> LowLevelCall<'a> {
262262
StrWithAsciiLowercased => {
263263
self.load_args_and_call_zig(backend, bitcode::STR_WITH_ASCII_LOWERCASED)
264264
}
265+
StrWithAsciiUppercased => {
266+
self.load_args_and_call_zig(backend, bitcode::STR_WITH_ASCII_UPPERCASED)
267+
}
268+
StrCaselessAsciiEquals => {
269+
self.load_args_and_call_zig(backend, bitcode::STR_CASELESS_ASCII_EQUALS)
270+
}
265271

266272
// List
267273
ListLenU64 => {

crates/compiler/module/src/low_level.rs

+4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ pub enum LowLevel {
2828
StrWithCapacity,
2929
StrReleaseExcessCapacity,
3030
StrWithAsciiLowercased,
31+
StrWithAsciiUppercased,
32+
StrCaselessAsciiEquals,
3133
ListLenUsize,
3234
ListLenU64,
3335
ListWithCapacity,
@@ -269,6 +271,8 @@ map_symbol_to_lowlevel! {
269271
StrWithCapacity <= STR_WITH_CAPACITY;
270272
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY;
271273
StrWithAsciiLowercased <= STR_WITH_ASCII_LOWERCASED;
274+
StrWithAsciiUppercased <= STR_WITH_ASCII_UPPERCASED;
275+
StrCaselessAsciiEquals <= STR_CASELESS_ASCII_EQUALS;
272276
ListLenU64 <= LIST_LEN_U64;
273277
ListLenUsize <= LIST_LEN_USIZE;
274278
ListGetCapacity <= LIST_CAPACITY;

crates/compiler/module/src/symbol.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1421,10 +1421,12 @@ define_builtins! {
14211421
49 STR_DROP_PREFIX: "drop_prefix"
14221422
50 STR_DROP_SUFFIX: "drop_suffix"
14231423
51 STR_WITH_ASCII_LOWERCASED: "with_ascii_lowercased"
1424-
52 STR_FROM_UTF16: "from_utf16"
1425-
53 STR_FROM_UTF16_LOSSY: "from_utf16_lossy"
1426-
54 STR_FROM_UTF32: "from_utf32"
1427-
55 STR_FROM_UTF32_LOSSY: "from_utf32_lossy"
1424+
52 STR_WITH_ASCII_UPPERCASED: "with_ascii_uppercased"
1425+
53 STR_CASELESS_ASCII_EQUALS: "caseless_ascii_equals"
1426+
54 STR_FROM_UTF16: "from_utf16"
1427+
55 STR_FROM_UTF16_LOSSY: "from_utf16_lossy"
1428+
56 STR_FROM_UTF32: "from_utf32"
1429+
57 STR_FROM_UTF32_LOSSY: "from_utf32_lossy"
14281430
}
14291431
6 LIST: "List" => {
14301432
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias

crates/compiler/mono/src/drop_specialization.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1550,6 +1550,8 @@ fn low_level_no_rc(lowlevel: &LowLevel) -> RC {
15501550
StrJoinWith => RC::NoRc,
15511551
ListSortWith => RC::Rc,
15521552
StrWithAsciiLowercased => RC::Rc,
1553+
StrWithAsciiUppercased => RC::Rc,
1554+
StrCaselessAsciiEquals => RC::NoRc,
15531555

15541556
ListAppendUnsafe
15551557
| ListReserve

crates/compiler/mono/src/inc_dec.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,8 @@ pub(crate) fn lowlevel_borrow_signature(op: LowLevel) -> &'static [Ownership] {
12591259
ListIncref => &[OWNED],
12601260
ListDecref => &[OWNED],
12611261
StrWithAsciiLowercased => &[OWNED],
1262+
StrWithAsciiUppercased => &[OWNED],
1263+
StrCaselessAsciiEquals => &[BORROWED, BORROWED],
12621264

12631265
Eq | NotEq => &[BORROWED, BORROWED],
12641266

crates/compiler/mono/src/reset_reuse.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,13 @@ impl<'a> ReuseEnvironment<'a> {
12411241
Retrieve the layout of a symbol.
12421242
*/
12431243
fn get_symbol_layout(&self, symbol: Symbol) -> &LayoutOption<'a> {
1244-
self.symbol_layouts.get(&symbol).expect("Expected symbol to have a layout. It should have been inserted in the environment already.")
1244+
self.symbol_layouts
1245+
.get(&symbol)
1246+
.expect(
1247+
"Expected symbol to have a layout. \
1248+
It should have been inserted in the environment already. \
1249+
We are investigating this issue, follow github.com/roc-lang/roc/issues/7461 for updates.",
1250+
)
12451251
}
12461252

12471253
/**

0 commit comments

Comments
 (0)