Skip to content

Commit ab53b6a

Browse files
committed
new: trim columns, InspectInvisibleText, Text.FormatControlSymbols, etc
1 parent 9a51f3c commit ab53b6a

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
let
2+
// ignore this step, it's just "enter-data" which is JSON
// The payload is Base64 text that is Deflate-decompressed and then parsed as JSON rows.
// The declared column names contain extra whitespace and line feeds ( #(lf) ),
// which is what the later steps of this query inspect.
3+
Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WSspPUtJRyi9KzEtPVUjKKU0F8hKVYnWilbJS84DsotQUIJmkFBsLAA==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#" User " = _t, #" Color " = _t, #" has Newline #(lf)#(lf)" = _t]),
4+
5+
/*
    Show which exact characters each column name is made of, so that
    invisible characters (spaces, line feeds, ...) become visible.

    input:  a list of column names
    output: a list of records, one per name, with the fields
            HasWhitespace, RawName, Symbols, HexSummary, DecSummary,
            CharList, Codepoints, AsHexStr

    The summaries list the numeric value of every character.
    Example, for a name with 4 leading spaces and 1 trailing space:
        RawName    = "    Color "
        HexSummary = "Hex: 20, 20, 20, 20, 43, 6f, 6c, 6f, 72, 20"
*/
InspectInvisibleText = ( columnNames as list ) =>
    let
        // record fields are evaluated lazily, so a field may refer to one declared below it
        describeName = ( name ) => [
            HasWhitespace = name <> Text.Trim( name ),
            RawName       = name,
            Symbols       = Text.FormatControlSymbols( name ),
            HexSummary    = "Hex: " & Text.Combine( AsHexStr, ", " ),
            DecSummary    = "Dec: " & Text.Combine( List.Transform( Codepoints, Text.From ), ", " ),

            CharList      = Text.ToList( name ),
            Codepoints    = List.Transform( CharList, Character.ToNumber ),
            AsHexStr      = List.Transform( Codepoints, ( n ) => Number.ToText( n, "x" ) )
        ],
        records = List.Transform( columnNames, describeName )
    in
        records,
26+
27+
/*
28+
technical note:
29+
If you know c#, `Text.ToList` is technically returning a `char[]`, not a `System.Text.Rune[]`
30+
If you're curious, there's a pretty decent technical summary of the differences here:
31+
32+
- Character encoding in dotnet: https://learn.microsoft.com/en-us/dotnet/standard/base-types/character-encoding-introduction
33+
- What character is a hex codepoint, like 20?: https://www.compart.com/en/unicode/U+0020
34+
- To see all ascii characters on one page: https://www.compart.com/en/unicode/block/U+0000
35+
That shows the first 128 characters:
36+
is 0-127 in decimal
37+
or 0x0-0x7f in hexadecimal
38+
*/
39+
40+
/*
    input:  text with whitespace / control characters
    output: the same text with those characters visualized, using the
            Unicode "Control Pictures" block (U+2400..U+2421)

    Mapping:
        0x00..0x20  (control characters and space) -> U+2400..U+2420
        0x7f        (DEL)                          -> U+2421
        anything else is returned unchanged

    Technically this works on the items returned by Text.ToList, not on
    full codepoints, so it's not perfect, but it covers all of ascii.

    probably poor performance
*/
Text.FormatControlSymbols = (string as text) as text =>
    let
        ControlPicturesBase = 0x2400,
        SymbolForDelete     = 0x2421,

        CharList = Text.ToList( string ),
        Symbols = List.Transform(
            CharList,
            (char) =>
                let
                    codepoint = Character.ToNumber( char )
                in
                    if codepoint <= 0x20 then
                        Character.FromNumber( codepoint + ControlPicturesBase )
                    // DEL is the one ascii control character outside 0x00..0x20
                    else if codepoint = 0x7f then
                        Character.FromNumber( SymbolForDelete )
                    else
                        char
        ),

        return = Text.Combine( Symbols, "" )
    in
        return,
64+
65+
66+
// the column names of Source, as a list of text
raw_names = Table.ColumnNames( Source ),
67+
// one record per column name, describing the characters it contains
inspected = InspectInvisibleText( raw_names ),
68+
69+
/* bonus section:
70+
you can use codepoints in powerquery strings, as literals
71+
It uses hex notation.
72+
73+
Note: It requires either 4-digits or 8-digits. So "20" has to be "0020" or "00000020"
74+
*/
75+
76+
77+
// Four ways to write the same one-character string: a space, U+0020
ExampleCodepointsInPQ = [
    RawStr             = " ",
    AsInline           = "#(0020)",
    AsCodepoint        = Character.FromNumber( 32 ),
    AsCodepointFromHex = Character.FromNumber( 0x20 ),

    // test that shows they all build exactly the same string:
    // an ordinal de-duplication must leave a single value
    AreExactlyEqual =
        let
            variants = { RawStr, AsInline, AsCodepoint, AsCodepointFromHex },
            uniqueVariants = List.Distinct( variants, Comparer.Ordinal )
        in
            List.Count( uniqueVariants ) = 1
],
89+
90+
// final result: the list of per-column records turned into a table
Summary = Table.FromRecords( inspected )
91+
in
92+
Summary
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
let
2+
// ignore this step, it's just "enter-data" which is JSON
// The payload is Base64 text that is Deflate-decompressed and then parsed as JSON rows.
// The declared column names contain extra whitespace and line feeds ( #(lf) ),
// which the next step trims.
3+
Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WSspPUtJRyi9KzEtPVUjKKU0F8hKVYnWilbJS84DsotQUIJmkFBsLAA==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#" User " = _t, #" Color " = _t, #" has Newline #(lf)#(lf)" = _t]),
4+
5+
// the real code: trim leading/trailing whitespace from every column name
#"Cleanup Blank ColumnNames" =
    Table.TransformColumnNames( Source, Text.Trim )
8+
in
9+
#"Cleanup Blank ColumnNames"

0 commit comments

Comments
 (0)