// Using Html Parsing
// Parses every <a> element of an inline HTML sample with Html.Table and
// flattens each element record into three text columns:
// Url.TagName, Url.TextContent and Url.Attributes.href.
// (A parser-free, text-splitting variant of the same extraction is the
// "SingleString UsingSplits by long Delimiters" query.)
let
    // Intermediate values live in a record so that any one of them can be
    // selected and returned while debugging.
    Summary = [
        // Sample markup. M text literals may span lines; "" is an escaped quote.
        // Backslash is not an escape character in M, so the attribute is written
        // as plain _blank.
        RawHtml = "<html><body>
            <a href=""/003Dn00000b69ED"" target=""_blank"">Richard Radnay</a>
            <a href=""/12314543"" target=""_blank"">Jane Doe</a>
            ",

        // One column named "Url", CSS selector "a"; `each _` keeps the whole
        // element record instead of only its text.
        Parsed = Html.Table( RawHtml, {
            { "Url", "a", each _ }
        })
    ],
    Parsed = Summary[Parsed],

    // Unpack the element record, then the nested attribute record.
    #"Expanded Url" = Table.ExpandRecordColumn(Parsed, "Url", {"TagName", "TextContent", "Attributes"}, {"Url.TagName", "Url.TextContent", "Url.Attributes"}),
    #"Expanded Url.Attributes" = Table.ExpandRecordColumn(#"Expanded Url", "Url.Attributes", {"href"}, {"Url.Attributes.href"}),
    #"Changed Type" = Table.TransformColumnTypes(#"Expanded Url.Attributes",{{"Url.Attributes.href", type text}, {"Url.TextContent", type text}, {"Url.TagName", type text}})
in
    #"Changed Type"

// SingleString UsingSplits by long Delimiters
// Extracts the href and the link text from a single <a> tag using only text
// functions (no HTML parser). The query returns a record with the fields
// FullRawText, Href and Name.
let
    // Everything is kept in one record so individual steps can be inspected;
    // only the `final` field is returned by the query.
    Summary = [
        Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45Wiik1MDBOTlTIKEpNs41R0gfyXPIMQCDJzNLVJUZJoSSxKD21BCgXExOflJOYlx0D0ZQalJmckViUohCUmJKXWAkxSD8RIqcUGwsA", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [RawHtml = _t]),
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"RawHtml", type text}}),

        // First row of the embedded table; an alternative input to RawSrc.
        RawSrc_FromTable = #"Changed Type"{0}[RawHtml],

        // Inline sample tag. Backslash is not an escape character in M, so the
        // attribute is written as plain _blank.
        RawSrc = "<a href=""/003Dn00000b69ED"" target=""_blank"">Richard Radnay</a>",

        // Approach 1: split on the literal text that surrounds the two values.
        // The three delimiters are:  <a href="   |   " target="_blank">   |   </a>
        delimsList = { "<a href=""", """ target=""_blank"">", "</a>"},
        SplitSaleTag = Splitter.SplitTextByEachDelimiter( delimsList, QuoteStyle.None ),
        splitUrl = SplitSaleTag( RawSrc ),

        // Line-per-item renderings of the two lists, for eyeballing.
        rendList = Text.Combine( delimsList, "#(cr,lf)" ),
        rendSplit = Text.Combine( splitUrl, "#(cr,lf)" ),

        // Approach 2, step by step: text after  <a href="  and then up to the
        // next double quote.
        s0 = Text.AfterDelimiter(RawSrc, "<a href="""),
        s1 = Text.BeforeDelimiter(s0, """"),

        // Approach 2 as a reusable function.
        // rawTag: the text of one complete <a ...>...</a> element.
        // Returns [FullRawText, Href, Name].
        parseUrl = (rawTag as text) as record => [
            // parse html without a parser, but try to be strict
            strip_href = Text.AfterDelimiter( rawTag, "<a href=""" ),
            only_href = Text.BeforeDelimiter(strip_href, """" ),

            // Drop the closing tag, then keep what follows the first ">".
            strip_close_a = Text.BeforeDelimiter( rawTag, "</a>" ),
            a_text_only = Text.AfterDelimiter(strip_close_a, ">" ),
            ret = [ FullRawText = rawTag, Href = only_href, Name = a_text_only ]
        ][ret],
        // ], // swap this line in for the one above to return every intermediate step

        final = parseUrl( RawSrc )
    ],
    final = Summary[final]
in
    final

// PBIBlog Html Expand Extra Attributes
// Downloads the Power BI blog landing page, collects every <a> element with
// Html.Table and expands the element record plus a fixed set of its
// attributes into Link.* columns.
let
    Summary = [
        // ManualStatusHandling lists the HTTP status codes that Web.Contents
        // should hand back instead of raising an error for. The set is
        // 400-402, 404-490 and 500-599.
        // NOTE(review): the status is never inspected afterwards, so an error
        // page body is parsed exactly like a successful one - confirm this is
        // the intended best-effort behaviour.
        Response = Web.Contents("https://powerbi.microsoft.com/en-us/blog/", [ManualStatusHandling = {400, 401, 402, 404} & {405..490} & {500..599}]),
        RawText = Text.FromBinary( Response ),

        // One column named "Link", CSS selector "a"; `each _` keeps the whole
        // element record instead of only its text.
        ParseHtml = Html.Table( RawText, {{"Link", "a", each _ }})
    ],
    ParseHtml = Summary[ParseHtml],

    // Unpack the element record, then the nested attribute record.
    #"Expanded Link" = Table.ExpandRecordColumn(ParseHtml, "Link", {"TagName", "TextContent", "Attributes"}, {"Link.TagName", "Link.TextContent", "Link.Attributes"}),
    #"Expanded Link.Attributes" = Table.ExpandRecordColumn(#"Expanded Link", "Link.Attributes", {"class", "href", "id", "aria-label", "data-bi-name", "role", "title", "target", "rel"}, {"Link.Attributes.class", "Link.Attributes.href", "Link.Attributes.id", "Link.Attributes.aria-label", "Link.Attributes.data-bi-name", "Link.Attributes.role", "Link.Attributes.title", "Link.Attributes.target", "Link.Attributes.rel"})
in
    #"Expanded Link.Attributes"

// HtmlParse a Single String
// Runs Html.Table over one <a> tag held in a text literal and returns a
// table with the text columns Tag, Content and href.
let
    // Sample tag. Backslash is not an escape character in M, so the attribute
    // is written as plain _blank.
    SingleHtml = "<a href=""/003Dn00000b69ED"" target=""_blank"">Richard Radnay</a>",

    // One column named "Link", CSS selector "a"; `each _` keeps the whole
    // element record instead of only its text.
    ParsedAll = Html.Table( SingleHtml, { {"Link", "a", each _ }}),

    // Unpack the element record, then pull href out of the attribute record.
    #"Expanded Link" = Table.ExpandRecordColumn(ParsedAll, "Link", {"TagName", "TextContent", "Attributes"}, {"Tag", "Content", "Attributes"}),
    #"Expanded Attributes" = Table.ExpandRecordColumn(#"Expanded Link", "Attributes", {"href"}, {"href"}),
    #"Changed Type" = Table.TransformColumnTypes(#"Expanded Attributes",{{"href", type text}, {"Content", type text}, {"Tag", type text}})
in
    #"Changed Type"
0 commit comments