Skip to content

Commit f4a265b

Browse files
authored
Use grapheme-splitter instead of lodash to keep emoji from being ripped apart (matrix-org#10976)
* Use grapheme-splitter instead of lodash to keep emoji from being ripped apart
* Move to a more appropriate place
* Add tests and improve types
1 parent 277a3c0 commit f4a265b

File tree

7 files changed

+55
-22
lines changed

7 files changed

+55
-22
lines changed

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
"focus-visible": "^5.2.0",
8383
"gfm.css": "^1.1.2",
8484
"glob-to-regexp": "^0.4.1",
85+
"grapheme-splitter": "^1.0.4",
8586
"highlight.js": "^11.3.1",
8687
"html-entities": "^2.0.0",
8788
"is-ip": "^3.1.0",

src/Avatar.ts

+2-3
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ import { RoomMember } from "matrix-js-sdk/src/models/room-member";
1818
import { User } from "matrix-js-sdk/src/models/user";
1919
import { Room } from "matrix-js-sdk/src/models/room";
2020
import { ResizeMethod } from "matrix-js-sdk/src/@types/partials";
21-
import { split } from "lodash";
2221

2322
import DMRoomMap from "./utils/DMRoomMap";
2423
import { mediaFromMxc } from "./customisations/Media";
2524
import { isLocalRoom } from "./utils/localRoom/isLocalRoom";
25+
import { getFirstGrapheme } from "./utils/strings";
2626

2727
// Not to be used for BaseAvatar urls as that has similar default avatar fallback already
2828
export function avatarUrlForMember(
@@ -133,8 +133,7 @@ export function getInitialLetter(name: string): string | undefined {
133133
name = name.substring(1);
134134
}
135135

136-
// rely on the grapheme cluster splitter in lodash so that we don't break apart compound emojis
137-
return split(name, "", 1)[0].toUpperCase();
136+
return getFirstGrapheme(name).toUpperCase();
138137
}
139138

140139
export function avatarUrlForRoom(

src/HtmlUtils.tsx

+10-5
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ import React, { LegacyRef, ReactElement, ReactNode } from "react";
2121
import sanitizeHtml from "sanitize-html";
2222
import classNames from "classnames";
2323
import EMOJIBASE_REGEX from "emojibase-regex";
24-
import { merge, split } from "lodash";
24+
import { merge } from "lodash";
2525
import katex from "katex";
2626
import { decode } from "html-entities";
2727
import { IContent } from "matrix-js-sdk/src/models/event";
2828
import { Optional } from "matrix-events-sdk";
2929
import _Linkify from "linkify-react";
3030
import escapeHtml from "escape-html";
31+
import GraphemeSplitter from "grapheme-splitter";
3132

3233
import {
3334
_linkifyElement,
@@ -463,14 +464,18 @@ const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => (
463464
* @returns an array of strings if isHtmlMessage is true; otherwise an array of React Elements for emojis
464465
* and plain text for everything else
465466
*/
466-
function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
467+
export function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[];
468+
export function formatEmojis(message: string | undefined, isHtmlMessage: true): string[];
469+
export function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
467470
const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
468471
const result: (JSX.Element | string)[] = [];
472+
if (!message) return result;
473+
469474
let text = "";
470475
let key = 0;
471476

472-
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
473-
for (const char of split(message, "")) {
477+
const splitter = new GraphemeSplitter();
478+
for (const char of splitter.iterateGraphemes(message)) {
474479
if (EMOJIBASE_REGEX.test(char)) {
475480
if (text) {
476481
result.push(text);
@@ -661,7 +666,7 @@ export function topicToHtml(
661666
isFormattedTopic = false; // Fall back to plain-text topic
662667
}
663668

664-
let emojiBodyElements: ReturnType<typeof formatEmojis> | undefined;
669+
let emojiBodyElements: JSX.Element[] | undefined;
665670
if (!isFormattedTopic && topicHasEmoji) {
666671
emojiBodyElements = formatEmojis(topic, false);
667672
}

src/editor/parts.ts

+6-7
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,19 @@ See the License for the specific language governing permissions and
1515
limitations under the License.
1616
*/
1717

18-
import { split } from "lodash";
1918
import EMOJIBASE_REGEX from "emojibase-regex";
2019
import { MatrixClient } from "matrix-js-sdk/src/client";
2120
import { RoomMember } from "matrix-js-sdk/src/models/room-member";
2221
import { Room } from "matrix-js-sdk/src/models/room";
22+
import GraphemeSplitter from "grapheme-splitter";
2323

2424
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
2525
import { unicodeToShortcode } from "../HtmlUtils";
2626
import * as Avatar from "../Avatar";
2727
import defaultDispatcher from "../dispatcher/dispatcher";
2828
import { Action } from "../dispatcher/actions";
2929
import SettingsStore from "../settings/SettingsStore";
30+
import { getFirstGrapheme } from "../utils/strings";
3031

3132
const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);
3233

@@ -133,8 +134,7 @@ abstract class BasePart {
133134
// To only need to grapheme split the bits of the string we're working on.
134135
let buffer = str;
135136
while (buffer) {
136-
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
137-
const [char] = split(buffer, "", 2);
137+
const char = getFirstGrapheme(buffer);
138138
if (!this.acceptsInsertion(char, offset + str.length - buffer.length, inputType)) {
139139
break;
140140
}
@@ -562,8 +562,7 @@ export class PartCreator {
562562
case "\n":
563563
return new NewlinePart();
564564
default:
565-
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
566-
if (EMOJIBASE_REGEX.test(split(input, "", 2)[0])) {
565+
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
567566
return new EmojiPart();
568567
}
569568
return new PlainPart();
@@ -639,8 +638,8 @@ export class PartCreator {
639638
const parts: (PlainPart | EmojiPart)[] = [];
640639
let plainText = "";
641640

642-
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
643-
for (const char of split(text, "")) {
641+
const splitter = new GraphemeSplitter();
642+
for (const char of splitter.iterateGraphemes(text)) {
644643
if (EMOJIBASE_REGEX.test(char)) {
645644
if (plainText) {
646645
parts.push(this.plain(plainText));

src/utils/strings.ts

+13
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ limitations under the License.
2121
* @param text the plaintext to put in the user's clipboard
2222
*/
2323
import { logger } from "matrix-js-sdk/src/logger";
24+
import GraphemeSplitter from "grapheme-splitter";
2425

2526
export async function copyPlaintext(text: string): Promise<boolean> {
2627
try {
@@ -83,3 +84,15 @@ export function copyNode(ref?: Element | null): boolean {
8384
export function getSelectedText(): string {
8485
return window.getSelection()!.toString();
8586
}
87+
88+
/**
89+
* Returns the first grapheme in the given string.
90+
* Especially useful for strings containing emoji, as it will not break compound emoji up.
91+
* @param str string to parse
92+
* @returns the first grapheme or an empty string if given an empty string
93+
*/
94+
export function getFirstGrapheme(str: string): string {
95+
const splitter = new GraphemeSplitter();
96+
const result = splitter.iterateGraphemes(str).next();
97+
return result.done ? "" : result.value;
98+
}

test/HtmlUtils-test.tsx

+17-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import { mocked } from "jest-mock";
1919
import { render, screen } from "@testing-library/react";
2020
import { IContent } from "matrix-js-sdk/src/models/event";
2121

22-
import { bodyToHtml, topicToHtml } from "../src/HtmlUtils";
22+
import { bodyToHtml, formatEmojis, topicToHtml } from "../src/HtmlUtils";
2323
import SettingsStore from "../src/settings/SettingsStore";
2424

2525
jest.mock("../src/settings/SettingsStore");
@@ -168,3 +168,19 @@ describe("bodyToHtml", () => {
168168
});
169169
});
170170
});
171+
172+
describe("formatEmojis", () => {
173+
it.each([
174+
["🏴󠁧󠁢󠁥󠁮󠁧󠁿", [["🏴󠁧󠁢󠁥󠁮󠁧󠁿", "flag-england"]]],
175+
["🏴󠁧󠁢󠁳󠁣󠁴󠁿", [["🏴󠁧󠁢󠁳󠁣󠁴󠁿", "flag-scotland"]]],
176+
["🏴󠁧󠁢󠁷󠁬󠁳󠁿", [["🏴󠁧󠁢󠁷󠁬󠁳󠁿", "flag-wales"]]],
177+
])("%s emoji", (emoji, expectations) => {
178+
const res = formatEmojis(emoji, false);
179+
expect(res).toHaveLength(expectations.length);
180+
for (let i = 0; i < res.length; i++) {
181+
const [emoji, title] = expectations[i];
182+
expect(res[i].props.children).toEqual(emoji);
183+
expect(res[i].props.title).toEqual(`:${title}:`);
184+
}
185+
});
186+
});

test/__snapshots__/HtmlUtils-test.tsx.snap

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
// Jest Snapshot v1, https://goo.gl/fbAQLP
22

3+
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
4+
5+
exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;
6+
7+
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
8+
39
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
410
<DocumentFragment>
511
<span
@@ -15,9 +21,3 @@ exports[`bodyToHtml should generate big emoji for an emoji-only reply to a messa
1521
</span>
1622
</DocumentFragment>
1723
`;
18-
19-
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
20-
21-
exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;
22-
23-
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;

0 commit comments

Comments
 (0)