|
| 1 | +""" |
| 2 | +Demonstrates emoji, grapheme clusters, complex language text |
| 3 | +""" |
| 4 | +from rich.console import Console |
| 5 | +from rich.panel import Panel |
| 6 | +from rich.table import Table |
| 7 | + |
def main():
    """Render the Unicode stress-test demo to the terminal.

    Prints a centered table of emoji / grapheme-cluster samples, a blank
    line, and then a panel holding short excerpts in several scripts.
    """
    term = Console()

    samples = Table(title="Emoji & Grapheme Clusters")
    samples.add_column("Type", style="cyan")
    samples.add_column("Example", justify="center")
    samples.add_column("Codepoints", style="dim")

    # Every sample is spelled with \u / \U escapes rather than literal glyphs,
    # because editors struggle with these sequences too.
    # Each entry is (type label, example string, code point description).
    sample_rows = (
        (
            "ZWJ Family",
            "\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466",
            "4 emoji + 3 ZWJ",
        ),
        ("Skin Tone", "\U0001F44B\U0001F3FD", "wave + modifier"),
        (
            "Flags",
            "\U0001F1E8\U0001F1E6 \U0001F1EC\U0001F1E7 \U0001F1EF\U0001F1F5",
            "regional indicators",
        ),
        (
            "VS16 Emoji",
            "\u2764\uFE0F \u2728 \u267B\uFE0F \u2615",
            "with variation selectors",
        ),
        (
            "Keycap",
            "1\uFE0F\u20E3 2\uFE0F\u20E3 #\uFE0F\u20E3",
            "digit + VS16 + combining",
        ),
        ("Combining", "e\u0301 n\u0303 u\u0308", "base + combining accent"),
        ("CJK", "\u5bcc\u58eb\u5c71 \u6771\u4eac", "wide characters"),
        (
            "Standalone Skin Tone",
            "\U0001F3FB \U0001F3FD \U0001F3FF",
            "modifiers without base",
        ),
        ("Standalone Regional", "\U0001F1E8 \U0001F1E6", "unpaired indicators"),
    )
    for sample in sample_rows:
        samples.add_row(*sample)

    term.print(samples, justify="center")
    term.print()

    # Complex scripts with combining marks, zero-width characters, or stacking
    # (first lines from UDHR Article 1) https://github.com/eric-muller/udhr
    # One list item per rendered line; items are joined with "\n" below.
    script_lines = [
        "[bold]Complex Scripts (UDHR Article 1)[/]",
        "",
        # Arabic: RTL script. NOTE: this excerpt holds base letters only;
        # it contains no harakat (combining vowel mark) code points.
        (
            "[cyan]Arabic:[/] \u064A\u0648\u0644\u062F \u062C\u0645\u064A\u0639"
            " \u0627\u0644\u0646\u0627\u0633 \u0623\u062D\u0631\u0627\u0631\u0627"
        ),
        # Hindi (Devanagari): combining vowel signs, virama conjuncts
        (
            "[cyan]Hindi:[/] \u0938\u092D\u0940 \u092E\u0928\u0941\u0937\u094D\u092F\u094B\u0902"
            " \u0915\u094B \u0917\u094C\u0930\u0935 \u0914\u0930"
            " \u0905\u0927\u093F\u0915\u093E\u0930\u094B\u0902"
        ),
        # Thai: above/below combining vowels and tone marks
        (
            "[cyan]Thai:[/] \u0E21\u0E19\u0E38\u0E29\u0E22\u0E4C\u0E17\u0E31\u0E49\u0E07"
            "\u0E2B\u0E25\u0E32\u0E22\u0E40\u0E01\u0E34\u0E14\u0E21\u0E32"
        ),
        # Tibetan: stacked consonants, subjoined letters
        (
            "[cyan]Tibetan:[/] \u0F66\u0F90\u0FB1\u0F7A\u0F0B\u0F56\u0F7C\u0F0B"
            "\u0F58\u0F72\u0F0B\u0F62\u0F72\u0F42\u0F66\u0F0B\u0F40\u0F72"
        ),
        # Sinhala: virama (U+0DCA) followed by ZWJ (U+200D) to form a conjunct
        (
            "[cyan]Sinhala:[/] \u0DC3\u0DD2\u0DBA\u0DBD\u0DD4 \u0DB8\u0DB1\u0DD4"
            "\u0DC2\u0DCA\u200D\u0DBA\u0DBA\u0DB1\u0DCA"
        ),
        # Hebrew: RTL, combining niqqud vowel points
        (
            "[cyan]Hebrew:[/] \u05DB\u05BC\u05B8\u05DC\u05BE\u05D1\u05B0\u05E0\u05B5\u05D9"
            " \u05D4\u05B8\u05D0\u05B8\u05D3\u05B8\u05DD"
        ),
        # French: decomposed combining accents (ê é)
        "[cyan]French:[/] Tous les e\u0302tres humains naissent e\u0301gaux",
        # Polish: combining ogonek and acute (ą ę ó)
        "[cyan]Polish:[/] Wszyscy ludzie rodza\u0328 sie\u0328 ro\u0301wni",
        # Yoruba: combining grave/acute tone marks (è í á)
        "[cyan]Yoruba:[/] Gbogbo e\u0300ni\u0301ya\u0301n la\u0301 a bi ni\u0301",
    ]
    panel_markup = "\n".join(script_lines)

    languages = Panel(panel_markup, title="\U0001F30D Languages", border_style="blue")
    term.print(languages)
| 77 | + |
| 78 | + |
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments