-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtypes.ts
169 lines (158 loc) · 4.27 KB
/
types.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// TODO: redirects are not yet covered by the types in this file
// Source: https://github.com/tatuylonen/wiktextract#format-of-the-extracted-word-entries
/**
 * A single word entry as produced by wiktextract.
 *
 * Only `word`, `pos` and `lang` are declared as required; every other key is
 * optional.
 */
export type WordEntry = {
  // --- Identification ---
  word: string;
  pos: Pos;
  /** Language name, such as "English". */
  lang: Lang;
  /** Language code, such as "en". */
  lang_code?: string;

  // --- Meaning, forms and pronunciation ---
  senses?: Sense[];
  forms?: Form[];
  sounds?: Sound[];
  /**
   * Translations that are not sense-disambiguated. Per the upstream README,
   * sense-disambiguated translations are stored on the word sense instead.
   */
  translations?: Translation[];

  // --- Classification ---
  categories?: object[];
  topics?: object[];

  // --- Etymology ---
  etymology_text?: string;
  etymology_templates?: Template[];
  etymology_number?: number;
  descendants?: Descendant[];

  // --- External links ---
  wikidata?: string;
  /** Note: the upstream GitHub README names this key "wiktionary", which is believed to be a mistake. */
  wikipedia?: string[];

  // --- Templates ---
  head_templates?: Template[];
  /** To check. */
  inflection_templates?: Template[];
};
/** Language name, such as "English". This is a plain string alias; no fixed set of values is enforced. */
type Lang = string;
// Source: https://github.com/tatuylonen/wiktextract#word-senses
/**
 * One sense (meaning) of a word entry. Every key is optional.
 */
type Sense = {
  // --- Glosses and labels ---
  glosses?: string[];
  raw_glosses?: string[];
  /** Such as ["archaic", "colloquial", "present"]; new words may appear. */
  tags?: string[];
  senseid?: string[];
  /** Qualifiers that could not be parsed. */
  english?: string;

  // --- Classification ---
  categories?: object[];
  /**
   * To check.
   * NOTE(review): `Linkage.topics` in this file is typed `string[]`; confirm
   * which element type is correct here.
   */
  topics?: object[];

  // --- References to other words ---
  alt_of?: { word: string; extra?: string }[];
  form_of?: { word: string; extra?: string }[];

  // --- Examples and translations ---
  examples?: Example[];
  /**
   * Sense-disambiguated translations. Per the upstream README, translations
   * that are not sense-disambiguated are stored on the word entry instead.
   */
  translations?: Translation[];

  // --- Linkages ---
  synonyms?: Linkage[];
  antonyms?: Linkage[];
  hypernyms?: Linkage[];
  /** "Not systematically encoded". */
  holonyms?: Linkage[];
  /** "Fairly rare". */
  meronyms?: Linkage[];
  coordinate_terms?: Linkage[];
  derived?: Linkage[];
  related?: Linkage[];

  // --- External links ---
  /** List of QIDs (e.g., Q123) for the sense. */
  wikidata?: string[];
  /** List of Wikipedia page titles. */
  wikipedia?: string[];
};
// Source: https://github.com/tatuylonen/wiktextract#word-senses (see "examples")
/**
 * A usage example or quotation attached to a sense. Only `text` is required.
 */
type Example = {
  /** Entire example text. */
  text: string;
  type?: "example" | "quotation";
  /** Source reference. */
  ref?: string;
  roman?: string;
  /** Rare. */
  note?: string;
};
// Source: https://github.com/tatuylonen/wiktextract#translations
// Important note: "Translations are stored under the translations key in the word's
// data (if not sense-disambiguated) or in the word sense (if sense-disambiguated)"
/**
 * A translation of a word or of one of its senses. Every key is optional.
 */
type Translation = {
  // --- Target language ---
  lang?: Lang;
  code?: string;

  // --- The translated word itself ---
  /** "May be missing when notes is present". */
  word?: string;
  /** To check. */
  alt?: string;
  /** Romanization. */
  roman?: string;

  // --- Context and annotations ---
  /** "May not match gloss exactly". */
  sense?: string;
  english?: string;
  note?: string;
  tags?: string[];
  taxonomic?: string;
};
// Source: https://github.com/tatuylonen/wiktextract#linkages-to-other-words
// Applies to: synonyms, antonyms, hypernyms, holonyms, meronyms, coordinate_terms, derived, related
/**
 * A link from a sense to another word. Every key is optional.
 */
type Linkage = {
  // --- The linked word ---
  word?: string;
  alt?: string;
  roman?: string;

  // --- Context and annotations ---
  sense?: string;
  english?: string;
  taxonomic?: string;
  topics?: string[];
  /** To check. */
  tags?: string[];
};
// Source: https://github.com/tatuylonen/wiktextract#pronunciation
/**
 * Pronunciation information for a word entry. Every key is optional.
 */
type Sound = {
  // --- Transcriptions ---
  /** IPA string. */
  ipa?: string;
  enpr?: string;

  // --- Audio ---
  audio?: string;
  /** IPA string associated with the audio file. */
  "audio-ipa"?: string;
  ogg_url?: string;
  mp3_url?: string;

  // --- Other ---
  /** To check. */
  homophones?: string[];
  /** To check. */
  hyphenation?: string[];
  tags?: string[];
  text?: string;
};
/**
 * A wiki template occurrence. Used in this file for `etymology_templates`,
 * `head_templates`, `inflection_templates` and `Descendant.templates`.
 * Every key is optional.
 */
type Template = {
  /** Template name. */
  name?: string;
  /**
   * Template arguments as a dictionary from argument name to value, as
   * described in the wiktextract README. Positional arguments are keyed by
   * their position written as a string (e.g. "1", "2").
   */
  args?: Record<string, string>;
  /** Text the template expands to. */
  expansion?: string;
};
// Source: https://github.com/tatuylonen/wiktextract#descendants
/**
 * One line of a word's descendants list.
 *
 * This alias describes a single element: `WordEntry.descendants` is declared
 * as `Descendant[]`, so making the alias itself an array would type the field
 * as an array of arrays.
 */
type Descendant = {
  /**
   * "The level of indentation of the current line. This can be used to track
   * the hierarchical structure of the list."
   */
  depth: number;
  templates: Template[];
  text: string;
}; // To check
/**
 * One inflected or alternative form of a word.
 *
 * This alias describes a single element: `WordEntry.forms` is declared as
 * `Form[]`, so making the alias itself an array would type the field as an
 * array of arrays.
 */
type Form = {
  form: string;
  /** List of tags for this form, matching the `string[]` type of every other `tags` field in this file. */
  tags: string[];
  ipa?: string;
  roman?: string;
  source?: string;
};
// Source: https://github.com/tatuylonen/wiktextract/blob/master/wiktextract/parts_of_speech.py
// Extracted by adding this to file: print(" | ".join([f'"{i}"' for i in PARTS_OF_SPEECH]))
/**
 * Part-of-speech values accepted for `WordEntry.pos`.
 * Members are listed alphabetically; the order of a union has no effect on the type.
 */
type Pos =
  | "abbrev"
  | "adj"
  | "adj_noun"
  | "adj_verb"
  | "adv"
  | "adv_phrase"
  | "affix"
  | "ambiposition"
  | "article"
  | "character"
  | "circumfix"
  | "circumpos"
  | "classifier"
  | "clause"
  | "combining_form"
  | "conj"
  | "contraction"
  | "converb"
  | "counter"
  | "det"
  | "infix"
  | "interfix"
  | "intj"
  | "name"
  | "noun"
  | "num"
  | "particle"
  | "phrase"
  | "postp"
  | "prefix"
  | "prep"
  | "prep_phrase"
  | "preverb"
  | "pron"
  | "proverb"
  | "punct"
  | "romanization"
  | "root"
  | "suffix"
  | "syllable"
  | "symbol"
  | "verb";