16
16
17
17
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
18
18
protected $ knownFlags = array ('showCount ' );
19
+ protected $ stopwords = null ;
20
+
21
/**
 * Constructor. Loads the stopwords once so the cloud filters can reuse them.
 */
public function __construct() {
    $stopwords = $this->_getStopwords();

    // _getStopwords() returns null when no stopwords are available. Keep the
    // property iterable so a foreach over it never runs on null.
    $this->stopwords = is_array($stopwords) ? $stopwords : array();
}
19
27
20
28
// Returns the fixed type string of this syntax plugin.
// NOTE(review): the literal below looks whitespace-garbled by extraction — confirm against the real file.
function getType () { return 'substition ' ; }
21
29
// Returns the fixed PType string of this syntax plugin (presumably the paragraph handling mode — TODO confirm).
// NOTE(review): the literal below looks whitespace-garbled by extraction — confirm against the real file.
function getPType () { return 'block ' ; }
@@ -147,6 +155,32 @@ function render($mode, Doku_Renderer $renderer, $data) {
147
155
return false ;
148
156
}
149
157
158
/**
 * Helper function for loading and returning the array with stopwords.
 *
 * Stopword files are loaded from two locations:
 *   - inc/lang/"actual language"/stopwords.txt
 *   - conf/stopwords.txt
 *
 * If both files exist, both are used and their contents are merged.
 *
 * @return array|null list of stopwords (one entry per file line, without
 *                    line endings), or null when no stopwords were found
 */
protected function _getStopwords() {
    // $conf lives in the global scope. Without this declaration $conf['lang']
    // is undefined here and the language-specific file is never found.
    global $conf;

    $stopwords = array();

    $candidates = array(
        DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt', // language stopwords
        DOKU_CONF . 'stopwords.txt',                               // extra local stopwords
    );

    foreach ($candidates as $swfile) {
        if (!@file_exists($swfile)) continue;

        $lines = file($swfile, FILE_IGNORE_NEW_LINES);
        // file() returns false on a read failure; only merge real arrays
        if (is_array($lines)) {
            $stopwords = array_merge($stopwords, $lines);
        }
    }

    if (count($stopwords) == 0) {
        return null;
    }

    return $stopwords;
}
183
+
150
184
/**
151
185
* Applies filters on the cloud:
152
186
* - removes all short words, see config option 'minimum_word_length'
@@ -160,6 +194,12 @@ function _filterCloud(&$cloud, $balcklistName) {
160
194
unset($ cloud [$ key ]);
161
195
}
162
196
197
+ // Remove stopwords
198
+ foreach ($ this ->stopwords as $ word ) {
199
+ if (isset ($ cloud [$ word ]))
200
+ unset($ cloud [$ word ]);
201
+ }
202
+
163
203
// Remove word which are on the blacklist
164
204
$ blacklist = $ this ->getConf ($ balcklistName );
165
205
if (!empty ($ blacklist )) {
@@ -179,15 +219,6 @@ function _filterCloud(&$cloud, $balcklistName) {
179
219
function _getWordCloud ($ num , &$ min , &$ max ) {
180
220
global $ conf ;
181
221
182
- // load stopwords
183
- $ swfile = DOKU_INC .'inc/lang/ ' .$ conf ['lang ' ].'/stopwords.txt ' ;
184
- if (@file_exists ($ swfile )) $ stopwords = file ($ swfile , FILE_IGNORE_NEW_LINES );
185
- else $ stopwords = array ();
186
-
187
- // load extra local stopwords
188
- $ swfile = DOKU_CONF .'stopwords.txt ' ;
189
- if (@file_exists ($ swfile )) $ stopwords = array_merge ($ stopwords , file ($ swfile , FILE_IGNORE_NEW_LINES ));
190
-
191
222
$ cloud = array ();
192
223
193
224
if (@file_exists ($ conf ['indexdir ' ].'/page.idx ' )) { // new word-length based index
@@ -198,14 +229,14 @@ function _getWordCloud($num, &$min, &$max) {
198
229
$ idx = idx_getIndex ('i ' , $ len );
199
230
$ word_idx = idx_getIndex ('w ' , $ len );
200
231
201
- $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx, $ stopwords );
232
+ $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx );
202
233
}
203
234
204
235
} else { // old index
205
236
$ idx = file ($ conf ['cachedir ' ].'/index.idx ' );
206
237
$ word_idx = file ($ conf ['cachedir ' ].'/word.idx ' );
207
238
208
- $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx, $ stopwords );
239
+ $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx );
209
240
}
210
241
211
242
$ this ->_filterCloud ($ cloud , 'word_blacklist ' );
@@ -216,17 +247,15 @@ function _getWordCloud($num, &$min, &$max) {
216
247
/**
 * Adds all words in the given index as $word => $freq to the $cloud array.
 *
 * Entry $i of $word_idx is paired with entry $i of $idx. The frequency stored
 * for a word is the number of ':'-separated fields in its $idx entry.
 *
 * @param array $cloud    receives the word => frequency pairs (modified in place)
 * @param array $idx      index lines, one per word, fields separated by ':'
 * @param array $word_idx word lines, parallel to $idx
 */
function _addWordsToCloud(&$cloud, $idx, $word_idx) {
    $wcount = count($word_idx);

    // collect the frequency of the words
    for ($i = 0; $i < $wcount; $i++) {
        // the two lists can differ in length; skip words without an index entry
        if (!isset($idx[$i])) continue;

        $value = explode(':', $idx[$i]);
        // an empty first field means the word is not referenced anywhere
        if (!trim($value[0])) continue;

        $key = trim($word_idx[$i]);
        $cloud[$key] = count($value);
    }
}
232
261
0 commit comments