15
15
require_once (DOKU_PLUGIN .'syntax.php ' );
16
16
17
17
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
18
+ protected $ stopwords = null ;
19
+
20
/**
 * Constructor. Loads the stopword list once so it can be reused while filtering.
 *
 * _getStopwords() returns null when no stopwords are available. That result is
 * normalised to an empty array here, so $this->stopwords can always be iterated
 * with foreach without triggering a warning.
 *
 * NOTE(review): no parent constructor is called, exactly as in the original;
 * confirm that the parent class does not define one.
 */
public function __construct() {
    $stopwords = $this->_getStopwords();
    $this->stopwords = is_array($stopwords) ? $stopwords : array();
}
18
26
19
27
/**
 * Returns the syntax type of this plugin as a fixed string.
 *
 * NOTE(review): the whitespace inside the literal looks like an extraction
 * artifact of this view - confirm against the real file before relying on it.
 */
function getType () { return 'substition ' ; }
20
28
/**
 * Returns the paragraph type of this plugin as a fixed string.
 *
 * NOTE(review): the whitespace inside the literal looks like an extraction
 * artifact of this view - confirm against the real file before relying on it.
 */
function getPType () { return 'block ' ; }
@@ -131,6 +139,32 @@ function render($mode, Doku_Renderer $renderer, $data) {
131
139
return false ;
132
140
}
133
141
142
/**
 * Helper function for loading and returning the array with stopwords.
 *
 * Stopword files are loaded from two locations:
 * - inc/lang/"actual language"/stopwords.txt
 * - conf/stopwords.txt
 *
 * If both files exist, then both files are used - the content is merged,
 * language file first, local file second.
 *
 * @return array|null list of stopwords (one entry per line of the files),
 *                    or null if no stopwords could be loaded
 */
protected function _getStopwords() {
    // $conf lives in the global scope; without this declaration
    // $conf['lang'] is undefined here and the language file is never found
    global $conf;

    $files = array(
        DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt',
        DOKU_CONF . 'stopwords.txt',
    );

    $stopwords = array();
    foreach ($files as $swfile) {
        if (!is_readable($swfile)) continue;

        // file() returns false on failure; only merge real results
        $lines = file($swfile, FILE_IGNORE_NEW_LINES);
        if (is_array($lines)) {
            $stopwords = array_merge($stopwords, $lines);
        }
    }

    if (count($stopwords) == 0) {
        return null;
    }

    return $stopwords;
}
167
+
134
168
/**
135
169
* Applies filters on the cloud:
136
170
* - removes all short words, see config option 'minimum_word_length'
@@ -144,6 +178,12 @@ function _filterCloud(&$cloud, $balcklistName) {
144
178
unset($ cloud [$ key ]);
145
179
}
146
180
181
+ // Remove stopwords
182
+ foreach ($ this ->stopwords as $ word ) {
183
+ if (isset ($ cloud [$ word ]))
184
+ unset($ cloud [$ word ]);
185
+ }
186
+
147
187
// Remove word which are on the blacklist
148
188
$ blacklist = $ this ->getConf ($ balcklistName );
149
189
if (!empty ($ blacklist )) {
@@ -163,15 +203,6 @@ function _filterCloud(&$cloud, $balcklistName) {
163
203
function _getWordCloud ($ num , &$ min , &$ max ) {
164
204
global $ conf ;
165
205
166
- // load stopwords
167
- $ swfile = DOKU_INC .'inc/lang/ ' .$ conf ['lang ' ].'/stopwords.txt ' ;
168
- if (@file_exists ($ swfile )) $ stopwords = file ($ swfile , FILE_IGNORE_NEW_LINES );
169
- else $ stopwords = array ();
170
-
171
- // load extra local stopwords
172
- $ swfile = DOKU_CONF .'stopwords.txt ' ;
173
- if (@file_exists ($ swfile )) $ stopwords = array_merge ($ stopwords , file ($ swfile , FILE_IGNORE_NEW_LINES ));
174
-
175
206
$ cloud = array ();
176
207
177
208
if (@file_exists ($ conf ['indexdir ' ].'/page.idx ' )) { // new word-length based index
@@ -182,14 +213,14 @@ function _getWordCloud($num, &$min, &$max) {
182
213
$ idx = idx_getIndex ('i ' , $ len );
183
214
$ word_idx = idx_getIndex ('w ' , $ len );
184
215
185
- $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx, $ stopwords );
216
+ $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx );
186
217
}
187
218
188
219
} else { // old index
189
220
$ idx = file ($ conf ['cachedir ' ].'/index.idx ' );
190
221
$ word_idx = file ($ conf ['cachedir ' ].'/word.idx ' );
191
222
192
- $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx, $ stopwords );
223
+ $ this ->_addWordsToCloud ($ cloud , $ idx , $ word_idx );
193
224
}
194
225
195
226
$ this ->_filterCloud ($ cloud , 'word_blacklist ' );
@@ -200,17 +231,15 @@ function _getWordCloud($num, &$min, &$max) {
200
231
/**
 * Adds all words in the given index as $word => $freq to the $cloud array.
 *
 * The frequency of a word is the number of ':'-separated entries on its
 * index line. Words whose index line is missing or empty are skipped.
 *
 * @param array $cloud    word => frequency map, filled in place (passed by reference)
 * @param array $idx      index lines; entry $i belongs to the word in $word_idx[$i]
 * @param array $word_idx word list; each entry is trimmed and used as key in $cloud
 */
function _addWordsToCloud(&$cloud, $idx, $word_idx) {
    // collect the frequency of the words
    foreach ($word_idx as $i => $word) {
        // a word without a matching index line has no usable frequency;
        // skipping it avoids an undefined-offset notice
        if (!isset($idx[$i])) continue;

        $value = explode(':', $idx[$i]);
        // an empty first entry means the word is not referenced anywhere
        if (!trim($value[0])) continue;

        $cloud[trim($word)] = count($value);
    }
}
216
245
0 commit comments