Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
LarsGit223 authored Jun 4, 2017
2 parents 46c1465 + 2340acf commit af1dbc4
Showing 1 changed file with 46 additions and 17 deletions.
63 changes: 46 additions & 17 deletions syntax.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@

class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
protected $knownFlags = array('showCount');
protected $stopwords = null;

/**
 * Constructor.
 *
 * Fetches the stopword list once via _getStopwords() and keeps the result
 * in $this->stopwords so that it is available to the other methods.
 */
public function __construct() {
    $loaded = $this->_getStopwords();
    $this->stopwords = $loaded;
}

/**
 * Returns the type identifier of this syntax plugin.
 *
 * NOTE(review): the spelling of the returned string is kept exactly as-is;
 * it is a runtime value that is presumably matched elsewhere - confirm
 * before "correcting" it.
 *
 * @return string always 'substition'
 */
function getType() {
    $type = 'substition';
    return $type;
}
/**
 * Returns the paragraph type identifier of this syntax plugin.
 *
 * @return string always 'block'
 */
function getPType() {
    $ptype = 'block';
    return $ptype;
}
Expand Down Expand Up @@ -147,6 +155,32 @@ function render($mode, Doku_Renderer $renderer, $data) {
return false;
}

/**
 * Helper function for loading and returning the array with stopwords.
 *
 * Stopword files are loaded from two locations:
 *   - DOKU_INC . inc/lang/<configured language>/stopwords.txt
 *   - DOKU_CONF . stopwords.txt
 *
 * If both files exist, then both files are used - the content is merged
 * (language file first, local file second).
 *
 * @return array|null list of stopwords, one entry per line of the loaded
 *                    files, or null if no stopword could be loaded at all.
 *                    Callers have to check for null before iterating.
 */
protected function _getStopwords() {
    // The configuration array lives in the global scope; without this
    // import $conf['lang'] would be undefined here and the language
    // specific stopwords file would never be found.
    global $conf;

    $stopwords = array();

    $swfiles = array(
        DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt', // language specific stopwords
        DOKU_CONF.'stopwords.txt',                           // extra local stopwords
    );

    foreach ($swfiles as $swfile) {
        if (!@file_exists($swfile)) continue;

        // file() returns false if the file exists but can not be read;
        // such a result must not be handed to array_merge()
        $lines = file($swfile, FILE_IGNORE_NEW_LINES);
        if (is_array($lines)) {
            $stopwords = array_merge($stopwords, $lines);
        }
    }

    if (count($stopwords) == 0) {
        return null;
    }

    return $stopwords;
}

/**
* Applies filters on the cloud:
* - removes all short words, see config option 'minimum_word_length'
Expand All @@ -160,6 +194,12 @@ function _filterCloud(&$cloud, $balcklistName) {
unset($cloud[$key]);
}

// Remove stopwords
foreach ($this->stopwords as $word) {
if (isset($cloud[$word]))
unset($cloud[$word]);
}

// Remove words which are on the blacklist
$blacklist = $this->getConf($balcklistName);
if(!empty($blacklist)) {
Expand All @@ -179,15 +219,6 @@ function _filterCloud(&$cloud, $balcklistName) {
function _getWordCloud($num, &$min, &$max) {
global $conf;

// load stopwords
$swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
if (@file_exists($swfile)) $stopwords = file($swfile, FILE_IGNORE_NEW_LINES);
else $stopwords = array();

// load extra local stopwords
$swfile = DOKU_CONF.'stopwords.txt';
if (@file_exists($swfile)) $stopwords = array_merge($stopwords, file($swfile, FILE_IGNORE_NEW_LINES));

$cloud = array();

if (@file_exists($conf['indexdir'].'/page.idx')) { // new word-length based index
Expand All @@ -198,14 +229,14 @@ function _getWordCloud($num, &$min, &$max) {
$idx = idx_getIndex('i', $len);
$word_idx = idx_getIndex('w', $len);

$this->_addWordsToCloud($cloud, $idx, $word_idx, $stopwords);
$this->_addWordsToCloud($cloud, $idx, $word_idx);
}

} else { // old index
$idx = file($conf['cachedir'].'/index.idx');
$word_idx = file($conf['cachedir'].'/word.idx');

$this->_addWordsToCloud($cloud, $idx, $word_idx, $stopwords);
$this->_addWordsToCloud($cloud, $idx, $word_idx);
}

$this->_filterCloud($cloud, 'word_blacklist');
Expand All @@ -216,17 +247,15 @@ function _getWordCloud($num, &$min, &$max) {
/**
* Adds all words in given index as $word => $freq to $cloud array
*/
function _addWordsToCloud(&$cloud, $idx, $word_idx, &$stopwords) {
function _addWordsToCloud(&$cloud, $idx, $word_idx) {
$wcount = count($word_idx);

// collect the frequency of the words
for ($i = 0; $i < $wcount; $i++) {
$key = trim($word_idx[$i]);
if (!is_int(array_search($key, $stopwords))) {
$value = explode(':', $idx[$i]);
if (!trim($value[0])) continue;
$cloud[$key] = count($value);
}
$value = explode(':', $idx[$i]);
if (!trim($value[0])) continue;
$cloud[$key] = count($value);
}
}

Expand Down

0 comments on commit af1dbc4

Please sign in to comment.