Skip to content

Commit 11efc76

Browse files
committed
Removing dictionary file support (in support of #278)
Note that I'm smuggling this in with the config cin support changes, since this requires changes to the config. However, the ODD and documentation changes still need to be done, so #278 can't be resolved without further work.
1 parent 4abbab7 commit 11efc76

File tree

2 files changed

+39
-26
lines changed

2 files changed

+39
-26
lines changed

xsl/create_config_xsl.xsl

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,8 @@
348348

349349
<!--First, create the global variables and parameters-->
350350
<xsl:call-template name="createGlobals" exclude-result-prefixes="#all"/>
351-
352-
<!--Now create the dictionary XML files-->
353-
<xsl:call-template name="createDictionaryXML" exclude-result-prefixes="#all"/>
354351

352+
<xsl:call-template name="createStopwordsXML" exclude-result-prefixes="#all"/>
355353

356354
<xsl:for-each select="$rules">
357355
<xso:template match="{@match}" priority="{$PRIORITY_THIRD}" mode="decorate">
@@ -583,9 +581,6 @@
583581
</xso:param>
584582
</xsl:for-each>
585583

586-
587-
588-
589584
<!-- We record the current default stemmer folder. -->
590585
<xso:param name="defaultStemmerFolder"><xsl:value-of select="$ssDefaultStemmerFolder"/></xso:param>
591586

@@ -686,9 +681,27 @@
686681
</xso:template>
687682
</xsl:template>
688683

689-
690-
691-
684+
<xd:doc>
    <xd:desc>Template to create an XML representation of the stopwords file
        and an associated key. For each stopwordsFile element found in
        $configDoc, the referenced text file is resolved against $configUri,
        split on whitespace, and written to $outDir/dicts/ as an hcmc:words
        document containing one lower-cased hcmc:word per token. A variable
        named after the config element plus the suffix "Xml" (so
        "stopwordsFileXml") is emitted into the generated stylesheet to load
        that document, followed by a single key "w" that indexes hcmc:word
        elements by their string value.</xd:desc>
</xd:doc>
<xsl:template name="createStopwordsXML">
    <xsl:for-each select="($configDoc//stopwordsFile)">
        <!--Location of the plain-text word list, resolved relative to the config file-->
        <xsl:variable name="path" select="resolve-uri(text(),$configUri)"/>
        <!--Output document name: the source file name with a trailing ".txt" (if present)
            removed and ".xml" appended. Using replace() rather than substring-before()
            keeps the base name when the file does not have a .txt extension, instead of
            collapsing it to the empty string.-->
        <xsl:variable name="uri" select="concat($outDir,'/dicts/',replace(tokenize($path,'/')[last()],'\.txt$',''),'.xml')"/>
        <xsl:result-document href="{$uri}" method="xml">
            <hcmc:words>
                <!--tokenize() returns a zero-length string when the text begins or ends
                    with whitespace (e.g. a trailing newline); filter those out so that
                    no empty hcmc:word ends up in the key.-->
                <xsl:for-each select="tokenize(unparsed-text($path),'\s+')[. ne '']">
                    <hcmc:word><xsl:value-of select="lower-case(.)"/></hcmc:word>
                </xsl:for-each>
            </hcmc:words>
        </xsl:result-document>
        <!--Build the doc('…') expression as a string so it can be dropped into the
            select attribute of the generated variable.-->
        <xsl:variable name="docFn">doc('<xsl:value-of select="$uri"/>')</xsl:variable>
        <xso:variable name="{concat(local-name(),'Xml')}" select="{$docFn}"/>
    </xsl:for-each>
    <xso:key name="w" match="hcmc:word" use="."/>
</xsl:template>
704+
<!--
692705
<xd:doc>
693706
<xd:desc>Template to create an XML representation of the dictionary file
694707
and an associated key.</xd:desc>
@@ -709,7 +722,7 @@
709722
</xsl:for-each>
710723
711724
<xso:key name="w" match="hcmc:word" use="."/>
712-
</xsl:template>
725+
</xsl:template>-->
713726

714727

715728

xsl/create_reports.xsl

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@
127127
<xsl:call-template name="createFilters"/>
128128
<xsl:call-template name="createExcludes"/>
129129
<xsl:if test="$verboseReport = 'true'">
130-
<xsl:call-template name="createNonDictionaryList"/>
130+
<!-- <xsl:call-template name="createNonDictionaryList"/>-->
131131
<xsl:call-template name="createForeignWordList"/>
132132
</xsl:if>
133133
</div>
@@ -378,7 +378,7 @@
378378
</xsl:if>
379379
</xsl:template>
380380

381-
<xd:doc>
381+
<!-- <xd:doc>
382382
<xd:desc>Template for creating the "Not in Dictionary" list. While a term's exclusion
383383
from the dictionary doesn't change the search results, this report is helpful for catching
384384
typos in your document collection. </xd:desc>
@@ -388,29 +388,29 @@
388388
<section>
389389
<h2>Words Not In Dictionary</h2>
390390
391-
<!--Only check stems that are words-->
391+
<!-\-Only check stems that are words-\->
392392
<xsl:variable name="stemsToCheck" select="$spans[not(matches(@ss-stem,'\d'))][not(hcmc:isForeign(.))]" as="element(span)*"/>
393393
394-
<!--Retrieve the outermost spans so we don't include the nested spans from hyphenated terms
395-
(we process those a bit differently) -->
394+
<!-\-Retrieve the outermost spans so we don't include the nested spans from hyphenated terms
395+
(we process those a bit differently) -\->
396396
<xsl:variable name="outermostStems" select="outermost($stemsToCheck)" as="element(span)*"/>
397397
398398
<xsl:variable name="wordsNotInDictionaryMap" as="map(xs:string, element(span)*)">
399399
<xsl:map>
400-
<!--Group by whether or not it has descendant spans-->
400+
<!-\-Group by whether or not it has descendant spans-\->
401401
<xsl:for-each-group select="$outermostStems" group-by="exists(child::span[@ss-stem])">
402402
<xsl:choose>
403-
<!--If this thing has child stems, it's a hyphenated construct
404-
and so we check each child term individually-->
403+
<!-\-If this thing has child stems, it's a hyphenated construct
404+
and so we check each child term individually-\->
405405
<xsl:when test="current-grouping-key()">
406-
<!--Now iterate through all of the hyphenated spans-->
406+
<!-\-Now iterate through all of the hyphenated spans-\->
407407
<xsl:for-each-group select="current-group()" group-by="string(.)">
408-
<!--Stash the word-->
408+
<!-\-Stash the word-\->
409409
<xsl:variable name="term" select="current-grouping-key()"/>
410-
<!--Stash the current context-->
410+
<!-\-Stash the current context-\->
411411
<xsl:variable name="hyphenatedSpan" select="current-group()[1]" as="element(span)"/>
412412
413-
<!--Not in dictionary spans-->
413+
<!-\-Not in dictionary spans-\->
414414
<xsl:variable name="words"
415415
select="for $s in $hyphenatedSpan/span[@ss-stem] return lower-case(string($s))"
416416
as="xs:string*"/>
@@ -428,7 +428,7 @@
428428
</xsl:for-each-group>
429429
</xsl:when>
430430
<xsl:otherwise>
431-
<!--Group by string value (so basically just distinct values)-->
431+
<!-\-Group by string value (so basically just distinct values)-\->
432432
<xsl:for-each-group select="current-group()" group-by="hcmc:cleanWordForStemming(lower-case(string(.)))">
433433
<xsl:variable name="word" select="current-grouping-key()" as="xs:string"/>
434434
<xsl:if test="not(hcmc:isInDictionary($word))">
@@ -483,9 +483,9 @@
483483
</xsl:choose>
484484
</details>
485485
</section>
486-
</xsl:template>
486+
</xsl:template>-->
487487

488-
<xd:doc>
488+
<!-- <xd:doc>
489489
<xd:desc><xd:ref name="hcmc:isInDictionary">hcmc:isInDictionary</xd:ref> checks
490490
whether or not a word is in the provided dictionary. This is basically just a wrapper
491491
around the key() function, but we take advantage of Saxon 10HE's memo-function capabilities
@@ -495,7 +495,7 @@
495495
<xsl:function name="hcmc:isInDictionary" new-each-time="no" as="xs:boolean">
496496
<xsl:param name="word" as="xs:string"/>
497497
<xsl:sequence select="exists(key('w', $word, $dictionaryFileXml))"/>
498-
</xsl:function>
498+
</xsl:function>-->
499499

500500
<xd:doc>
501501
<xd:desc>Template to create a report of all "foreign" words in the collection:

0 commit comments

Comments
 (0)