From adfa99b36988c1c4028080def60ce38a6ac12312 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Mon, 22 Apr 2024 08:48:14 +0200 Subject: [PATCH] Issue #3441311 by antonio_zoocha, mkalkbrenner: Adding missing Search API solr text field type for Welsh language --- ...api_solr.solr_field_type.text_cy_6_0_0.yml | 461 +++++++++++++++++ ...api_solr.solr_field_type.text_cy_7_0_0.yml | 465 ++++++++++++++++++ tests/src/Kernel/SearchApiSolrTest.php | 2 + 3 files changed, 928 insertions(+) create mode 100644 config/optional/search_api_solr.solr_field_type.text_cy_6_0_0.yml create mode 100644 config/optional/search_api_solr.solr_field_type.text_cy_7_0_0.yml diff --git a/config/optional/search_api_solr.solr_field_type.text_cy_6_0_0.yml b/config/optional/search_api_solr.solr_field_type.text_cy_6_0_0.yml new file mode 100644 index 00000000..c5f4b2bf --- /dev/null +++ b/config/optional/search_api_solr.solr_field_type.text_cy_6_0_0.yml @@ -0,0 +1,461 @@ +langcode: en +status: true +dependencies: + module: + - search_api_solr + - language + config: + - language.entity.cy +id: text_cy_6_0_0 +label: 'Welsh Text Field' +minimum_solr_version: 6.0.0 +custom_code: '' +field_type_language_code: cy +domains: {} +field_type: + name: text_cy + class: solr.TextField + positionIncrementGap: 100 + storeOffsetsWithPositions: true + analyzers: + - + type: index + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterFilterFactory + catenateNumbers: 1 + generateNumberParts: 1 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 1 + - + class: solr.LowerCaseFilterFactory + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.SnowballPorterFilterFactory + language: Spanish + protected: protwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory 
+ - + type: query + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterFilterFactory + catenateNumbers: 0 + generateNumberParts: 0 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 0 + - + class: solr.LowerCaseFilterFactory + - + class: solr.SynonymFilterFactory + synonyms: synonyms_cy.txt + expand: true + ignoreCase: true + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.SnowballPorterFilterFactory + language: Spanish + protected: protwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory +unstemmed_field_type: + name: text_unstemmed_cy + class: solr.TextField + positionIncrementGap: 100 + storeOffsetsWithPositions: true + analyzers: + - + type: index + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterFilterFactory + catenateNumbers: 1 + generateNumberParts: 1 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 1 + - + class: solr.LowerCaseFilterFactory + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory + - + type: query + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterFilterFactory + catenateNumbers: 0 + generateNumberParts: 0 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 0 + - + class: solr.LowerCaseFilterFactory + - + class: solr.SynonymFilterFactory + synonyms: synonyms_cy.txt + expand: true + 
ignoreCase: true + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory +collated_field_type: + name: collated_cy + class: solr.ICUCollationField + locale: cy + strength: primary + caseLevel: false +solr_configs: + searchComponents: + - + name: spellcheck + class: solr.SpellCheckComponent + lst: + - + name: spellchecker + str: + - + name: name + VALUE: cy + - + name: field + VALUE: spellcheck_cy + - + name: classname + VALUE: solr.DirectSolrSpellChecker + - + name: distanceMeasure + VALUE: internal + - + name: accuracy + VALUE: '0.5' + - + name: maxEdits + VALUE: '2' + - + name: minPrefix + VALUE: '1' + - + name: maxInspections + VALUE: '5' + - + name: minQueryLength + VALUE: '4' + - + name: maxQueryFrequency + VALUE: '0.01' + - + name: thresholdTokenFrequency + VALUE: '.01' + - + name: onlyMorePopular + VALUE: 'true' + - + name: suggest + class: solr.SuggestComponent + lst: + - + name: suggester + str: + - + name: name + VALUE: cy + - + name: lookupImpl + VALUE: AnalyzingInfixLookupFactory + - + name: dictionaryImpl + VALUE: DocumentDictionaryFactory + - + name: field + VALUE: twm_suggest + - + name: suggestAnalyzerFieldType + VALUE: text_cy + - + name: contextField + VALUE: sm_context_tags + - + name: buildOnCommit + VALUE: 'false' + - + name: buildOnStartup + VALUE: 'false' +text_files: + stopwords: | + a + ac + ag + am + an + ar + at + canys + cyfryw + ddim + eu + fel + fod + gan + hyn + hynny + hwn + i + mae + mewn + na + nac + neu + nhw + o + oedd + ond + os + ym + ymlaen + yn + yna + yno + yng + yr + synonyms: | + drupal, durpal + nouns: | + protwords: | + accents: | + # À => A + "\u00C0" => "A" + # Á => A + "\u00C1" => "A" + #  => A + "\u00C2" => "A" + # à => A + "\u00C3" => "A" + # Ä => A + "\u00C4" => "A" + # Å => A + "\u00C5" => "A" + # Ą => A + "\u0104" => "A" + # Æ => AE + "\u00C6" => "AE" + # Ç => C + "\u00C7" => "C" + # Ć => C + "\u0106" => "C" + # È => E + "\u00C8" => "E" 
+ # É => E + "\u00C9" => "E" + # Ê => E + "\u00CA" => "E" + # Ë => E + "\u00CB" => "E" + # Ę => E + "\u0118" => "E" + # Ì => I + "\u00CC" => "I" + # Í => I + "\u00CD" => "I" + # Î => I + "\u00CE" => "I" + # Ï => I + "\u00CF" => "I" + # IJ => IJ + "\u0132" => "IJ" + # Ð => D + "\u00D0" => "D" + # Ł => L + "\u0141" => "L" + # Ñ => N + "\u00D1" => "N" + # Ń => N + "\u0143" => "N" + # Ò => O + "\u00D2" => "O" + # Ó => O + "\u00D3" => "O" + # Ô => O + "\u00D4" => "O" + # Õ => O + "\u00D5" => "O" + # Ö => O + "\u00D6" => "O" + # Ø => O + "\u00D8" => "O" + # Œ => OE + "\u0152" => "OE" + # Þ + "\u00DE" => "TH" + # Ù => U + "\u00D9" => "U" + # Ú => U + "\u00DA" => "U" + # Û => U + "\u00DB" => "U" + # Ü => U + "\u00DC" => "U" + # Ý => Y + "\u00DD" => "Y" + # Ÿ => Y + "\u0178" => "Y" + # à => a + "\u00E0" => "a" + # á => a + "\u00E1" => "a" + # â => a + "\u00E2" => "a" + # ã => a + "\u00E3" => "a" + # ä => a + "\u00E4" => "a" + # å => a + "\u00E5" => "a" + # æ => ae + "\u00E6" => "ae" + # ç => c + "\u00E7" => "c" + # è => e + "\u00E8" => "e" + # é => e + "\u00E9" => "e" + # ê => e + "\u00EA" => "e" + # ë => e + "\u00EB" => "e" + # ì => i + "\u00EC" => "i" + # í => i + "\u00ED" => "i" + # î => i + "\u00EE" => "i" + # ï => i + "\u00EF" => "i" + # ij => ij + "\u0133" => "ij" + # ð => d + "\u00F0" => "d" + # ñ => n + "\u00F1" => "n" + # ò => o + "\u00F2" => "o" + # ó => o + "\u00F3" => "o" + # ô => o + "\u00F4" => "o" + # õ => o + "\u00F5" => "o" + # ö => o + "\u00F6" => "o" + # ø => o + "\u00F8" => "o" + # œ => oe + "\u0153" => "oe" + # ß => ss + "\u00DF" => "ss" + # Ś => S + "\u015a" => "S" + # þ => th + "\u00FE" => "th" + # ù => u + "\u00F9" => "u" + # ú => u + "\u00FA" => "u" + # û => u + "\u00FB" => "u" + # ü => u + "\u00FC" => "u" + # ý => y + "\u00FD" => "y" + # ÿ => y + "\u00FF" => "y" + # Ź => Z + "\u0179" => "Z" + # Ż => Z + "\u017b" => "Z" + # ff => ff + "\uFB00" => "ff" + # fi => fi + "\uFB01" => "fi" + # fl => fl + "\uFB02" => "fl" + # ffi => ffi + "\uFB03" => "ffi" 
+ # ffl => ffl + "\uFB04" => "ffl" + # ſt => st + "\uFB05" => "st" + # st => st + "\uFB06" => "st" + # Māori macrons. + # Ā => A + "\u0100" => "A" + # Ē => E + "\u0112" => "E" + # Ī => I + "\u012A" => "I" + # Ō => O + "\u014C" => "O" + # Ū => U + "\u016A" => "U" + # ā => a + "\u0101" => "a" + # ē => e + "\u0113" => "e" + # ī => i + "\u012B" => "i" + # ō => o + "\u014D" => "o" + # ū => u + "\u016B" => "u" diff --git a/config/optional/search_api_solr.solr_field_type.text_cy_7_0_0.yml b/config/optional/search_api_solr.solr_field_type.text_cy_7_0_0.yml new file mode 100644 index 00000000..78772712 --- /dev/null +++ b/config/optional/search_api_solr.solr_field_type.text_cy_7_0_0.yml @@ -0,0 +1,465 @@ +langcode: en +status: true +dependencies: + module: + - search_api_solr + - language + config: + - language.entity.cy +id: text_cy_7_0_0 +label: 'Welsh Text Field' +minimum_solr_version: 7.0.0 +custom_code: '' +field_type_language_code: cy +domains: {} +field_type: + name: text_cy + class: solr.TextField + positionIncrementGap: 100 + storeOffsetsWithPositions: true + analyzers: + - + type: index + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterGraphFilterFactory + catenateNumbers: 1 + generateNumberParts: 1 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 1 + - + class: solr.FlattenGraphFilterFactory + - + class: solr.LowerCaseFilterFactory + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.SnowballPorterFilterFactory + language: Spanish + protected: protwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory + - + type: query + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: 
solr.WordDelimiterGraphFilterFactory + catenateNumbers: 0 + generateNumberParts: 0 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 0 + - + class: solr.LowerCaseFilterFactory + - + class: solr.SynonymGraphFilterFactory + synonyms: synonyms_cy.txt + expand: true + ignoreCase: true + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.SnowballPorterFilterFactory + language: Spanish + protected: protwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory +unstemmed_field_type: + name: text_unstemmed_cy + class: solr.TextField + positionIncrementGap: 100 + storeOffsetsWithPositions: true + analyzers: + - + type: index + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterGraphFilterFactory + catenateNumbers: 1 + generateNumberParts: 1 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 1 + - + class: solr.FlattenGraphFilterFactory + - + class: solr.LowerCaseFilterFactory + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: solr.RemoveDuplicatesTokenFilterFactory + - + type: query + charFilters: + - + class: solr.MappingCharFilterFactory + mapping: accents_cy.txt + tokenizer: + class: solr.StandardTokenizerFactory + filters: + - + class: solr.WordDelimiterGraphFilterFactory + catenateNumbers: 0 + generateNumberParts: 0 + protected: protwords_cy.txt + splitOnCaseChange: 1 + generateWordParts: 1 + preserveOriginal: 1 + catenateAll: 0 + catenateWords: 0 + - + class: solr.LowerCaseFilterFactory + - + class: solr.SynonymGraphFilterFactory + synonyms: synonyms_cy.txt + expand: true + ignoreCase: true + - + class: solr.StopFilterFactory + ignoreCase: true + words: stopwords_cy.txt + - + class: 
solr.RemoveDuplicatesTokenFilterFactory +collated_field_type: + name: collated_cy + class: solr.ICUCollationField + locale: cy + strength: primary + caseLevel: false +solr_configs: + searchComponents: + - + name: spellcheck + class: solr.SpellCheckComponent + lst: + - + name: spellchecker + str: + - + name: name + VALUE: cy + - + name: field + VALUE: spellcheck_cy + - + name: classname + VALUE: solr.DirectSolrSpellChecker + - + name: distanceMeasure + VALUE: internal + - + name: accuracy + VALUE: '0.5' + - + name: maxEdits + VALUE: '2' + - + name: minPrefix + VALUE: '1' + - + name: maxInspections + VALUE: '5' + - + name: minQueryLength + VALUE: '4' + - + name: maxQueryFrequency + VALUE: '0.01' + - + name: thresholdTokenFrequency + VALUE: '.01' + - + name: onlyMorePopular + VALUE: 'true' + - + name: suggest + class: solr.SuggestComponent + lst: + - + name: suggester + str: + - + name: name + VALUE: cy + - + name: lookupImpl + VALUE: AnalyzingInfixLookupFactory + - + name: dictionaryImpl + VALUE: DocumentDictionaryFactory + - + name: field + VALUE: twm_suggest + - + name: suggestAnalyzerFieldType + VALUE: text_cy + - + name: contextField + VALUE: sm_context_tags + - + name: buildOnCommit + VALUE: 'false' + - + name: buildOnStartup + VALUE: 'false' +text_files: + stopwords: | + a + ac + ag + am + an + ar + at + canys + cyfryw + ddim + eu + fel + fod + gan + hyn + hynny + hwn + i + mae + mewn + na + nac + neu + nhw + o + oedd + ond + os + ym + ymlaen + yn + yna + yno + yng + yr + synonyms: | + drupal, durpal + nouns: | + protwords: | + accents: | + # À => A + "\u00C0" => "A" + # Á => A + "\u00C1" => "A" + #  => A + "\u00C2" => "A" + # à => A + "\u00C3" => "A" + # Ä => A + "\u00C4" => "A" + # Å => A + "\u00C5" => "A" + # Ą => A + "\u0104" => "A" + # Æ => AE + "\u00C6" => "AE" + # Ç => C + "\u00C7" => "C" + # Ć => C + "\u0106" => "C" + # È => E + "\u00C8" => "E" + # É => E + "\u00C9" => "E" + # Ê => E + "\u00CA" => "E" + # Ë => E + "\u00CB" => "E" + # Ę => E + "\u0118" 
=> "E" + # Ì => I + "\u00CC" => "I" + # Í => I + "\u00CD" => "I" + # Î => I + "\u00CE" => "I" + # Ï => I + "\u00CF" => "I" + # IJ => IJ + "\u0132" => "IJ" + # Ð => D + "\u00D0" => "D" + # Ł => L + "\u0141" => "L" + # Ñ => N + "\u00D1" => "N" + # Ń => N + "\u0143" => "N" + # Ò => O + "\u00D2" => "O" + # Ó => O + "\u00D3" => "O" + # Ô => O + "\u00D4" => "O" + # Õ => O + "\u00D5" => "O" + # Ö => O + "\u00D6" => "O" + # Ø => O + "\u00D8" => "O" + # Œ => OE + "\u0152" => "OE" + # Þ + "\u00DE" => "TH" + # Ù => U + "\u00D9" => "U" + # Ú => U + "\u00DA" => "U" + # Û => U + "\u00DB" => "U" + # Ü => U + "\u00DC" => "U" + # Ý => Y + "\u00DD" => "Y" + # Ÿ => Y + "\u0178" => "Y" + # à => a + "\u00E0" => "a" + # á => a + "\u00E1" => "a" + # â => a + "\u00E2" => "a" + # ã => a + "\u00E3" => "a" + # ä => a + "\u00E4" => "a" + # å => a + "\u00E5" => "a" + # æ => ae + "\u00E6" => "ae" + # ç => c + "\u00E7" => "c" + # è => e + "\u00E8" => "e" + # é => e + "\u00E9" => "e" + # ê => e + "\u00EA" => "e" + # ë => e + "\u00EB" => "e" + # ì => i + "\u00EC" => "i" + # í => i + "\u00ED" => "i" + # î => i + "\u00EE" => "i" + # ï => i + "\u00EF" => "i" + # ij => ij + "\u0133" => "ij" + # ð => d + "\u00F0" => "d" + # ñ => n + "\u00F1" => "n" + # ò => o + "\u00F2" => "o" + # ó => o + "\u00F3" => "o" + # ô => o + "\u00F4" => "o" + # õ => o + "\u00F5" => "o" + # ö => o + "\u00F6" => "o" + # ø => o + "\u00F8" => "o" + # œ => oe + "\u0153" => "oe" + # ß => ss + "\u00DF" => "ss" + # Ś => S + "\u015a" => "S" + # þ => th + "\u00FE" => "th" + # ù => u + "\u00F9" => "u" + # ú => u + "\u00FA" => "u" + # û => u + "\u00FB" => "u" + # ü => u + "\u00FC" => "u" + # ý => y + "\u00FD" => "y" + # ÿ => y + "\u00FF" => "y" + # Ź => Z + "\u0179" => "Z" + # Ż => Z + "\u017b" => "Z" + # ff => ff + "\uFB00" => "ff" + # fi => fi + "\uFB01" => "fi" + # fl => fl + "\uFB02" => "fl" + # ffi => ffi + "\uFB03" => "ffi" + # ffl => ffl + "\uFB04" => "ffl" + # ſt => st + "\uFB05" => "st" + # st => st + "\uFB06" => "st" + # Māori 
macrons. + # Ā => A + "\u0100" => "A" + # Ē => E + "\u0112" => "E" + # Ī => I + "\u012A" => "I" + # Ō => O + "\u014C" => "O" + # Ū => U + "\u016A" => "U" + # ā => a + "\u0101" => "a" + # ē => e + "\u0113" => "e" + # ī => i + "\u012B" => "i" + # ō => o + "\u014D" => "o" + # ū => u + "\u016B" => "u" diff --git a/tests/src/Kernel/SearchApiSolrTest.php b/tests/src/Kernel/SearchApiSolrTest.php index fd2e29db..800d731e 100644 --- a/tests/src/Kernel/SearchApiSolrTest.php +++ b/tests/src/Kernel/SearchApiSolrTest.php @@ -55,6 +55,7 @@ class SearchApiSolrTest extends SolrBackendTestBase { 'bg' => 'bg', 'ca' => 'ca', 'cs' => 'cs', + 'cy' => 'cy', 'da' => 'da', 'el' => 'el', 'es' => 'es', @@ -336,6 +337,7 @@ protected function checkSchemaLanguages() { $language_ids['zh-hant'] = FALSE; if (version_compare($targeted_solr_major_version, '6', '<')) { $language_ids['ar'] = FALSE; + $language_ids['cy'] = FALSE; $language_ids['ja'] = FALSE; $language_ids['hu'] = FALSE; $language_ids['sk'] = FALSE;