From c35e37a304d4cb83ac30d84633cff404b32961c8 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 14:59:55 +0900 Subject: [PATCH 1/6] Add: ContinuousLanguageLocale module --- lib/ingreedy.rb | 1 + lib/ingreedy/amount_parser.rb | 12 ++++++- lib/ingreedy/continuous_language_locale.rb | 9 ++++++ lib/ingreedy/dictionary_collection.rb | 14 ++++++-- lib/ingreedy/root_parser.rb | 11 ++++++- spec/ingreedy_spec.rb | 37 ++++++++++++++++++++++ 6 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 lib/ingreedy/continuous_language_locale.rb diff --git a/lib/ingreedy.rb b/lib/ingreedy.rb index ef355ad..d33f6f2 100644 --- a/lib/ingreedy.rb +++ b/lib/ingreedy.rb @@ -1,6 +1,7 @@ path = File.expand_path(File.join(File.dirname(__FILE__), "ingreedy")) require File.join(path, "case_insensitive_parser") +require File.join(path, "continuous_language_locale") require File.join(path, "ingreedy_parser") require File.join(path, "dictionary_collection") diff --git a/lib/ingreedy/amount_parser.rb b/lib/ingreedy/amount_parser.rb index 0250533..c657db5 100644 --- a/lib/ingreedy/amount_parser.rb +++ b/lib/ingreedy/amount_parser.rb @@ -3,11 +3,17 @@ module Ingreedy class AmountParser < Parslet::Parser include CaseInsensitiveParser + include ContinuousLanguageLocale rule(:whitespace) do - match("\s") + if use_whitespace?(current_locale) + match("\s") + else + match("\s").maybe + end end + rule(:integer) do match("[0-9]").repeat(1) end @@ -58,6 +64,10 @@ class AmountParser < Parslet::Parser private + def current_locale + Ingreedy.dictionaries.current_locale + end + def word_digits Ingreedy.dictionaries.current.numbers.keys end diff --git a/lib/ingreedy/continuous_language_locale.rb b/lib/ingreedy/continuous_language_locale.rb new file mode 100644 index 0000000..ed0c811 --- /dev/null +++ b/lib/ingreedy/continuous_language_locale.rb @@ -0,0 +1,9 @@ +module Ingreedy + module ContinuousLanguageLocale + CONTINUOUS_LANGUAGES_LOCALES = [:ja].freeze + + def 
use_whitespace?(locale) + !CONTINUOUS_LANGUAGES_LOCALES.include?(locale) + end + end +end diff --git a/lib/ingreedy/dictionary_collection.rb b/lib/ingreedy/dictionary_collection.rb index 9af269a..167ca45 100644 --- a/lib/ingreedy/dictionary_collection.rb +++ b/lib/ingreedy/dictionary_collection.rb @@ -12,17 +12,25 @@ def []=(locale, attributes) end def current + fetch_dictionary(current_locale) + end + + def current_locale + find_locale + end + + private + + def find_locale candidate_locales.each do |locale| if dictionary = fetch_dictionary(locale) - return dictionary + return locale end end raise "No dictionary found for locales: #{candidate_locales}" end - private - def candidate_locales Array(Ingreedy.locale || i18n_gem_locales || :en) end diff --git a/lib/ingreedy/root_parser.rb b/lib/ingreedy/root_parser.rb index 4d1b0d9..82949a0 100644 --- a/lib/ingreedy/root_parser.rb +++ b/lib/ingreedy/root_parser.rb @@ -1,6 +1,7 @@ module Ingreedy class RootParser < Parslet::Parser include CaseInsensitiveParser + include ContinuousLanguageLocale rule(:range) do AmountParser.new.as(:amount) >> @@ -19,7 +20,11 @@ class RootParser < Parslet::Parser end rule(:whitespace) do - match("\s") + if use_whitespace?(current_locale) + match("\s") + else + match("\s").maybe + end end rule(:container_amount) do @@ -119,6 +124,10 @@ def parse private attr_reader :original_query + + def current_locale + Ingreedy.dictionaries.current_locale + end def imprecise_amounts Ingreedy.dictionaries.current.imprecise_amounts diff --git a/spec/ingreedy_spec.rb b/spec/ingreedy_spec.rb index 4821a6e..46b2285 100644 --- a/spec/ingreedy_spec.rb +++ b/spec/ingreedy_spec.rb @@ -447,6 +447,43 @@ end end +describe Ingreedy, "continuous language" do + before(:all) do + Ingreedy.dictionaries[:ja] = { + units: { gram: ["g"] }, + numbers: { "一" => 1 }, + } + Ingreedy.locale = :ja + end + + after(:all) do + Ingreedy.locale = nil + end + + it "parses correctly" do + result = Ingreedy.parse "200g砂糖" + + 
expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end + + it "parses correctly with reverse format" do + result = Ingreedy.parse "砂糖200g" + + expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end + + it "parses correctly with numbers" do + result = Ingreedy.parse "卵一g" + + expect(result.amount).to eq(1) + expect(result.ingredient).to eq("卵") + end +end + describe Ingreedy, "error handling" do it "wraps Parslet exceptions in a custom exception" do expect do From a9df09bdb82cc79942e8db0e6c138c81f5af5533 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 15:01:21 +0900 Subject: [PATCH 2/6] Improve: reverse format - avoid parsing sugger 100g -> sugge, 100, g when continuous language --- lib/ingreedy/root_parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ingreedy/root_parser.rb b/lib/ingreedy/root_parser.rb index 82949a0..1f30219 100644 --- a/lib/ingreedy/root_parser.rb +++ b/lib/ingreedy/root_parser.rb @@ -92,7 +92,7 @@ class RootParser < Parslet::Parser rule(:reverse_format) do # e.g. flour 200g - ((whitespace >> quantity).absent? >> any).repeat.as(:ingredient) >> + ((whitespace >> quantity >> any.absent?).absent? 
>> any).repeat.as(:ingredient) >> whitespace >> quantity end From 15164287985c914e28a0e27405d42d20944a5b63 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 16:31:25 +0900 Subject: [PATCH 3/6] Remove: newline --- lib/ingreedy/amount_parser.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/ingreedy/amount_parser.rb b/lib/ingreedy/amount_parser.rb index c657db5..d6f291e 100644 --- a/lib/ingreedy/amount_parser.rb +++ b/lib/ingreedy/amount_parser.rb @@ -13,7 +13,6 @@ class AmountParser < Parslet::Parser end end - rule(:integer) do match("[0-9]").repeat(1) end From ce01093d9d020d0990773128fda98c443c518038 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 17:40:36 +0900 Subject: [PATCH 4/6] Add: Taiwan and Thailand to CONTINUOUS_LANGUAGES_LOCALES --- lib/ingreedy/continuous_language_locale.rb | 2 +- .../continuouse_language_locale_spec.rb | 23 +++++ spec/ingreedy_spec.rb | 91 ++++++++++++++----- 3 files changed, 90 insertions(+), 26 deletions(-) create mode 100644 spec/ingreedy/continuouse_language_locale_spec.rb diff --git a/lib/ingreedy/continuous_language_locale.rb b/lib/ingreedy/continuous_language_locale.rb index ed0c811..1773f2b 100644 --- a/lib/ingreedy/continuous_language_locale.rb +++ b/lib/ingreedy/continuous_language_locale.rb @@ -1,6 +1,6 @@ module Ingreedy module ContinuousLanguageLocale - CONTINUOUS_LANGUAGES_LOCALES = [:ja].freeze + CONTINUOUS_LANGUAGES_LOCALES = %i(ja th zh-TW) def use_whitespace?(locale) !CONTINUOUS_LANGUAGES_LOCALES.include?(locale) diff --git a/spec/ingreedy/continuouse_language_locale_spec.rb b/spec/ingreedy/continuouse_language_locale_spec.rb new file mode 100644 index 0000000..3b62b92 --- /dev/null +++ b/spec/ingreedy/continuouse_language_locale_spec.rb @@ -0,0 +1,23 @@ +require 'ingreedy/continuous_language_locale' + +RSpec.describe Ingreedy::ContinuousLanguageLocale do + include Ingreedy::ContinuousLanguageLocale + + describe '#use_whitespace?' 
do + context 'when the locale is a continuous language' do + it 'returns false' do + expect(use_whitespace?(:ja)).to be_falsey + expect(use_whitespace?(:th)).to be_falsey + expect(use_whitespace?(:'zh-TW')).to be_falsey + end + end + + context 'when the locale is not a continuous language' do + it 'returns true' do + expect(use_whitespace?(:en)).to be_truthy + expect(use_whitespace?(:fr)).to be_truthy + expect(use_whitespace?(:'zh-CN')).to be_truthy + end + end + end +end diff --git a/spec/ingreedy_spec.rb b/spec/ingreedy_spec.rb index 46b2285..66a6488 100644 --- a/spec/ingreedy_spec.rb +++ b/spec/ingreedy_spec.rb @@ -448,39 +448,80 @@ end describe Ingreedy, "continuous language" do - before(:all) do - Ingreedy.dictionaries[:ja] = { - units: { gram: ["g"] }, - numbers: { "一" => 1 }, - } - Ingreedy.locale = :ja - end + context "Japanese" do + before do + Ingreedy.dictionaries[:ja] = { + units: { gram: ["g"], other: ["個"] }, + numbers: { "一" => 1 }, + } + Ingreedy.locale = :ja + end - after(:all) do - Ingreedy.locale = nil - end + after do + Ingreedy.locale = nil + end - it "parses correctly" do - result = Ingreedy.parse "200g砂糖" + it "parses correctly" do + result = Ingreedy.parse "200g砂糖" - expect(result.amount).to eq(200) - expect(result.unit).to eq(:gram) - expect(result.ingredient).to eq("砂糖") - end + expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end - it "parses correctly with reverse format" do - result = Ingreedy.parse "砂糖200g" + it "parses correctly with reverse format" do + result = Ingreedy.parse "砂糖200g" - expect(result.amount).to eq(200) - expect(result.unit).to eq(:gram) - expect(result.ingredient).to eq("砂糖") + expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end + + it "parses correctly with numbers" do + result = Ingreedy.parse "卵一個" + + expect(result.amount).to eq(1) + expect(result.unit).to eq(:other) + 
expect(result.ingredient).to eq("卵") + end end - it "parses correctly with numbers" do - result = Ingreedy.parse "卵一g" + context "Taiwanese" do + before do + Ingreedy.dictionaries[:"zh-TW"] = { + units: { gram: ["g"], other: ["个"] }, + numbers: { "一" => 1 }, + } + Ingreedy.locale = :"zh-TW" + end - expect(result.amount).to eq(1) - expect(result.ingredient).to eq("卵") + after do + Ingreedy.locale = nil + end + + it "parses correctly" do + result = Ingreedy.parse "200g砂糖" + + expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end + + it "parses correctly with reverse format" do + result = Ingreedy.parse "砂糖200g" + + expect(result.amount).to eq(200) + expect(result.unit).to eq(:gram) + expect(result.ingredient).to eq("砂糖") + end + + it "parses correctly with numbers" do + result = Ingreedy.parse "一个鸡蛋" + + expect(result.amount).to eq(1) + expect(result.unit).to eq(:other) + expect(result.ingredient).to eq("鸡蛋") + end end end From 72046b1e8d45e2ae32c8a36cb050f7cbf831cb02 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 18:00:16 +0900 Subject: [PATCH 5/6] Refactor: method to get current locale --- lib/ingreedy/amount_parser.rb | 2 +- lib/ingreedy/dictionary.rb | 5 +++-- lib/ingreedy/dictionary_collection.rb | 18 +++++------------- lib/ingreedy/root_parser.rb | 2 +- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/lib/ingreedy/amount_parser.rb b/lib/ingreedy/amount_parser.rb index d6f291e..20f5de1 100644 --- a/lib/ingreedy/amount_parser.rb +++ b/lib/ingreedy/amount_parser.rb @@ -64,7 +64,7 @@ class AmountParser < Parslet::Parser private def current_locale - Ingreedy.dictionaries.current_locale + Ingreedy.dictionaries.current.locale end def word_digits diff --git a/lib/ingreedy/dictionary.rb b/lib/ingreedy/dictionary.rb index 3b329fe..20b113d 100644 --- a/lib/ingreedy/dictionary.rb +++ b/lib/ingreedy/dictionary.rb @@ -1,14 +1,15 @@ module Ingreedy class Dictionary - attr_reader 
:units, :numbers, :prepositions, :range_separators + attr_reader :units, :numbers, :prepositions, :range_separators, :locale attr_reader :imprecise_amounts - def initialize(units:, numbers: {}, prepositions: [], range_separators: %w{- ~}, imprecise_amounts: []) + def initialize(units:, numbers: {}, prepositions: [], range_separators: %w{- ~}, imprecise_amounts: [], locale: nil) @units = units @numbers = sort_by_length(numbers) @prepositions = prepositions @range_separators = range_separators @imprecise_amounts = imprecise_amounts + @locale = locale end # https://en.wikipedia.org/wiki/Number_Forms diff --git a/lib/ingreedy/dictionary_collection.rb b/lib/ingreedy/dictionary_collection.rb index 167ca45..ed47c51 100644 --- a/lib/ingreedy/dictionary_collection.rb +++ b/lib/ingreedy/dictionary_collection.rb @@ -8,29 +8,21 @@ def initialize end def []=(locale, attributes) - @collection[locale] = Dictionary.new(**attributes) + @collection[locale] = Dictionary.new(locale:, **attributes) end def current - fetch_dictionary(current_locale) - end - - def current_locale - find_locale - end - - private - - def find_locale candidate_locales.each do |locale| if dictionary = fetch_dictionary(locale) - return locale + return dictionary end end raise "No dictionary found for locales: #{candidate_locales}" end + private + def candidate_locales Array(Ingreedy.locale || i18n_gem_locales || :en) end @@ -46,7 +38,7 @@ def i18n_gem_locales end def fetch_dictionary(locale) - @collection[locale] ||= Dictionary.new **load_yaml(locale) + @collection[locale] ||= Dictionary.new(locale:, **load_yaml(locale)) rescue Errno::ENOENT end diff --git a/lib/ingreedy/root_parser.rb b/lib/ingreedy/root_parser.rb index 1f30219..ec4b884 100644 --- a/lib/ingreedy/root_parser.rb +++ b/lib/ingreedy/root_parser.rb @@ -126,7 +126,7 @@ def parse attr_reader :original_query def current_locale - Ingreedy.dictionaries.current_locale + Ingreedy.dictionaries.current.locale end def imprecise_amounts From 
75e164330b7eb3de6c8f1ab3cca1d2aeb32a8332 Mon Sep 17 00:00:00 2001 From: sota-horiuchi Date: Tue, 26 Mar 2024 18:44:34 +0900 Subject: [PATCH 6/6] Fix: Dictionary initialization arg order --- lib/ingreedy/dictionary_collection.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ingreedy/dictionary_collection.rb b/lib/ingreedy/dictionary_collection.rb index ed47c51..ddcbb26 100644 --- a/lib/ingreedy/dictionary_collection.rb +++ b/lib/ingreedy/dictionary_collection.rb @@ -8,7 +8,7 @@ def initialize end def []=(locale, attributes) - @collection[locale] = Dictionary.new(locale:, **attributes) + @collection[locale] = Dictionary.new(**attributes, locale: locale) end def current @@ -38,7 +38,7 @@ def i18n_gem_locales end def fetch_dictionary(locale) - @collection[locale] ||= Dictionary.new(locale:, **load_yaml(locale)) + @collection[locale] ||= Dictionary.new(**load_yaml(locale), locale: locale) rescue Errno::ENOENT end