Skip to content

Commit 731b580

Browse files
committed
src/dict: make query generators handle whitespace and newlines
Makes exactquerygenerator and deconjugationquerygenerator handle newlines and whitespace as if they weren't there. This is useful because these generators fail to capture results when the text is split across multiple lines, as can sometimes happen with expressions. MeCab handles this internally, so Memento should as well.
1 parent e75abf1 commit 731b580

3 files changed

Lines changed: 35 additions & 7 deletions

File tree

src/dict/deconjugator.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -672,20 +672,36 @@ static QString formatDerivation(QList<WordForm> derivations)
672672

673673
QList<ConjugationInfo> deconjugate(const QString query, bool sentenceMode)
674674
{
675+
static const QRegularExpression WHITESPACE_REGEX("\\s");
676+
675677
QList<ConjugationInfo> results;
676678
if (sentenceMode)
677679
{
678-
QString word = query;
679-
while (!word.isEmpty())
680+
QString conjugated = query;
681+
while (!conjugated.isEmpty())
680682
{
681-
ConjugationInfo detail = { word, word, QList<WordForm>(), "" };
683+
QString word = conjugated;
684+
word.remove(WHITESPACE_REGEX);
685+
ConjugationInfo detail = {
686+
word, conjugated, QList<WordForm>(), ""
687+
};
682688
deconjugateRecursive(detail, results);
683-
word.chop(1);
689+
do
690+
{
691+
conjugated.chop(1);
692+
}
693+
while (
694+
!conjugated.isEmpty() &&
695+
WHITESPACE_REGEX.match(conjugated.back()).hasMatch()
696+
);
697+
684698
}
685699
}
686700
else
687701
{
688-
ConjugationInfo detail = { query, query, QList<WordForm>(), ""};
702+
QString word = query;
703+
word.remove(WHITESPACE_REGEX);
704+
ConjugationInfo detail = { word, query, QList<WordForm>(), ""};
689705
deconjugateRecursive(detail, results);
690706
}
691707

src/dict/exactquerygenerator.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,18 @@ std::vector<SearchQuery> ExactQueryGenerator::generateQueries(
2828
QString query = text;
2929
while (!query.isEmpty())
3030
{
31-
SearchQuery sq;
31+
SearchQuery sq{};
3232
sq.deconj = query;
3333
sq.surface = query;
3434
sq.source = SearchQuery::Source::exact;
35-
queries.emplace_back(std::move(sq));
35+
queries.emplace_back(sq);
36+
37+
sq.deconj.remove(m_whitespaceRegex);
38+
if (sq.deconj != query)
39+
{
40+
queries.emplace_back(std::move(sq));
41+
}
42+
3643
query.chop(1);
3744
}
3845

src/dict/exactquerygenerator.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
#include "dict/querygenerator.h"
2424

25+
#include <QRegularExpression>
26+
2527
/**
2628
* @brief A class that generates queries from substrings exactly as they appear
2729
* in the source text.
@@ -64,4 +66,7 @@ class ExactQueryGenerator final : public QueryGenerator
6466
[[nodiscard]]
6567
std::vector<SearchQuery> generateQueries(
6668
const QString &text) const override;
69+
70+
private:
71+
const QRegularExpression m_whitespaceRegex{"\\s"};
6772
};

0 commit comments

Comments
 (0)