Skip to content

Commit 215fa2a

Browse files
authored
Merge pull request #4 from pedrobiqua/develop
Fixing Bug
2 parents 4dc3d60 + bd20bb4 commit 215fa2a

File tree

3 files changed

+39
-14
lines changed

3 files changed

+39
-14
lines changed

lib/CMakeLists.txt

+6
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,12 @@ add_library(
1010

1111
target_include_directories(search_engine PUBLIC include)
1212

13+
# Link the ICU libraries that inverted_index.cpp relies on: icuuc supplies
# icu::UnicodeString, icuio the stream support behind <unicode/ustream.h>.
# PUBLIC makes targets that link search_engine link these libraries as well.
target_link_libraries(search_engine
14+
PUBLIC
15+
icuio
16+
icuuc
17+
)
18+
1319
install(
1420
TARGETS search_engine
1521
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}

lib/include/inverted_index.h

+1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ namespace inverted_index {
4444
map_str_docs add_doc(map_str_docs& mp, const str& doc_name, str& text);
4545
list_docs find_doc(map_str_docs& mp, str& word);
4646
list_docs find_answer(map_str_docs& mp, str& input);
47+
// Lower-cases *input in place (UTF-8 text, converted through ICU); a null pointer is ignored.
void shrink_string(std::string* input);
4748
}
4849

4950
#endif // INVERTED_INDEX

lib/src/inverted_index.cpp

+32-14
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,16 @@
11
#include "inverted_index.h"
2+
#include <locale>
3+
#include <bits/stdc++.h>
4+
#include <cwctype>
5+
#include <unicode/locid.h>
6+
#include <unicode/unistr.h>
7+
#include <unicode/ustream.h>
28

3-
inverted_index::vector_str inverted_index::split(inverted_index::str& s, const str& delimiter) {
4-
std::vector<str> tokens;
9+
10+
using namespace inverted_index;
11+
12+
vector_str inverted_index::split(str& s, const str& delimiter) {
13+
vector_str tokens;
514
size_t pos = 0;
615
std::string token;
716
while ((pos = s.find(delimiter)) != std::string::npos) {
@@ -14,11 +23,23 @@ inverted_index::vector_str inverted_index::split(inverted_index::str& s, const s
1423
return tokens;
1524
}
1625

17-
inverted_index::map_str_docs inverted_index::add_doc(inverted_index::map_str_docs& mp, const inverted_index::str& doc_name, inverted_index::str& text){
26+
/// Lower-cases a UTF-8 encoded string in place using ICU's Unicode case mapping.
///
/// Unlike a byte-wise std::tolower, this also handles non-ASCII letters
/// (e.g. accented characters), so indexed text and queries fold the same way.
///
/// @param input String to convert; a null pointer is ignored.
void inverted_index::shrink_string(std::string* input) {
    if (!input) return;  // Nothing to convert without a target string.

    // fromUTF8 consumes the full byte range of the string, so the conversion
    // does not stop at an embedded '\0' the way a c_str()-based constructor would.
    icu::UnicodeString ustr = icu::UnicodeString::fromUTF8(*input);
    ustr.toLower();

    std::string result;
    ustr.toUTF8String(result);
    *input = std::move(result);  // Hand over the buffer instead of copying it.
}
35+
36+
map_str_docs inverted_index::add_doc(map_str_docs& mp, const str& doc_name, str& text){
37+
38+
shrink_string(&text);
1839
auto words = inverted_index::split(text, DELIMITER);
1940

2041
for(const auto& word : words) {
21-
inverted_index::docs target = {doc_name, 1};
42+
docs target = {doc_name, 1};
2243
// Procura pelo elemento dentro da lista da palavra
2344
auto it = std::find(mp[word].begin(), mp[word].end(), target);
2445
if(it != mp[word].end()){
@@ -31,22 +52,19 @@ inverted_index::map_str_docs inverted_index::add_doc(inverted_index::map_str_doc
3152
return mp;
3253
}
3354

34-
inverted_index::list_docs inverted_index::find_doc(inverted_index::map_str_docs& mp, str& word){
55+
/// Looks up the documents recorded for a single word.
///
/// @param mp   Index to query.
/// @param word Term to look up; it is matched exactly against the stored keys.
/// @return A copy of the list stored for `word`, or an empty list when the
///         word is not present in the index.
list_docs inverted_index::find_doc(map_str_docs& mp, str& word) {
    // Use find() rather than operator[]: a lookup miss must not insert an
    // empty entry, otherwise every unknown query word would grow the index.
    const auto it = mp.find(word);
    if (it == mp.end()) {
        return list_docs{};
    }
    return it->second;
}
3758

38-
inverted_index::list_docs inverted_index::find_answer(inverted_index::map_str_docs& mp, inverted_index::str& input) {
39-
inverted_index::list_docs result;
40-
inverted_index::set_docs unique_docs;
41-
42-
// Coloca o texto em minúsculas
43-
std::transform(input.begin(), input.end(), input.begin(), to_lowercase);
59+
list_docs inverted_index::find_answer(map_str_docs& mp, str& input) {
60+
list_docs result;
61+
set_docs unique_docs;
4462

45-
// Divide o input em palavras com base no delimitador
46-
auto words = split(input, DELIMITER);
63+
shrink_string(&input);
64+
auto words = inverted_index::split(input, DELIMITER);
4765

4866
for (auto& word : words) {
49-
list_docs docs = find_doc(mp, word); // Busca documentos relacionados à palavra
67+
list_docs docs = inverted_index::find_doc(mp, word); // Busca documentos relacionados à palavra
5068
for (const auto& d : docs) {
5169
unique_docs.insert(d); // Armazena apenas os nomes dos documentos
5270
}

0 commit comments

Comments
 (0)