1
1
#include " inverted_index.h"
2
+ #include < locale>
3
+ #include < bits/stdc++.h>
4
+ #include < cwctype>
5
+ #include < unicode/locid.h>
6
+ #include < unicode/unistr.h>
7
+ #include < unicode/ustream.h>
2
8
3
- inverted_index::vector_str inverted_index::split (inverted_index::str& s, const str& delimiter) {
4
- std::vector<str> tokens;
9
+
10
+ using namespace inverted_index ;
11
+
12
+ vector_str inverted_index::split (str& s, const str& delimiter) {
13
+ vector_str tokens;
5
14
size_t pos = 0 ;
6
15
std::string token;
7
16
while ((pos = s.find (delimiter)) != std::string::npos) {
@@ -14,11 +23,23 @@ inverted_index::vector_str inverted_index::split(inverted_index::str& s, const s
14
23
return tokens;
15
24
}
16
25
17
- inverted_index::map_str_docs inverted_index::add_doc (inverted_index::map_str_docs& mp, const inverted_index::str& doc_name, inverted_index::str& text){
26
+ void inverted_index::shrink_string (std::string* input) {
27
+ if (!input) return ; // Verifica se o ponteiro é válido
28
+
29
+ icu::UnicodeString ustr (input->c_str (), " UTF-8" );
30
+ ustr.toLower ();
31
+ std::string result;
32
+ ustr.toUTF8String (result);
33
+ *input = result;
34
+ }
35
+
36
+ map_str_docs inverted_index::add_doc (map_str_docs& mp, const str& doc_name, str& text){
37
+
38
+ shrink_string (&text);
18
39
auto words = inverted_index::split (text, DELIMITER);
19
40
20
41
for (const auto & word : words) {
21
- inverted_index:: docs target = {doc_name, 1 };
42
+ docs target = {doc_name, 1 };
22
43
// Procura pelo elemento dentro da lista da palavra
23
44
auto it = std::find (mp[word].begin (), mp[word].end (), target);
24
45
if (it != mp[word].end ()){
@@ -31,22 +52,19 @@ inverted_index::map_str_docs inverted_index::add_doc(inverted_index::map_str_doc
31
52
return mp;
32
53
}
33
54
34
- inverted_index:: list_docs inverted_index::find_doc (inverted_index:: map_str_docs& mp, str& word){
55
+ list_docs inverted_index::find_doc (map_str_docs& mp, str& word){
35
56
return mp[word];
36
57
}
37
58
38
- inverted_index::list_docs inverted_index::find_answer (inverted_index::map_str_docs& mp, inverted_index::str& input) {
39
- inverted_index::list_docs result;
40
- inverted_index::set_docs unique_docs;
41
-
42
- // Coloca o texto em minúsculas
43
- std::transform (input.begin (), input.end (), input.begin (), to_lowercase);
59
+ list_docs inverted_index::find_answer (map_str_docs& mp, str& input) {
60
+ list_docs result;
61
+ set_docs unique_docs;
44
62
45
- // Divide o input em palavras com base no delimitador
46
- auto words = split (input, DELIMITER);
63
+ shrink_string (& input);
64
+ auto words = inverted_index:: split (input, DELIMITER);
47
65
48
66
for (auto & word : words) {
49
- list_docs docs = find_doc (mp, word); // Busca documentos relacionados à palavra
67
+ list_docs docs = inverted_index:: find_doc (mp, word); // Busca documentos relacionados à palavra
50
68
for (const auto & d : docs) {
51
69
unique_docs.insert (d); // Armazena apenas os nomes dos documentos
52
70
}
0 commit comments