@@ -5,6 +5,14 @@ extendr_module! {
5
5
mod stringer;
6
6
fn str_regex_count;
7
7
fn str_text_count;
8
+ fn str_regex_detect;
9
+ fn str_text_detect;
10
+ fn str_regex_extract;
11
+ fn str_text_extract;
12
+ fn single_str_regex_extract_all;
13
+ fn single_str_text_extract_all;
14
+
15
+ fn str_length;
8
16
}
9
17
10
18
#[ extendr]
@@ -13,24 +21,166 @@ fn str_regex_count(strings: Vec<String>, pattern: String) -> Vec<i64> {
13
21
Ok ( re) => re,
14
22
Err ( err) => panic ! ( "{}" , err) ,
15
23
} ;
16
- strings. iter ( ) . map ( |x| {
17
- let c = re. split ( x. as_str ( ) ) . count ( ) ;
18
- if c > 0 {
19
- ( c - 1 ) as i64
20
- } else {
21
- 0 as i64
22
- }
23
- } ) . collect ( )
24
+ strings
25
+ . iter ( )
26
+ . map ( |x| {
27
+ let c = re. split ( x. as_str ( ) ) . count ( ) ;
28
+ if c > 0 {
29
+ ( c - 1 ) as i64
30
+ } else {
31
+ 0 as i64
32
+ }
33
+ } )
34
+ . collect ( )
24
35
}
25
36
26
37
#[ extendr]
27
38
/// Counts the non-overlapping occurrences of the literal substring `sub` in
/// each element of `strings`.
///
/// Returns one count per input string, in input order. An empty `sub`
/// matches at every character boundary, so it yields the number of
/// characters plus one.
fn str_text_count(strings: Vec<String>, sub: String) -> Vec<i64> {
    strings
        .iter()
        // `matches` yields exactly one item per separator that `split` cuts
        // on, so this equals `split(..).count() - 1` without the dead
        // "count is zero" branch (`split` always yields at least one piece).
        .map(|text| text.matches(sub.as_str()).count() as i64)
        .collect()
}
51
+
52
+ #[ extendr]
53
+ fn str_regex_detect ( strings : Vec < String > , pattern : String ) -> Vec < bool > {
54
+ let re = match Regex :: new ( pattern. as_str ( ) ) {
55
+ Ok ( re) => re,
56
+ Err ( err) => panic ! ( "{}" , err) ,
57
+ } ;
58
+ strings. iter ( ) . map ( |x| re. is_match ( x. as_str ( ) ) ) . collect ( )
59
+ }
60
+
61
+ #[ extendr]
62
/// Reports, for each element of `strings`, whether it contains the literal
/// substring `sub`.
///
/// Returns one flag per input string, in input order.
fn str_text_detect(strings: Vec<String>, sub: String) -> Vec<bool> {
    let needle = sub.as_str();
    let mut flags = Vec::with_capacity(strings.len());
    for text in &strings {
        flags.push(text.contains(needle));
    }
    flags
}
65
+
66
+ #[ extendr]
67
+ fn str_regex_extract ( strings : Vec < String > , pattern : String ) -> Vec < String > {
68
+ let re = match Regex :: new ( pattern. as_str ( ) ) {
69
+ Ok ( re) => re,
70
+ Err ( err) => panic ! ( "{}" , err) ,
71
+ } ;
72
+ strings
73
+ . iter ( )
74
+ . map ( |x| {
75
+ re. find ( x. as_str ( ) )
76
+ . map ( |m| {
77
+ x. chars ( )
78
+ . skip ( m. start ( ) )
79
+ . take ( m. end ( ) - m. start ( ) )
80
+ . collect ( )
81
+ } )
82
+ . unwrap_or_else ( || "" . to_string ( ) )
83
+ } )
84
+ . collect ( )
85
+ }
86
+
87
+ #[ extendr]
88
/// Returns, for each element of `strings`, a copy of `sub` when the element
/// contains `sub` as a literal substring, and an empty string otherwise.
///
/// The output has one entry per input string, in input order.
fn str_text_extract(strings: Vec<String>, sub: String) -> Vec<String> {
    let needle = sub.as_str();
    let mut extracted = Vec::with_capacity(strings.len());
    for text in &strings {
        let piece = if text.contains(needle) {
            needle.to_owned()
        } else {
            String::new()
        };
        extracted.push(piece);
    }
    extracted
}
100
+
101
+ #[ extendr]
102
+ fn single_str_regex_extract_all ( string : String , pattern : String ) -> Vec < String > {
103
+ let re = match Regex :: new ( pattern. as_str ( ) ) {
104
+ Ok ( re) => re,
105
+ Err ( err) => panic ! ( "{}" , err) ,
106
+ } ;
107
+ let mut rs = Vec :: new ( ) ;
108
+ let mut index = 0 ;
109
+ loop {
110
+ match re. find_at ( string. as_str ( ) , index) {
111
+ Some ( m) => {
112
+ rs. push (
113
+ string. chars ( )
114
+ . skip ( m. start ( ) )
115
+ . take ( m. end ( ) - m. start ( ) )
116
+ . collect ( ) ,
117
+ ) ;
118
+ index = m. end ( ) ;
119
+ }
120
+ None => break ,
121
+ }
122
+ }
123
+ if rs. len ( ) == 0 {
124
+ rs. push ( "" . to_string ( ) ) ;
125
+ }
126
+ rs
127
+ }
128
+
129
+ #[ extendr]
130
/// Extracts every non-overlapping occurrence of the literal substring `sub`
/// from `string`, in order of appearance.
///
/// When there is no occurrence at all, the result is a single empty string
/// rather than an empty vector. An empty `sub` matches at every character
/// boundary, so it yields one empty string per boundary.
fn single_str_text_extract_all(string: String, sub: String) -> Vec<String> {
    // `matches` tracks the absolute search position itself. Re-searching a
    // suffix and reusing the suffix-relative offset as an absolute one never
    // advances past the second occurrence (or past an empty `sub`).
    let mut rs: Vec<String> = string
        .matches(sub.as_str())
        .map(String::from)
        .collect();
    if rs.is_empty() {
        rs.push(String::new());
    }
    rs
}
153
+
154
+ #[ extendr]
155
/// Returns the number of Unicode scalar values (`char`s) in `str`.
///
/// This is a character count, not a byte count: a multi-byte UTF-8 sequence
/// contributes one to the result.
fn str_length(str: String) -> i64 {
    // A `String` is always valid UTF-8, so the standard iterator gives the
    // same answer as counting lead bytes by hand.
    str.chars().count() as i64
}
0 commit comments