1
- use std:: path:: Path ;
1
+ use std:: path:: { Path , PathBuf } ;
2
+ use ignore:: gitignore:: { Gitignore , GitignoreBuilder } ;
3
+ use ignore:: WalkBuilder ;
2
4
use tantivy:: { schema:: Schema , IndexWriter , doc, Term } ;
3
5
use anyhow:: Result ;
4
6
use async_trait:: async_trait;
@@ -11,7 +13,6 @@ use crate::intelligence::{TreeSitterFile, TSLanguage};
11
13
use crate :: symbol:: SymbolLocations ;
12
14
use crate :: schema:: build_schema;
13
15
use sha2:: { Sha256 , Digest } ;
14
- use globset:: { Glob , GlobSet , GlobSetBuilder } ;
15
16
16
17
pub struct File {
17
18
pub schema : Schema ,
@@ -60,13 +61,13 @@ impl File {
60
61
impl Indexable for File {
61
62
async fn index_repository ( & self , root_path : & Path , writer : & IndexWriter ) -> Result < ( ) > {
62
63
let existing_docs = load_existing_docs ( writer, & self . hash_field , & self . path_field ) ?;
63
- let mut gitignore = GlobSetBuilder :: new ( ) ;
64
+ let gitignore_manager = GitignoreManager :: new ( root_path . to_path_buf ( ) ) . await ? ;
64
65
65
66
traverse_and_index_files (
66
- root_path, writer, & self . schema , self . path_field , self . content_field ,
67
+ root_path, writer, self . path_field , self . content_field ,
67
68
self . symbol_locations_field , self . symbols_field , self . line_end_indices_field ,
68
69
self . lang_field , self . hash_field , self . content_insensitive_field ,
69
- & existing_docs, & mut gitignore , root_path ) . await
70
+ & existing_docs, & gitignore_manager ) . await
70
71
}
71
72
72
73
fn schema ( & self ) -> Schema {
@@ -93,73 +94,96 @@ fn load_existing_docs(writer: &IndexWriter, hash_field: &tantivy::schema::Field,
93
94
Ok ( existing_docs)
94
95
}
95
96
96
- async fn parse_gitignore ( current_path : & Path , builder : & mut GlobSetBuilder ) -> Result < ( ) > {
97
- let gitignore_path = current_path. join ( ".gitignore" ) ;
98
-
99
- if gitignore_path. exists ( ) {
100
- let contents = tokio:: fs:: read_to_string ( & gitignore_path) . await ?;
101
- for line in contents. lines ( ) {
102
- let trimmed_line = line. trim ( ) ;
103
- if !trimmed_line. starts_with ( '#' ) && !trimmed_line. is_empty ( ) {
104
- let absolute_pattern = if trimmed_line. starts_with ( '/' ) {
105
- // The pattern is already an absolute path, so we just use it as is
106
- current_path. join ( trimmed_line. trim_start_matches ( '/' ) )
107
- } else {
108
- // The pattern is a relative path, so we join it with the current path
109
- current_path. join ( trimmed_line)
110
- } ;
111
- let pattern = absolute_pattern. to_string_lossy ( ) . replace ( "\\ " , "/" ) ;
112
- // println!("Adding to gitignore: {}", pattern);
113
- builder. add ( Glob :: new ( & pattern) ?) ;
97
+ struct GitignoreManager {
98
+ root_path : PathBuf ,
99
+ gitignores : Vec < ( PathBuf , Gitignore ) > ,
100
+ }
101
+
102
+ impl GitignoreManager {
103
+ async fn new ( root_path : PathBuf ) -> Result < Self > {
104
+ let mut manager = GitignoreManager {
105
+ root_path,
106
+ gitignores : Vec :: new ( ) ,
107
+ } ;
108
+ manager. load_gitignores ( ) . await ?;
109
+ Ok ( manager)
110
+ }
111
+
112
+ async fn load_gitignores ( & mut self ) -> Result < ( ) > {
113
+ let walk = WalkBuilder :: new ( & self . root_path )
114
+ . hidden ( false )
115
+ . git_ignore ( false )
116
+ . build ( ) ;
117
+
118
+ for entry in walk {
119
+ let entry = entry?;
120
+ let path = entry. path ( ) ;
121
+ if path. file_name ( ) == Some ( ".gitignore" . as_ref ( ) ) {
122
+ let gitignore_dir = path. parent ( ) . unwrap ( ) . to_path_buf ( ) ;
123
+ let mut builder = GitignoreBuilder :: new ( & gitignore_dir) ;
124
+ builder. add ( path) ;
125
+ match builder. build ( ) {
126
+ Ok ( gitignore) => {
127
+ self . gitignores . push ( ( gitignore_dir, gitignore) ) ;
128
+ } ,
129
+ Err ( err) => {
130
+ eprintln ! ( "Error building gitignore for {:?}: {}" , path, err) ;
131
+ // Optionally, you can choose to return the error or continue
132
+ // return Err(err.into());
133
+ }
134
+ }
114
135
}
115
136
}
137
+
138
+ // Sort gitignores from most specific (deepest) to least specific (root)
139
+ self . gitignores . sort_by ( |a, b| b. 0 . components ( ) . count ( ) . cmp ( & a. 0 . components ( ) . count ( ) ) ) ;
140
+
141
+ Ok ( ( ) )
116
142
}
117
143
118
- Ok ( ( ) )
144
+ fn is_ignored ( & self , path : & Path ) -> bool {
145
+ for ( dir, gitignore) in & self . gitignores {
146
+ if path. starts_with ( dir) {
147
+ let relative_path = path. strip_prefix ( dir) . unwrap ( ) ;
148
+ match gitignore. matched ( relative_path, false ) {
149
+ ignore:: Match :: Ignore ( _) => return true ,
150
+ ignore:: Match :: Whitelist ( _) => return false ,
151
+ ignore:: Match :: None => continue ,
152
+ }
153
+ }
154
+ }
155
+ false
156
+ }
119
157
}
120
158
121
-
122
159
fn traverse_and_index_files < ' a > (
123
160
path : & ' a Path ,
124
161
writer : & ' a IndexWriter ,
125
- schema : & ' a Schema ,
126
162
path_field : tantivy:: schema:: Field ,
127
163
content_field : tantivy:: schema:: Field ,
128
164
symbol_locations_field : tantivy:: schema:: Field ,
129
165
symbols_field : tantivy:: schema:: Field ,
130
166
line_end_indices_field : tantivy:: schema:: Field ,
131
167
lang_field : tantivy:: schema:: Field ,
132
168
hash_field : tantivy:: schema:: Field ,
133
- content_insensitive_field : tantivy:: schema:: Field , // New field
169
+ content_insensitive_field : tantivy:: schema:: Field ,
134
170
existing_docs : & ' a HashMap < String , String > ,
135
- gitignore : & ' a mut GlobSetBuilder ,
136
- root_path : & ' a Path ,
171
+ gitignore_manager : & ' a GitignoreManager ,
137
172
) -> BoxFuture < ' a , Result < ( ) > > {
138
173
Box :: pin ( async move {
139
- // Parse .gitignore in the current directory and update the builder
140
- parse_gitignore ( path, gitignore) . await ?;
141
-
142
- // Build the GlobSet from the builder
143
- let globset = gitignore. build ( ) ?;
144
-
145
174
let mut entries = fs:: read_dir ( path) . await ?;
146
175
while let Some ( entry) = entries. next_entry ( ) . await ? {
147
176
let path = entry. path ( ) ;
148
-
149
- // Convert the path to an absolute path
150
- let absolute_path = path. canonicalize ( ) ?;
151
- let absolute_path_str = absolute_path. to_string_lossy ( ) . replace ( "\\ " , "/" ) ;
152
-
153
- // Skip paths that match .gitignore patterns
154
- if globset. is_match ( & absolute_path_str) {
177
+
178
+ if gitignore_manager. is_ignored ( & path) {
155
179
continue ;
156
180
}
157
-
158
- if path. is_dir ( ) {
181
+
182
+ if path. is_dir ( ) {
159
183
traverse_and_index_files (
160
- & path, writer, schema , path_field, content_field, symbol_locations_field,
184
+ & path, writer, path_field, content_field, symbol_locations_field,
161
185
symbols_field, line_end_indices_field, lang_field, hash_field, content_insensitive_field,
162
- existing_docs, gitignore , root_path ) . await ?;
186
+ existing_docs, gitignore_manager ) . await ?;
163
187
} else if path. is_file ( ) {
164
188
let path_clone = path. clone ( ) ;
165
189
let content = spawn_blocking ( move || std:: fs:: read ( & path_clone) ) . await ??;
@@ -173,6 +197,9 @@ fn traverse_and_index_files<'a>(
173
197
let mut hasher = Sha256 :: new ( ) ;
174
198
hasher. update ( & content_str) ;
175
199
let hash = format ! ( "{:x}" , hasher. finalize( ) ) ;
200
+
201
+ let absolute_path = path. canonicalize ( ) ?;
202
+ let absolute_path_str = absolute_path. to_string_lossy ( ) . replace ( "\\ " , "/" ) ;
176
203
177
204
let path_str = absolute_path_str. clone ( ) ;
178
205
if let Some ( existing_hash) = existing_docs. get ( & path_str) {
@@ -224,7 +251,7 @@ fn traverse_and_index_files<'a>(
224
251
// Convert content to lower case for case-insensitive search
225
252
let content_insensitive = content_str. to_lowercase ( ) ;
226
253
227
- // println!("{}", absolute_path_str);
254
+ println ! ( "{}" , absolute_path_str) ;
228
255
229
256
let doc = tantivy:: doc!(
230
257
path_field => path_str,
0 commit comments