@@ -47,21 +47,57 @@ pub enum ScanSourceRef<'a> {
47
47
}
48
48
49
49
/// A single source to scan from
50
- #[ derive( Debug ) ]
50
+ #[ derive( Debug , Clone ) ]
51
51
pub enum ScanSource {
52
- Path ( PathBuf ) ,
53
- File ( File ) ,
52
+ Path ( Arc < Path > ) ,
53
+ File ( Arc < File > ) ,
54
54
Buffer ( MemSlice ) ,
55
55
}
56
56
57
57
impl ScanSource {
58
+ pub fn from_sources ( sources : ScanSources ) -> Result < Self , ScanSources > {
59
+ if sources. len ( ) == 1 {
60
+ match sources {
61
+ ScanSources :: Paths ( ps) => Ok ( Self :: Path ( ps. as_ref ( ) [ 0 ] . clone ( ) . into ( ) ) ) ,
62
+ ScanSources :: Files ( fs) => {
63
+ assert_eq ! ( fs. len( ) , 1 ) ;
64
+ let ptr: * const File = Arc :: into_raw ( fs) as * const File ;
65
+ // SAFETY: A [T] with length 1 can be interpreted as T
66
+ let f: Arc < File > = unsafe { Arc :: from_raw ( ptr) } ;
67
+
68
+ Ok ( Self :: File ( f) )
69
+ } ,
70
+ ScanSources :: Buffers ( bs) => Ok ( Self :: Buffer ( bs. as_ref ( ) [ 0 ] . clone ( ) ) ) ,
71
+ }
72
+ } else {
73
+ Err ( sources)
74
+ }
75
+ }
76
+
58
77
pub fn into_sources ( self ) -> ScanSources {
59
78
match self {
60
- ScanSource :: Path ( p) => ScanSources :: Paths ( [ p] . into ( ) ) ,
61
- ScanSource :: File ( f) => ScanSources :: Files ( [ f] . into ( ) ) ,
79
+ ScanSource :: Path ( p) => ScanSources :: Paths ( [ p. to_path_buf ( ) ] . into ( ) ) ,
80
+ ScanSource :: File ( f) => {
81
+ let ptr: * const [ File ] = std:: ptr:: slice_from_raw_parts ( Arc :: into_raw ( f) , 1 ) ;
82
+ // SAFETY: A T can be interpreted as [T] with length 1.
83
+ let fs: Arc < [ File ] > = unsafe { Arc :: from_raw ( ptr) } ;
84
+ ScanSources :: Files ( fs)
85
+ } ,
62
86
ScanSource :: Buffer ( m) => ScanSources :: Buffers ( [ m] . into ( ) ) ,
63
87
}
64
88
}
89
+
90
+ pub fn as_scan_source_ref ( & self ) -> ScanSourceRef {
91
+ match self {
92
+ ScanSource :: Path ( path) => ScanSourceRef :: Path ( path. as_ref ( ) ) ,
93
+ ScanSource :: File ( file) => ScanSourceRef :: File ( file. as_ref ( ) ) ,
94
+ ScanSource :: Buffer ( mem_slice) => ScanSourceRef :: Buffer ( mem_slice) ,
95
+ }
96
+ }
97
+
98
+ pub fn run_async ( & self ) -> bool {
99
+ self . as_scan_source_ref ( ) . run_async ( )
100
+ }
65
101
}
66
102
67
103
/// An iterator for [`ScanSources`]
@@ -261,8 +297,15 @@ impl ScanSourceRef<'_> {
261
297
// @TODO: I would like to remove this function eventually.
262
298
pub fn into_owned ( & self ) -> PolarsResult < ScanSource > {
263
299
Ok ( match self {
264
- ScanSourceRef :: Path ( path) => ScanSource :: Path ( path. to_path_buf ( ) ) ,
265
- _ => ScanSource :: Buffer ( self . to_memslice ( ) ?) ,
300
+ ScanSourceRef :: Path ( path) => ScanSource :: Path ( ( * path) . into ( ) ) ,
301
+ ScanSourceRef :: File ( file) => {
302
+ if let Ok ( file) = file. try_clone ( ) {
303
+ ScanSource :: File ( Arc :: new ( file) )
304
+ } else {
305
+ ScanSource :: Buffer ( self . to_memslice ( ) ?)
306
+ }
307
+ } ,
308
+ ScanSourceRef :: Buffer ( buffer) => ScanSource :: Buffer ( ( * buffer) . clone ( ) ) ,
266
309
} )
267
310
}
268
311
@@ -335,6 +378,10 @@ impl ScanSourceRef<'_> {
335
378
Self :: Buffer ( buff) => Ok ( DynByteSource :: from ( ( * buff) . clone ( ) ) ) ,
336
379
}
337
380
}
381
+
382
+ pub ( crate ) fn run_async ( & self ) -> bool {
383
+ matches ! ( self , Self :: Path ( p) if polars_io:: is_cloud_url( p) || polars_core:: config:: force_async( ) )
384
+ }
338
385
}
339
386
340
387
impl < ' a > Iterator for ScanSourceIter < ' a > {
0 commit comments