@@ -47,21 +47,57 @@ pub enum ScanSourceRef<'a> {
4747}
4848
4949/// A single source to scan from
50- #[ derive( Debug ) ]
50+ #[ derive( Debug , Clone ) ]
5151pub enum ScanSource {
52- Path ( PathBuf ) ,
53- File ( File ) ,
52+ Path ( Arc < Path > ) ,
53+ File ( Arc < File > ) ,
5454 Buffer ( MemSlice ) ,
5555}
5656
5757impl ScanSource {
58+ pub fn from_sources ( sources : ScanSources ) -> Result < Self , ScanSources > {
59+ if sources. len ( ) == 1 {
60+ match sources {
61+ ScanSources :: Paths ( ps) => Ok ( Self :: Path ( ps. as_ref ( ) [ 0 ] . clone ( ) . into ( ) ) ) ,
62+ ScanSources :: Files ( fs) => {
63+ assert_eq ! ( fs. len( ) , 1 ) ;
64+ let ptr: * const File = Arc :: into_raw ( fs) as * const File ;
65+ // SAFETY: A [T] with length 1 can be interpreted as T
66+ let f: Arc < File > = unsafe { Arc :: from_raw ( ptr) } ;
67+
68+ Ok ( Self :: File ( f) )
69+ } ,
70+ ScanSources :: Buffers ( bs) => Ok ( Self :: Buffer ( bs. as_ref ( ) [ 0 ] . clone ( ) ) ) ,
71+ }
72+ } else {
73+ Err ( sources)
74+ }
75+ }
76+
5877 pub fn into_sources ( self ) -> ScanSources {
5978 match self {
60- ScanSource :: Path ( p) => ScanSources :: Paths ( [ p] . into ( ) ) ,
61- ScanSource :: File ( f) => ScanSources :: Files ( [ f] . into ( ) ) ,
79+ ScanSource :: Path ( p) => ScanSources :: Paths ( [ p. to_path_buf ( ) ] . into ( ) ) ,
80+ ScanSource :: File ( f) => {
81+ let ptr: * const [ File ] = std:: ptr:: slice_from_raw_parts ( Arc :: into_raw ( f) , 1 ) ;
82+ // SAFETY: A T can be interpreted as [T] with length 1.
83+ let fs: Arc < [ File ] > = unsafe { Arc :: from_raw ( ptr) } ;
84+ ScanSources :: Files ( fs)
85+ } ,
6286 ScanSource :: Buffer ( m) => ScanSources :: Buffers ( [ m] . into ( ) ) ,
6387 }
6488 }
89+
90+ pub fn as_scan_source_ref ( & self ) -> ScanSourceRef {
91+ match self {
92+ ScanSource :: Path ( path) => ScanSourceRef :: Path ( path. as_ref ( ) ) ,
93+ ScanSource :: File ( file) => ScanSourceRef :: File ( file. as_ref ( ) ) ,
94+ ScanSource :: Buffer ( mem_slice) => ScanSourceRef :: Buffer ( mem_slice) ,
95+ }
96+ }
97+
98+ pub fn run_async ( & self ) -> bool {
99+ self . as_scan_source_ref ( ) . run_async ( )
100+ }
65101}
66102
67103/// An iterator for [`ScanSources`]
@@ -261,8 +297,15 @@ impl ScanSourceRef<'_> {
261297 // @TODO: I would like to remove this function eventually.
262298 pub fn into_owned ( & self ) -> PolarsResult < ScanSource > {
263299 Ok ( match self {
264- ScanSourceRef :: Path ( path) => ScanSource :: Path ( path. to_path_buf ( ) ) ,
265- _ => ScanSource :: Buffer ( self . to_memslice ( ) ?) ,
300+ ScanSourceRef :: Path ( path) => ScanSource :: Path ( ( * path) . into ( ) ) ,
301+ ScanSourceRef :: File ( file) => {
302+ if let Ok ( file) = file. try_clone ( ) {
303+ ScanSource :: File ( Arc :: new ( file) )
304+ } else {
305+ ScanSource :: Buffer ( self . to_memslice ( ) ?)
306+ }
307+ } ,
308+ ScanSourceRef :: Buffer ( buffer) => ScanSource :: Buffer ( ( * buffer) . clone ( ) ) ,
266309 } )
267310 }
268311
@@ -335,6 +378,10 @@ impl ScanSourceRef<'_> {
335378 Self :: Buffer ( buff) => Ok ( DynByteSource :: from ( ( * buff) . clone ( ) ) ) ,
336379 }
337380 }
381+
382+ pub ( crate ) fn run_async ( & self ) -> bool {
383+ matches ! ( self , Self :: Path ( p) if polars_io:: is_cloud_url( p) || polars_core:: config:: force_async( ) )
384+ }
338385}
339386
340387impl < ' a > Iterator for ScanSourceIter < ' a > {
0 commit comments