@@ -175,9 +175,10 @@ def safe_process_datapackage(self, dp: Package):
175
175
descriptor ['encoding' ] = self .options ['encoding' ]
176
176
self .options ['custom_parsers' ] = self .get_custom_parsers (self .options .get ('custom_parsers' ))
177
177
self .options .setdefault ('ignore_blank_headers' , True )
178
+ if 'headers' not in self .options :
179
+ self .options .setdefault ('skip_rows' , [{'type' : 'preset' , 'value' : 'auto' }])
178
180
self .options .setdefault ('headers' , 1 )
179
181
self .options .setdefault ('sample_size' , 1000 )
180
- self .options .setdefault ('skip_rows' , [{'type' : 'preset' , 'value' : 'auto' }])
181
182
stream : Stream = Stream (self .load_source , ** self .options ).open ()
182
183
if len (stream .headers ) != len (set (stream .headers )):
183
184
if not self .deduplicate_headers :
@@ -215,11 +216,16 @@ def safe_process_datapackage(self, dp: Package):
215
216
return dp
216
217
217
218
def stripper(self, iterator):
    """Strip leading/trailing whitespace from the string values of each row.

    Each row produced by ``iterator`` is expected to be a mutable mapping
    (it must support ``.items()`` and item assignment). Rows are modified
    in place and the very same row object is yielded back, so no new
    mapping is allocated per row.

    :param iterator: iterable of row mappings.
    :return: generator yielding the (possibly modified) input rows.
    """
    for r in iterator:
        # Re-assigning an existing key does not change the mapping's size,
        # so it is safe to do while iterating over ``r.items()``.
        for k, v in r.items():
            # Type check first so truthiness is only evaluated on strings.
            # ``str.isspace`` recognises exactly the characters that
            # ``str.strip()`` removes, so the cheap edge test never skips a
            # value that ``strip()`` would have changed (e.g. '\x0c', '\xa0').
            if isinstance(v, str) and v and (v[0].isspace() or v[-1].isspace()):
                r[k] = v.strip()
        yield r
223
229
224
230
def limiter (self , iterator ):
225
231
count = 0
0 commit comments