47
47
import operator
48
48
import re
49
49
import contextlib
50
+ import io
50
51
51
52
try :
52
53
import win32security
@@ -90,12 +91,32 @@ def surrogate_escape(error):
90
91
getcwdu = os .getcwdu
91
92
u = lambda x : codecs .unicode_escape_decode (x )[0 ]
92
93
codecs .register_error ('surrogateescape' , surrogate_escape )
94
+
95
+ @contextlib .contextmanager
96
+ def io_error_compat ():
97
+ try :
98
+ yield
99
+ except IOError as io_err :
100
+ # On Python 2, io.open raises IOError; transform to OSError for
101
+ # future compatibility.
102
+ os_err = OSError (* io_err .args )
103
+ os_err .filename = getattr (io_err , 'filename' , None )
104
+ raise os_err
105
+
93
106
##############################################################################
94
107
95
- __version__ = '6.2 '
108
+ __version__ = '7.0 '
96
109
__all__ = ['Path' , 'path' , 'CaseInsensitivePattern' ]
97
110
98
111
112
+ LINESEPS = [u ('\r \n ' ), u ('\r ' ), u ('\n ' )]
113
+ U_LINESEPS = LINESEPS + [u ('\u0085 ' ), u ('\u2028 ' ), u ('\u2029 ' )]
114
+ NEWLINE = re .compile ('|' .join (LINESEPS ))
115
+ U_NEWLINE = re .compile ('|' .join (U_LINESEPS ))
116
+ NL_END = re .compile (u (r'(?:{0})$' ).format (NEWLINE .pattern ))
117
+ U_NL_END = re .compile (u (r'(?:{0})$' ).format (U_NEWLINE .pattern ))
118
+
119
+
99
120
class TreeWalkWarning (Warning ):
100
121
pass
101
122
@@ -687,11 +708,13 @@ def glob(self, pattern):
687
708
# --- Reading or writing an entire file at once.
688
709
689
710
def open (self , * args , ** kwargs ):
690
- """ Open this file. Return a :class:`file` object.
711
+ """ Open this file and return a corresponding :class:`file` object.
691
712
692
- .. seealso:: :func:`python:open`
713
+ Keyword arguments work as in :func:`io.open`. If the file cannot be
714
+ opened, an :class:`~exceptions.OSError` is raised.
693
715
"""
694
- return open (self , * args , ** kwargs )
716
+ with io_error_compat ():
717
+ return io .open (self , * args , ** kwargs )
695
718
696
719
def bytes (self ):
697
720
""" Open this file, read all bytes, return them as a string. """
@@ -702,7 +725,7 @@ def chunks(self, size, *args, **kwargs):
702
725
""" Returns a generator yielding chunks of the file, so it can
703
726
be read piece by piece with a simple for loop.
704
727
705
- Any argument you pass after `size` will be passed to `open() `.
728
+ Any argument you pass after `size` will be passed to :meth: `open`.
706
729
707
730
:example:
708
731
@@ -712,7 +735,7 @@ def chunks(self, size, *args, **kwargs):
712
735
713
736
This will read the file by chunks of 8192 bytes.
714
737
"""
715
- with open (self , * args , ** kwargs ) as f :
738
+ with self . open (* args , ** kwargs ) as f :
716
739
while True :
717
740
d = f .read (size )
718
741
if not d :
@@ -735,34 +758,13 @@ def write_bytes(self, bytes, append=False):
735
758
def text (self , encoding = None , errors = 'strict' ):
736
759
r""" Open this file, read it in, return the content as a string.
737
760
738
- This method uses ``'U'`` mode, so ``'\r\n'`` and ``'\r'`` are
739
- automatically translated to ``'\n'``.
740
-
741
- Optional arguments:
742
- `encoding` - The Unicode encoding (or character set) of
743
- the file. If present, the content of the file is
744
- decoded and returned as a unicode object; otherwise
745
- it is returned as an 8-bit str.
746
- `errors` - How to handle Unicode errors; see :meth:`str.decode`
747
- for the options. Default is 'strict'.
761
+ All newline sequences are converted to ``'\n'``. Keyword arguments
762
+ will be passed to :meth:`open`.
748
763
749
764
.. seealso:: :meth:`lines`
750
765
"""
751
- if encoding is None :
752
- # 8-bit
753
- with self .open ('U' ) as f :
754
- return f .read ()
755
- else :
756
- # Unicode
757
- with codecs .open (self , 'r' , encoding , errors ) as f :
758
- # (Note - Can't use 'U' mode here, since codecs.open
759
- # doesn't support 'U' mode.)
760
- t = f .read ()
761
- return (t .replace (u ('\r \n ' ), u ('\n ' ))
762
- .replace (u ('\r \x85 ' ), u ('\n ' ))
763
- .replace (u ('\r ' ), u ('\n ' ))
764
- .replace (u ('\x85 ' ), u ('\n ' ))
765
- .replace (u ('\u2028 ' ), u ('\n ' )))
766
+ with self .open (mode = 'r' , encoding = encoding , errors = errors ) as f :
767
+ return U_NEWLINE .sub ('\n ' , f .read ())
766
768
767
769
def write_text (self , text , encoding = None , errors = 'strict' ,
768
770
linesep = os .linesep , append = False ):
@@ -831,28 +833,12 @@ def write_text(self, text, encoding=None, errors='strict',
831
833
"""
832
834
if isinstance (text , text_type ):
833
835
if linesep is not None :
834
- # Convert all standard end-of-line sequences to
835
- # ordinary newline characters.
836
- text = (text .replace (u ('\r \n ' ), u ('\n ' ))
837
- .replace (u ('\r \x85 ' ), u ('\n ' ))
838
- .replace (u ('\r ' ), u ('\n ' ))
839
- .replace (u ('\x85 ' ), u ('\n ' ))
840
- .replace (u ('\u2028 ' ), u ('\n ' )))
841
- text = text .replace (u ('\n ' ), linesep )
842
- if encoding is None :
843
- encoding = sys .getdefaultencoding ()
844
- bytes = text .encode (encoding , errors )
836
+ text = U_NEWLINE .sub (linesep , text )
837
+ text = text .encode (encoding or sys .getdefaultencoding (), errors )
845
838
else :
846
- # It is an error to specify an encoding if 'text' is
847
- # an 8-bit string.
848
839
assert encoding is None
849
-
850
- if linesep is not None :
851
- text = (text .replace ('\r \n ' , '\n ' )
852
- .replace ('\r ' , '\n ' ))
853
- bytes = text .replace ('\n ' , linesep )
854
-
855
- self .write_bytes (bytes , append )
840
+ text = NEWLINE .sub (linesep , text )
841
+ self .write_bytes (text , append = append )
856
842
857
843
def lines (self , encoding = None , errors = 'strict' , retain = True ):
858
844
r""" Open this file, read all lines, return them in a list.
@@ -917,33 +903,15 @@ def write_lines(self, lines, encoding=None, errors='strict',
917
903
mixed-encoding data, which can really confuse someone trying
918
904
to read the file later.
919
905
"""
920
- if append :
921
- mode = 'ab'
922
- else :
923
- mode = 'wb'
924
- with self .open (mode ) as f :
925
- for line in lines :
926
- isUnicode = isinstance (line , text_type )
906
+ with self .open ('ab' if append else 'wb' ) as f :
907
+ for l in lines :
908
+ isUnicode = isinstance (l , text_type )
927
909
if linesep is not None :
928
- # Strip off any existing line-end and add the
929
- # specified linesep string.
930
- if isUnicode :
931
- if line [- 2 :] in (u ('\r \n ' ), u ('\x0d \x85 ' )):
932
- line = line [:- 2 ]
933
- elif line [- 1 :] in (u ('\r ' ), u ('\n ' ),
934
- u ('\x85 ' ), u ('\u2028 ' )):
935
- line = line [:- 1 ]
936
- else :
937
- if line [- 2 :] == '\r \n ' :
938
- line = line [:- 2 ]
939
- elif line [- 1 :] in ('\r ' , '\n ' ):
940
- line = line [:- 1 ]
941
- line += linesep
910
+ pattern = U_NL_END if isUnicode else NL_END
911
+ l = pattern .sub ('' , l ) + linesep
942
912
if isUnicode :
943
- if encoding is None :
944
- encoding = sys .getdefaultencoding ()
945
- line = line .encode (encoding , errors )
946
- f .write (line )
913
+ l = l .encode (encoding or sys .getdefaultencoding (), errors )
914
+ f .write (l )
947
915
948
916
def read_md5 (self ):
949
917
""" Calculate the md5 hash for this file.
0 commit comments