4747import operator
4848import re
4949import contextlib
50+ import io
5051
5152try :
5253 import win32security
@@ -90,12 +91,32 @@ def surrogate_escape(error):
9091 getcwdu = os .getcwdu
9192 u = lambda x : codecs .unicode_escape_decode (x )[0 ]
9293 codecs .register_error ('surrogateescape' , surrogate_escape )
94+
@contextlib.contextmanager
def io_error_compat():
    """Context manager that re-raises :class:`IOError` as :class:`OSError`.

    Any ``IOError`` raised inside the ``with`` body is replaced by an
    ``OSError`` built from the same ``args``; the ``filename`` attribute
    is copied across when the original exception has one (``None``
    otherwise).  Other exception types propagate untouched.
    """
    try:
        yield
    except IOError as io_err:
        # On Python 2, io.open raises IOError; transform to OSError for
        # future compatibility.
        os_err = OSError(*io_err.args)
        os_err.filename = getattr(io_err, 'filename', None)
        raise os_err
105+
93106##############################################################################
94107
__version__ = '7.0'
__all__ = ['Path', 'path', 'CaseInsensitivePattern']


# Line terminators recognised in 8-bit strings.  '\r\n' must come first so
# that the alternation below consumes it as one terminator, not two.
LINESEPS = [u('\r\n'), u('\r'), u('\n')]
# Terminators recognised in unicode text: the above plus NEL (U+0085),
# LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029).
U_LINESEPS = LINESEPS + [u('\u0085'), u('\u2028'), u('\u2029')]
# Match any single line terminator anywhere in a string.
NEWLINE = re.compile('|'.join(LINESEPS))
U_NEWLINE = re.compile('|'.join(U_LINESEPS))
# Match exactly one line terminator at the very end of a string.  ``\Z`` is
# used instead of ``$`` because ``$`` also matches just before a trailing
# newline, which would make ``sub('', 'abc\n\n')`` strip both newlines.
NL_END = re.compile(u(r'(?:{0})\Z').format(NEWLINE.pattern))
U_NL_END = re.compile(u(r'(?:{0})\Z').format(U_NEWLINE.pattern))
118+
119+
class TreeWalkWarning(Warning):
    """Warning category defined by this module.

    NOTE(review): presumably issued when an error is encountered while
    walking a directory tree -- confirm against the call sites.
    """
101122
@@ -687,11 +708,13 @@ def glob(self, pattern):
687708 # --- Reading or writing an entire file at once.
688709
def open(self, *args, **kwargs):
    """ Open this file and return a corresponding :class:`file` object.

    Positional and keyword arguments are forwarded to :func:`io.open`
    after the path itself.  The call is made inside
    ``io_error_compat()`` so that an ``IOError`` raised while opening is
    surfaced as an :class:`~exceptions.OSError`.
    """
    with io_error_compat():
        return io.open(self, *args, **kwargs)
695718
696719 def bytes (self ):
697720 """ Open this file, read all bytes, return them as a string. """
@@ -702,7 +725,7 @@ def chunks(self, size, *args, **kwargs):
702725 """ Returns a generator yielding chunks of the file, so it can
703726 be read piece by piece with a simple for loop.
704727
705- Any argument you pass after `size` will be passed to `open() `.
728+ Any argument you pass after `size` will be passed to :meth: `open`.
706729
707730 :example:
708731
@@ -712,7 +735,7 @@ def chunks(self, size, *args, **kwargs):
712735
713736 This will read the file by chunks of 8192 bytes.
714737 """
715- with open (self , * args , ** kwargs ) as f :
738+ with self . open (* args , ** kwargs ) as f :
716739 while True :
717740 d = f .read (size )
718741 if not d :
@@ -735,34 +758,13 @@ def write_bytes(self, bytes, append=False):
def text(self, encoding=None, errors='strict'):
    r""" Open this file, read it in, return the content as a string.

    All newline sequences are converted to ``'\n'``.  `encoding` and
    `errors` are passed to :meth:`open` as keyword arguments, together
    with ``mode='r'``.

    .. seealso:: :meth:`lines`
    """
    with self.open(mode='r', encoding=encoding, errors=errors) as f:
        # U_NEWLINE also covers the Unicode-only terminators listed in
        # U_LINESEPS, so they are normalised here as well.
        return U_NEWLINE.sub('\n', f.read())
766768
def write_text(self, text, encoding=None, errors='strict',
               linesep=os.linesep, append=False):
    r""" Write the given text to this file.

    Parameters:

      `text` - The string to write.  Unicode text is encoded before
          writing; an 8-bit string is written as bytes.
      `encoding` - Encoding used for unicode `text`.  When it is
          ``None`` (or otherwise false),
          :func:`sys.getdefaultencoding` is used.  It must be ``None``
          when `text` is an 8-bit string.
      `errors` - Error-handling scheme passed to ``text.encode``.
      `linesep` - String substituted for every line terminator found
          in `text`.  Pass ``None`` to write line endings unchanged.
      `append` - Forwarded to :meth:`write_bytes` as ``append=``.
    """
    if isinstance(text, text_type):
        if linesep is not None:
            text = U_NEWLINE.sub(linesep, text)
        text = text.encode(encoding or sys.getdefaultencoding(), errors)
    else:
        # It is an error to specify an encoding if 'text' is an 8-bit
        # string.
        assert encoding is None
        # Only rewrite line endings when a separator was requested;
        # substituting with linesep=None would raise TypeError.
        if linesep is not None:
            text = NEWLINE.sub(linesep, text)
    self.write_bytes(text, append=append)
856842
857843 def lines (self , encoding = None , errors = 'strict' , retain = True ):
858844 r""" Open this file, read all lines, return them in a list.
@@ -917,33 +903,15 @@ def write_lines(self, lines, encoding=None, errors='strict',
917903 mixed-encoding data, which can really confuse someone trying
918904 to read the file later.
919905 """
920- if append :
921- mode = 'ab'
922- else :
923- mode = 'wb'
924- with self .open (mode ) as f :
925- for line in lines :
926- isUnicode = isinstance (line , text_type )
906+ with self .open ('ab' if append else 'wb' ) as f :
907+ for l in lines :
908+ isUnicode = isinstance (l , text_type )
927909 if linesep is not None :
928- # Strip off any existing line-end and add the
929- # specified linesep string.
930- if isUnicode :
931- if line [- 2 :] in (u ('\r \n ' ), u ('\x0d \x85 ' )):
932- line = line [:- 2 ]
933- elif line [- 1 :] in (u ('\r ' ), u ('\n ' ),
934- u ('\x85 ' ), u ('\u2028 ' )):
935- line = line [:- 1 ]
936- else :
937- if line [- 2 :] == '\r \n ' :
938- line = line [:- 2 ]
939- elif line [- 1 :] in ('\r ' , '\n ' ):
940- line = line [:- 1 ]
941- line += linesep
910+ pattern = U_NL_END if isUnicode else NL_END
911+ l = pattern .sub ('' , l ) + linesep
942912 if isUnicode :
943- if encoding is None :
944- encoding = sys .getdefaultencoding ()
945- line = line .encode (encoding , errors )
946- f .write (line )
913+ l = l .encode (encoding or sys .getdefaultencoding (), errors )
914+ f .write (l )
947915
948916 def read_md5 (self ):
949917 """ Calculate the md5 hash for this file.
0 commit comments