Skip to content

Commit 10bb90e

Browse files
authored
gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)
1 parent e38b43c commit 10bb90e

File tree

8 files changed

+337
-108
lines changed

8 files changed

+337
-108
lines changed

Include/internal/pycore_fileutils.h

+2
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t
290290
extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd);
291291
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
292292

293+
// Examine the first `size` wide characters of `path` and report two lengths
// through the out-parameters `drvsize` and `rootsize`.
// NOTE(review): judging from the caller os__path_splitroot_ex_impl, *drvsize is
// the length of the leading drive part and *rootsize the length of the root
// that immediately follows it, both counted in wchar_t units -- confirm
// against the definition, which is not visible from this header.
extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize);
294+
293295
// Macros to protect CRT calls against instant termination when passed an
294296
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
295297
// Usage:

Lib/ntpath.py

+68-48
Original file line numberDiff line numberDiff line change
@@ -167,56 +167,76 @@ def splitdrive(p):
167167
return drive, root + tail
168168

169169

170-
def splitroot(p):
171-
"""Split a pathname into drive, root and tail. The drive is defined
172-
exactly as in splitdrive(). On Windows, the root may be a single path
173-
separator or an empty string. The tail contains anything after the root.
174-
For example:
175-
176-
splitroot('//server/share/') == ('//server/share', '/', '')
177-
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
178-
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
179-
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
180-
"""
181-
p = os.fspath(p)
182-
if isinstance(p, bytes):
183-
sep = b'\\'
184-
altsep = b'/'
185-
colon = b':'
186-
unc_prefix = b'\\\\?\\UNC\\'
187-
empty = b''
188-
else:
189-
sep = '\\'
190-
altsep = '/'
191-
colon = ':'
192-
unc_prefix = '\\\\?\\UNC\\'
193-
empty = ''
194-
normp = p.replace(altsep, sep)
195-
if normp[:1] == sep:
196-
if normp[1:2] == sep:
197-
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
198-
# Device drives, e.g. \\.\device or \\?\device
199-
start = 8 if normp[:8].upper() == unc_prefix else 2
200-
index = normp.find(sep, start)
201-
if index == -1:
202-
return p, empty, empty
203-
index2 = normp.find(sep, index + 1)
204-
if index2 == -1:
205-
return p, empty, empty
206-
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
170+
try:
    from nt import _path_splitroot_ex
except ImportError:
    def splitroot(p):
        """Break a Windows path into a (drive, root, tail) triple.

        The drive is whatever splitdrive() would report.  The root is
        either one path separator or empty, and the tail is everything
        that comes after the root.  Examples:

            splitroot('//server/share/') == ('//server/share', '/', '')
            splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
            splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
            splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
        """
        path = os.fspath(p)
        if isinstance(path, bytes):
            backslash, slash, colon = b'\\', b'/', b':'
            unc_marker = b'\\\\?\\UNC\\'
            blank = b''
        else:
            backslash, slash, colon = '\\', '/', ':'
            unc_marker = '\\\\?\\UNC\\'
            blank = ''
        # Analyse a copy with uniform separators, but always slice the
        # caller's original spelling when building the result.
        canon = path.replace(slash, backslash)

        if canon[:1] != backslash:
            if canon[1:2] != colon:
                # Plain relative path, e.g. Windows
                return blank, blank, path
            if canon[2:3] == backslash:
                # Drive letter followed by a root, e.g. X:\Windows
                return path[:2], path[2:3], path[3:]
            # Drive letter without a root, e.g. X:Windows
            return path[:2], blank, path[2:]

        if canon[1:2] != backslash:
            # Rooted but drive-less path, e.g. \Windows
            return blank, path[:1], path[1:]

        # Two leading separators: a UNC drive (\\server\share or
        # \\?\UNC\server\share) or a device drive (\\.\device, \\?\device).
        # The drive runs up to the second separator found after the prefix.
        skip = 8 if canon[:8].upper() == unc_marker else 2
        first = canon.find(backslash, skip)
        second = canon.find(backslash, first + 1) if first != -1 else -1
        if second == -1:
            # Fewer than two components after the prefix: all of it is drive.
            return path, blank, blank
        return path[:second], path[second:second + 1], path[second + 1:]
else:
    def splitroot(p):
        """Break a Windows path into a (drive, root, tail) triple.

        The drive is whatever splitdrive() would report.  The root is
        either one path separator or empty, and the tail is everything
        that comes after the root.  Examples:

            splitroot('//server/share/') == ('//server/share', '/', '')
            splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
            splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
            splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
        """
        path = os.fspath(p)
        if not isinstance(path, bytes):
            return _path_splitroot_ex(path)
        # The native helper is called with str only, so bytes input is
        # decoded first and each resulting part is encoded back.
        drive, root, tail = _path_splitroot_ex(os.fsdecode(path))
        return os.fsencode(drive), os.fsencode(root), os.fsencode(tail)
220240

221241

222242
# Split a path in head (everything up to the last '/') and tail (the

Lib/posixpath.py

+47-27
Original file line numberDiff line numberDiff line change
@@ -134,33 +134,53 @@ def splitdrive(p):
134134
return p[:0], p
135135

136136

137-
def splitroot(p):
138-
"""Split a pathname into drive, root and tail. On Posix, drive is always
139-
empty; the root may be empty, a single slash, or two slashes. The tail
140-
contains anything after the root. For example:
141-
142-
splitroot('foo/bar') == ('', '', 'foo/bar')
143-
splitroot('/foo/bar') == ('', '/', 'foo/bar')
144-
splitroot('//foo/bar') == ('', '//', 'foo/bar')
145-
splitroot('///foo/bar') == ('', '/', '//foo/bar')
146-
"""
147-
p = os.fspath(p)
148-
if isinstance(p, bytes):
149-
sep = b'/'
150-
empty = b''
151-
else:
152-
sep = '/'
153-
empty = ''
154-
if p[:1] != sep:
155-
# Relative path, e.g.: 'foo'
156-
return empty, empty, p
157-
elif p[1:2] != sep or p[2:3] == sep:
158-
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
159-
return empty, sep, p[1:]
160-
else:
161-
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
162-
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
163-
return empty, p[:2], p[2:]
137+
try:
138+
from posix import _path_splitroot_ex
139+
except ImportError:
140+
def splitroot(p):
141+
"""Split a pathname into drive, root and tail. On Posix, drive is always
142+
empty; the root may be empty, a single slash, or two slashes. The tail
143+
contains anything after the root. For example:
144+
145+
splitroot('foo/bar') == ('', '', 'foo/bar')
146+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
147+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
148+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
149+
"""
150+
p = os.fspath(p)
151+
if isinstance(p, bytes):
152+
sep = b'/'
153+
empty = b''
154+
else:
155+
sep = '/'
156+
empty = ''
157+
if p[:1] != sep:
158+
# Relative path, e.g.: 'foo'
159+
return empty, empty, p
160+
elif p[1:2] != sep or p[2:3] == sep:
161+
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
162+
return empty, sep, p[1:]
163+
else:
164+
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
165+
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
166+
return empty, p[:2], p[2:]
167+
else:
168+
def splitroot(p):
169+
"""Split a pathname into drive, root and tail. On Posix, drive is always
170+
empty; the root may be empty, a single slash, or two slashes. The tail
171+
contains anything after the root. For example:
172+
173+
splitroot('foo/bar') == ('', '', 'foo/bar')
174+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
175+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
176+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
177+
"""
178+
p = os.fspath(p)
179+
if isinstance(p, bytes):
180+
# Optimisation: the drive is always empty
181+
_, root, tail = _path_splitroot_ex(os.fsdecode(p))
182+
return b'', os.fsencode(root), os.fsencode(tail)
183+
return _path_splitroot_ex(p)
164184

165185

166186
# Return the tail (basename) part of a path, same as split(path)[1].

Lib/test/test_ntpath.py

+1
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ def test_normpath(self):
374374
tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
375375
tester("ntpath.normpath('\\\\foo')", '\\\\foo')
376376
tester("ntpath.normpath('\\\\')", '\\\\')
377+
tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\')
377378

378379
def test_realpath_curdir(self):
379380
expected = ntpath.normpath(os.getcwd())
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up :func:`os.path.splitroot` with a native implementation.

Modules/clinic/posixmodule.c.h

+59-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/posixmodule.c

+44
Original file line numberDiff line numberDiff line change
@@ -5467,6 +5467,49 @@ os__path_islink_impl(PyObject *module, PyObject *path)
54675467
#endif /* MS_WINDOWS */
54685468

54695469

5470+
/*[clinic input]
5471+
os._path_splitroot_ex
5472+
5473+
path: unicode
5474+
5475+
[clinic start generated code]*/
5476+
5477+
static PyObject *
os__path_splitroot_ex_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/
{
    /* Split `path` into a (drive, root, tail) tuple of str objects.
     *
     * The path is converted to a wide-character buffer, _Py_skiproot()
     * reports the sizes of the drive and root prefixes, and the three
     * slices are converted back to Python strings.  Returns a new
     * reference to a 3-tuple, or NULL if any conversion or the tuple
     * construction fails. */
    Py_ssize_t length, drive_len, root_len;
    PyObject *drive_obj = NULL, *root_obj = NULL, *tail_obj = NULL;
    PyObject *triple = NULL;

    wchar_t *wide = PyUnicode_AsWideCharString(path, &length);
    if (wide == NULL) {
        /* Nothing has been allocated yet, so there is nothing to release. */
        return NULL;
    }

    _Py_skiproot(wide, length, &drive_len, &root_len);

    /* Each step runs only if the previous one succeeded; on failure the
     * remaining objects stay NULL and `triple` is never built. */
    drive_obj = PyUnicode_FromWideChar(wide, drive_len);
    if (drive_obj != NULL) {
        root_obj = PyUnicode_FromWideChar(wide + drive_len, root_len);
    }
    if (root_obj != NULL) {
        tail_obj = PyUnicode_FromWideChar(wide + drive_len + root_len,
                                          length - drive_len - root_len);
    }
    if (tail_obj != NULL) {
        /* "O" takes its own reference to each part, so the local
         * references are still dropped below. */
        triple = Py_BuildValue("(OOO)", drive_obj, root_obj, tail_obj);
    }

    PyMem_Free(wide);
    Py_XDECREF(drive_obj);
    Py_XDECREF(root_obj);
    Py_XDECREF(tail_obj);
    return triple;
}
5511+
5512+
54705513
/*[clinic input]
54715514
os._path_normpath
54725515
@@ -16799,6 +16842,7 @@ static PyMethodDef posix_methods[] = {
1679916842
OS__FINDFIRSTFILE_METHODDEF
1680016843
OS__GETVOLUMEPATHNAME_METHODDEF
1680116844
OS__PATH_SPLITROOT_METHODDEF
16845+
OS__PATH_SPLITROOT_EX_METHODDEF
1680216846
OS__PATH_NORMPATH_METHODDEF
1680316847
OS_GETLOADAVG_METHODDEF
1680416848
OS_URANDOM_METHODDEF

0 commit comments

Comments
 (0)