Skip to content

Commit 9d4edc2

Browse files
committed
Merge remote-tracking branch 'github/pr/183'
2 parents bb0e5cd + 5370bb4 commit 9d4edc2

File tree

4 files changed

+120
-41
lines changed

4 files changed

+120
-41
lines changed

System/FilePath/Internal.hs

+81-38
Original file line numberDiff line numberDiff line change
@@ -112,24 +112,24 @@ module System.OsPath.MODULE_NAME.Internal
112112

113113
{- HLINT ignore "Use fewer imports" -}
114114
import Prelude (Char, Bool(..), Maybe(..), (.), (&&), (<=), not, fst, maybe, (||), (==), ($), otherwise, fmap, mempty, (>=), (/=), (++), snd)
115+
import Data.Bifunctor (first)
115116
import Data.Semigroup ((<>))
116117
import qualified Prelude as P
117-
import Data.Maybe(isJust)
118+
import Data.Maybe(fromMaybe, isJust)
118119
import qualified Data.List as L
119120

120121
#ifndef OS_PATH
121122
import Data.String (fromString)
122123
import System.Environment(getEnv)
123-
import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, takeWhile, take, all, elem, any, span)
124+
import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, take, all, elem, any, span)
124125
import Data.Char(toLower, toUpper, isAsciiLower, isAsciiUpper)
125-
import Data.List(stripPrefix, isSuffixOf, uncons)
126+
import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd)
126127
#define CHAR Char
127128
#define STRING String
128129
#define FILEPATH FilePath
129130
#else
130131
import Prelude (fromIntegral)
131132
import Control.Exception ( SomeException, evaluate, try, displayException )
132-
import Data.Bifunctor (first)
133133
import Control.DeepSeq (force)
134134
import GHC.IO (unsafePerformIO)
135135
import qualified Data.Char as C
@@ -290,13 +290,24 @@ getSearchPath = fmap splitSearchPath (getEnv "PATH")
290290
-- > splitExtension "file.txt/boris.ext" == ("file.txt/boris",".ext")
291291
-- > splitExtension "file/path.txt.bob.fred" == ("file/path.txt.bob",".fred")
292292
-- > splitExtension "file/path.txt/" == ("file/path.txt/","")
293+
294+
-- A naive implementation would be to use @splitFileName_@ first,
295+
-- then break filename into basename and extension, then recombine dir and basename.
296+
-- This is way too expensive, see @splitFileName_@ comment for discussion.
297+
--
298+
-- Instead we speculatively split on the extension separator first, then check
299+
-- whether results are well-formed.
293300
splitExtension :: FILEPATH -> (STRING, STRING)
294-
splitExtension x = if null nameDot
295-
then (x, mempty)
296-
else (dir <> init nameDot, singleton extSeparator <> ext)
297-
where
298-
(dir,file) = splitFileName_ x
299-
(nameDot,ext) = breakEnd isExtSeparator file
301+
splitExtension x
302+
-- Imagine x = "no-dots", then nameDot = ""
303+
| null nameDot = (x, mempty)
304+
-- Imagine x = "\\shared.with.dots\no-dots"
305+
| isWindows && null (dropDrive nameDot) = (x, mempty)
306+
-- Imagine x = "dir.with.dots/no-dots"
307+
| any isPathSeparator ext = (x, mempty)
308+
| otherwise = (init nameDot, extSeparator `cons` ext)
309+
where
310+
(nameDot, ext) = breakEnd isExtSeparator x
300311

301312
-- | Get the extension of a file, returns @\"\"@ for no extension, @.ext@ otherwise.
302313
--
@@ -358,7 +369,7 @@ addExtension file xs = case uncons xs of
358369
Just (x, _) -> joinDrive a res
359370
where
360371
res = if isExtSeparator x then b <> xs
361-
else b <> singleton extSeparator <> xs
372+
else b <> (extSeparator `cons` xs)
362373

363374
(a,b) = splitDrive file
364375

@@ -383,7 +394,7 @@ isExtensionOf :: STRING -> FILEPATH -> Bool
383394
isExtensionOf ext = \fp -> case uncons ext of
384395
Just (x, _)
385396
| x == _period -> isSuffixOf ext . takeExtensions $ fp
386-
_ -> isSuffixOf (singleton _period <> ext) . takeExtensions $ fp
397+
_ -> isSuffixOf (_period `cons` ext) . takeExtensions $ fp
387398

388399
-- | Drop the given extension from a FILEPATH, and the @\".\"@ preceding it.
389400
-- Returns 'Nothing' if the FILEPATH does not have the given extension, or
@@ -403,7 +414,7 @@ isExtensionOf ext = \fp -> case uncons ext of
403414
-- > stripExtension "" x == Just x
404415
stripExtension :: STRING -> FILEPATH -> Maybe FILEPATH
405416
stripExtension ext path = case uncons ext of
406-
Just (x, _) -> let dotExt = if isExtSeparator x then ext else singleton _period <> ext
417+
Just (x, _) -> let dotExt = if isExtSeparator x then ext else _period `cons` ext
407418
in stripSuffix dotExt path
408419
Nothing -> Just path
409420

@@ -506,19 +517,21 @@ readDriveUNC bs = case unpack bs of
506517

507518
{- c:\ -}
508519
readDriveLetter :: STRING -> Maybe (FILEPATH, FILEPATH)
509-
readDriveLetter bs = case unpack bs of
510-
(x:c:y:xs)
511-
| c == _colon && isLetter x && isPathSeparator y -> Just $ addSlash (pack [x,_colon]) (pack (y:xs))
512-
(x:c:xs)
513-
| c == _colon && isLetter x -> Just (pack [x,_colon], pack xs)
514-
_ -> Nothing
520+
readDriveLetter bs = case uncons2 bs of
521+
Nothing -> Nothing
522+
Just (x, c, ys)
523+
| isLetter x, c == _colon -> Just $ case uncons ys of
524+
Just (y, _)
525+
| isPathSeparator y -> addSlash (pack [x,_colon]) ys
526+
_ -> (pack [x,_colon], ys)
527+
| otherwise -> Nothing
515528

516529
{- \\sharename\ -}
517530
readDriveShare :: STRING -> Maybe (FILEPATH, FILEPATH)
518531
readDriveShare bs = case unpack bs of
519532
(s1:s2:xs) | isPathSeparator s1 && isPathSeparator s2 ->
520533
let (a, b) = readDriveShareName (pack xs)
521-
in Just (singleton s1 <> singleton s2 <> a,b)
534+
in Just (s1 `cons` (s2 `cons` a), b)
522535
_ -> Nothing
523536

524537
{- assume you have already seen \\ -}
@@ -594,19 +607,53 @@ splitFileName x = if null path
594607
else (path, file)
595608
where
596609
(path, file) = splitFileName_ x
597-
dotSlash = singleton _period <> singleton _slash
610+
dotSlash = _period `cons` singleton _slash
598611

599612
-- version of splitFileName where, if the FILEPATH has no directory
600613
-- component, the returned directory is "" rather than "./". This
601614
-- is used in cases where we are going to combine the returned
602615
-- directory to make a valid FILEPATH, and having a "./" appear would
603616
-- look strange and upset simple equality properties. See
604617
-- e.g. replaceFileName.
618+
--
619+
-- A naive implementation is
620+
--
621+
-- splitFileName_ fp = (drv <> dir, file)
622+
-- where
623+
-- (drv, pth) = splitDrive fp
624+
-- (dir, file) = breakEnd isPathSeparator pth
625+
--
626+
-- but it is undesirable for two reasons:
627+
-- * splitDrive is very slow on Windows,
628+
-- * we unconditionally allocate 5 FilePath objects where only 2 would normally suffice.
629+
--
630+
-- In the implementation below we first speculatively split the input by the last path
631+
-- separator. In the vast majority of cases this is already the answer, except
632+
-- two exceptional cases explained below.
633+
--
605634
splitFileName_ :: FILEPATH -> (STRING, STRING)
606-
splitFileName_ fp = (drv <> dir, file)
635+
splitFileName_ fp
636+
-- If dirSlash is empty, @fp@ is either a genuine filename without any dir,
637+
-- or just a Windows drive name without slash like "c:".
638+
-- Run readDriveLetter to figure out.
639+
| isWindows
640+
, null dirSlash
641+
= fromMaybe (mempty, fp) (readDriveLetter fp)
642+
-- Another Windows quirk is that @fp@ could have been a shared drive "\\share"
643+
-- or UNC location "\\?\UNC\foo", where path separator is a part of the drive name.
644+
-- We can test this by trying dropDrive and falling back to splitDrive.
645+
| isWindows
646+
, Just (s1, _s2, bs') <- uncons2 dirSlash
647+
, isPathSeparator s1
648+
-- If bs' is empty, then s2 as the last character of dirSlash must be a path separator,
649+
-- so we are in the middle of shared drive.
650+
-- Otherwise, since s1 is a path separator, we might be in the middle of UNC path.
651+
, null bs' || maybe False (null . snd) (readDriveUNC dirSlash)
652+
= (fp, mempty)
653+
| otherwise
654+
= (dirSlash, file)
607655
where
608-
(drv, pth) = splitDrive fp
609-
(dir, file) = breakEnd isPathSeparator pth
656+
(dirSlash, file) = breakEnd isPathSeparator fp
610657

611658
-- | Set the filename.
612659
--
@@ -736,7 +783,7 @@ combineAlways a b | null a = b
736783
[a1, a2] | isWindows
737784
, isLetter a1
738785
, a2 == _colon -> a <> b
739-
_ -> a <> singleton pathSeparator <> b
786+
_ -> a <> (pathSeparator `cons` b)
740787

741788

742789
-- | Combine two paths with a path separator.
@@ -1068,7 +1115,7 @@ makeValid path
10681115
| isPosix = map (\x -> if x == _nul then _underscore else x) path
10691116
| isJust (readDriveShare drv) && all isPathSeparator drv = take 2 drv <> fromString "drive"
10701117
| isJust (readDriveUNC drv) && not (hasTrailingPathSeparator drv) =
1071-
makeValid (drv <> singleton pathSeparator <> pth)
1118+
makeValid (drv <> (pathSeparator `cons` pth))
10721119
| otherwise = joinDrive drv $ validElements $ validCHARs pth
10731120

10741121
where
@@ -1129,18 +1176,9 @@ isAbsolute = not . isRelative
11291176
#ifndef OS_PATH
11301177

11311178
-----------------------------------------------------------------------------
1132-
-- dropWhileEnd (>2) [1,2,3,4,1,2,3,4] == [1,2,3,4,1,2])
1133-
-- Note that Data.List.dropWhileEnd is only available in base >= 4.5.
1134-
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
1135-
dropWhileEnd p = reverse . dropWhile p . reverse
1136-
1137-
-- takeWhileEnd (>2) [1,2,3,4,1,2,3,4] == [3,4])
1138-
takeWhileEnd :: (a -> Bool) -> [a] -> [a]
1139-
takeWhileEnd p = reverse . takeWhile p . reverse
1140-
11411179
-- spanEnd (>2) [1,2,3,4,1,2,3,4] = ([1,2,3,4,1,2], [3,4])
11421180
spanEnd :: (a -> Bool) -> [a] -> ([a], [a])
1143-
spanEnd p xs = (dropWhileEnd p xs, takeWhileEnd p xs)
1181+
spanEnd p = L.foldr (\x (pref, suff) -> if null pref && p x then (pref, x : suff) else (x : pref, suff)) ([], [])
11441182

11451183
-- breakEnd (< 2) [1,2,3,4,1,2,3,4] == ([1,2,3,4,1],[2,3,4])
11461184
breakEnd :: (a -> Bool) -> [a] -> ([a], [a])
@@ -1152,11 +1190,16 @@ breakEnd p = spanEnd (not . p)
11521190
stripSuffix :: Eq a => [a] -> [a] -> Maybe [a]
11531191
stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys)
11541192

1193+
cons :: a -> [a] -> [a]
1194+
cons = (:)
11551195

11561196
unsnoc :: [a] -> Maybe ([a], a)
1157-
unsnoc [] = Nothing
1158-
unsnoc xs = Just (init xs, last xs)
1197+
unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing
11591198

1199+
uncons2 :: [a] -> Maybe (a, a, [a])
1200+
uncons2 [] = Nothing
1201+
uncons2 [_] = Nothing
1202+
uncons2 (x : y : zs) = Just (x, y, zs)
11601203

11611204
_period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: Char
11621205
_period = '.'

System/OsPath/Data/ByteString/Short.hs

+16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE MultiWayIf #-}
12
{-# LANGUAGE NoImplicitPrelude #-}
23
-- |
34
-- Module : System.OsPath.Data.ByteString.Short
@@ -81,6 +82,7 @@ module System.OsPath.Data.ByteString.Short (
8182
last,
8283
tail,
8384
uncons,
85+
uncons2,
8486
head,
8587
init,
8688
unsnoc,
@@ -173,3 +175,17 @@ module System.OsPath.Data.ByteString.Short (
173175
) where
174176

175177
import Data.ByteString.Short.Internal
178+
import System.OsPath.Data.ByteString.Short.Internal
179+
180+
import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise)
181+
import Data.Word (Word8)
182+
183+
uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString)
184+
uncons2 = \sbs ->
185+
let l = length sbs
186+
nl = l - 2
187+
in if | l <= 1 -> Nothing
188+
| otherwise -> let h = indexWord8Array (asBA sbs) 0
189+
h' = indexWord8Array (asBA sbs) 1
190+
t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl
191+
in Just (h, h', t)

System/OsPath/Data/ByteString/Short/Internal.hs

+10-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ module System.OsPath.Data.ByteString.Short.Internal where
2020

2121
import Control.Monad.ST
2222
import Control.Exception (assert, throwIO)
23+
import Data.Bits (Bits(..))
2324
import Data.ByteString.Short.Internal (ShortByteString(..), length)
2425
#if !MIN_VERSION_base(4,11,0)
2526
import Data.Semigroup
@@ -284,15 +285,21 @@ writeWord16Array (MBA# mba#) (I# i#) (W16# w#) =
284285
ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of
285286
s' -> (# s', () #))
286287

288+
indexWord8Array :: BA
289+
-> Int -- ^ Word8 index
290+
-> Word8
291+
indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#)
292+
287293
-- | This isn't strictly Word16 array read. Instead it's two Word8 array reads
288294
-- to avoid endianness issues due to primops doing automatic alignment based
289295
-- on host platform. We expect the byte array to be LE always.
290296
indexWord16Array :: BA
291297
-> Int -- ^ Word8 index (not Word16)
292298
-> Word16
293-
indexWord16Array (BA# ba#) (I# i#) =
294-
case (# indexWord8Array# ba# i#, indexWord8Array# ba# (i# +# 1#) #) of
295-
(# lsb#, msb# #) -> W16# (decodeWord16LE# (# lsb#, msb# #))
299+
indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8)
300+
where
301+
lsb = indexWord8Array ba i
302+
msb = indexWord8Array ba (i + 1)
296303

297304
#if !MIN_VERSION_base(4,16,0)
298305

System/OsPath/Data/ByteString/Short/Word16.hs

+13
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ module System.OsPath.Data.ByteString.Short.Word16 (
4646
last,
4747
tail,
4848
uncons,
49+
uncons2,
4950
head,
5051
init,
5152
unsnoc,
@@ -260,6 +261,18 @@ uncons = \(assertEven -> sbs) ->
260261
t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl
261262
in Just (h, t)
262263

264+
-- | /O(n)/ Extract first two elements and the rest of a ByteString,
265+
-- returning Nothing if it is shorter than two elements.
266+
uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString)
267+
uncons2 = \(assertEven -> sbs) ->
268+
let l = BS.length sbs
269+
nl = l - 4
270+
in if | l <= 2 -> Nothing
271+
| otherwise -> let h = indexWord16Array (asBA sbs) 0
272+
h' = indexWord16Array (asBA sbs) 2
273+
t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl
274+
in Just (h, h', t)
275+
263276
-- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16.
264277
-- An exception will be thrown in the case of an empty ShortByteString.
265278
head :: HasCallStack => ShortByteString -> Word16

0 commit comments

Comments
 (0)