Skip to content

Commit 3d7976e

Browse files
Update for new release of purescript-unicode (#106)
1 parent ef2d38f commit 3d7976e

File tree

1 file changed

+28
-20
lines changed

1 file changed

+28
-20
lines changed

Diff for: src/Text/Parsing/Parser/Token.purs

+28-20
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ import Control.Lazy (fix)
2727
import Control.Monad.State (gets, modify_)
2828
import Control.MonadPlus (guard, (<|>))
2929
import Data.Array as Array
30+
import Data.String.CodeUnits (toChar, singleton) as CodeUnits
31+
import Data.String.CodePoints (CodePoint, codePointFromChar)
3032
import Data.Char (fromCharCode, toCharCode)
31-
import Data.Char.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper, hexDigitToInt)
32-
import Data.Char.Unicode as Unicode
33+
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper, hexDigitToInt)
34+
import Data.String.Unicode as Unicode
3335
import Data.Either (Either(..))
3436
import Data.Foldable (foldl, foldr)
3537
import Data.Identity (Identity)
@@ -551,7 +553,7 @@ makeTokenParser (LanguageDef languageDef)
551553
op :: Char -> Maybe Number -> Maybe Number
552554
op _ Nothing = Nothing
553555
op d (Just f) = do
554-
int' <- hexDigitToInt d
556+
int' <- hexDigitToInt $ codePointFromChar d
555557
pure $ ( f + toNumber int' ) / 10.0
556558

557559
exponent' :: ParserT String m Number
@@ -600,7 +602,7 @@ makeTokenParser (LanguageDef languageDef)
600602
where
601603
folder :: Maybe Int -> Char -> Maybe Int
602604
folder Nothing _ = Nothing
603-
folder (Just x) d = ((base * x) + _) <$> hexDigitToInt d
605+
folder (Just x) d = ((base * x) + _) <$> hexDigitToInt (codePointFromChar d)
604606

605607
-----------------------------------------------------------
606608
-- Operators & reserved ops
@@ -657,7 +659,10 @@ makeTokenParser (LanguageDef languageDef)
657659
Just { head: c, tail: cs } -> (caseChar c <?> msg) *> walk cs
658660

659661
caseChar :: Char -> ParserT String m Char
660-
caseChar c | isAlpha c = char (Unicode.toLower c) <|> char (Unicode.toUpper c)
662+
caseChar c | isAlpha (codePointFromChar c)
663+
, Just c1 <- CodeUnits.toChar (Unicode.toLowerSimple $ CodeUnits.singleton c)
664+
, Just c2 <- CodeUnits.toChar (Unicode.toUpperSimple $ CodeUnits.singleton c) =
665+
char c1 <|> char c2
661666
| otherwise = char c
662667

663668
msg :: String
@@ -741,7 +746,7 @@ whiteSpace' langDef@(LanguageDef languageDef)
741746
skipMany (simpleSpace <|> oneLineComment langDef <|> multiLineComment langDef <?> "")
742747

743748
-- | Whitespace skipper used as one alternative inside `whiteSpace'`.
-- | Applies `skipMany1` to a parser for a single character accepted by
-- | `isSpace`, and yields `Unit` rather than the matched characters.
simpleSpace :: forall m . Monad m => ParserT String m Unit
744-
simpleSpace = skipMany1 (satisfy isSpace)
749+
simpleSpace = skipMany1 (satisfyCP isSpace)
745750

746751
oneLineComment :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit
747752
oneLineComment (LanguageDef languageDef) =
@@ -780,31 +785,34 @@ inCommentSingle (LanguageDef languageDef) =
780785
-- Helper functions that should maybe go in Text.Parsing.Parser.String --
781786
-------------------------------------------------------------------------
782787

783-
-- | Parse a digit. Matches any char that satisfies `Data.Char.Unicode.isDecDigit`.
788+
-- | Variant of `satisfy` for predicates that work on `CodePoint` instead of
-- | `Char`. The candidate `Char` is converted with `codePointFromChar` before
-- | the predicate is applied; the parser still yields the original `Char`.
satisfyCP :: forall m . Monad m => (CodePoint -> Boolean) -> ParserT String m Char
789+
satisfyCP p = satisfy (p <<< codePointFromChar)
790+
791+
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
784792
digit :: forall m . Monad m => ParserT String m Char
785-
digit = satisfy isDecDigit <?> "digit"
793+
digit = satisfyCP isDecDigit <?> "digit"
786794

787-
-- | Parse a hex digit. Matches any char that satisfies `Data.Char.Unicode.isHexDigit`.
795+
-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`.
788796
hexDigit :: forall m . Monad m => ParserT String m Char
789-
hexDigit = satisfy isHexDigit <?> "hex digit"
797+
hexDigit = satisfyCP isHexDigit <?> "hex digit"
790798

791-
-- | Parse an octal digit. Matches any char that satisfies `Data.Char.Unicode.isOctDigit`.
799+
-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`.
792800
octDigit :: forall m . Monad m => ParserT String m Char
793-
octDigit = satisfy isOctDigit <?> "oct digit"
801+
octDigit = satisfyCP isOctDigit <?> "oct digit"
794802

795-
-- | Parse an uppercase letter. Matches any char that satisfies `Data.Char.Unicode.isUpper`.
803+
-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`.
796804
upper :: forall m . Monad m => ParserT String m Char
797-
upper = satisfy isUpper <?> "uppercase letter"
805+
upper = satisfyCP isUpper <?> "uppercase letter"
798806

799-
-- | Parse a space character. Matches any char that satisfies `Data.Char.Unicode.isSpace`.
807+
-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`.
800808
space :: forall m . Monad m => ParserT String m Char
801-
space = satisfy isSpace <?> "space"
809+
space = satisfyCP isSpace <?> "space"
802810

803-
-- | Parse an alphabetical character. Matches any char that satisfies `Data.Char.Unicode.isAlpha`.
811+
-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`.
804812
letter :: forall m . Monad m => ParserT String m Char
805-
letter = satisfy isAlpha <?> "letter"
813+
letter = satisfyCP isAlpha <?> "letter"
806814

807815
-- | Parse an alphabetical or numerical character.
808-
-- | Matches any char that satisfies `Data.Char.Unicode.isAlphaNum`.
816+
-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`.
809817
alphaNum :: forall m . Monad m => ParserT String m Char
810-
alphaNum = satisfy isAlphaNum <?> "letter or digit"
818+
alphaNum = satisfyCP isAlphaNum <?> "letter or digit"

0 commit comments

Comments
 (0)