From 71fc907a499f3938b28c055b40af0b68f65a2516 Mon Sep 17 00:00:00 2001
From: abychkova <7148571+abychkova@users.noreply.github.com>
Date: Wed, 18 Dec 2024 15:29:29 +0600
Subject: [PATCH] version 0.1.5.6: YLAB2-2755: spaces in gb locus name (#76)

---
 ChangeLog.md                          | 13 +++---
 package.yaml                          |  2 +-
 src/Bio/GB/Parser.hs                  | 33 +++++++-------
 src/Bio/MMTF/Decode/MessagePack.hs    |  6 ++-
 src/Bio/PDB.hs                        | 10 ++---
 src/Bio/PDB/BondRestoring.hs          | 40 +++++++++--------
 src/Bio/PDB/Functions.hs              | 15 ++++---
 src/Bio/PDB/Reader.hs                 |  2 +-
 src/Bio/Sequence/Functions/Marking.hs |  2 +-
 src/Bio/Sequence/Functions/Weight.hs  |  4 +-
 src/Bio/Uniprot/Parser.hs             | 29 ++++++++----
 test/GB/pIntA-TRBV.gb                 |  8 ++--
 test/GB/spaces_in_locus.gb            | 20 +++++++++
 test/GBParserSpec.hs                  | 64 +++++++++++++++------------
 14 files changed, 149 insertions(+), 99 deletions(-)
 create mode 100644 test/GB/spaces_in_locus.gb

diff --git a/ChangeLog.md b/ChangeLog.md
index 691e873..10636f8 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -2,6 +2,9 @@
 
 ## [Unreleased]
 
+## [0.1.5.6] - 2024-12-18
+- Fix for gb-parser: spaces in name in LOCUS
+
 ## [0.1.5.5] - 2024-05-16
 - Add `5ROX` modification to Fasta parser.
 
@@ -64,12 +67,12 @@ Added ASN hydrogen names sometimes set by Scho
 - Update dependency versions.
 
 ## [0.1.3.20] - 2021-06-04
-### Changed 
-- YLAB2-629: Fasta parser is now able to parse empty lines in the beginning. 
+### Changed
+- YLAB2-629: Fasta parser is now able to parse empty lines in the beginning.
 
 ## [0.1.3.19] - 2021-04-30
-### Changed 
-- Exports and instances for Biosset. 
+### Changed
+- Exports and instances for Biosset.
 
 ## [0.1.3.18] - 2021-03-09
 ### Fixed
@@ -110,7 +113,7 @@ Added ASN hydrogen names sometimes set by Scho
 
 ## [0.1.3.9] - 2020-10-27
 ### Fixed
-- FASTA parser can now parse empty lines with spaces. 
+- FASTA parser can now parse empty lines with spaces.
 
 ## [0.1.3.8] - 2020-10-22
 ### Fixed
diff --git a/package.yaml b/package.yaml
index a467a6a..e073f40 100644
--- a/package.yaml
+++ b/package.yaml
@@ -1,5 +1,5 @@
 name:                cobot-io
-version:             0.1.5.5
+version:             0.1.5.6
 github:              "biocad/cobot-io"
 license:             BSD3
 category:            Bio
diff --git a/src/Bio/GB/Parser.hs b/src/Bio/GB/Parser.hs
index 3f3fc52..f3bb340 100644
--- a/src/Bio/GB/Parser.hs
+++ b/src/Bio/GB/Parser.hs
@@ -26,7 +26,7 @@ genBankP :: Parser GenBankSequence
 genBankP =  GenBankSequence
         <$> (metaP <?> "Meta parser")
         <*> (gbSeqP <?> "GB sequence parser")
-        <*  string "//" <* eolSpaceP 
+        <*  string "//" <* eolSpaceP
 
 --------------------------------------------------------------------------------
 -- Block with meta-information.
@@ -48,16 +48,19 @@ metaP = do
 
 locusP :: Parser Locus
 locusP = string "LOCUS" *> space *> (Locus
-       <$> textP <* space                                      -- name
+       <$> nameP <* space                                      -- name
        <*> decimal <* space <* string "bp" <* space            -- sequence length
        <*> textP <* space                                      -- molecule type
-       <*> optional formP <* space                               -- form of sequence
-       <*> optional (pack <$> some (satisfy isUpper)) <* space   -- GenBank division
+       <*> optional formP <* space                             -- form of sequence
+       <*> optional (pack <$> some (satisfy isUpper)) <* space -- GenBank division
        <*> textP                                               -- modification date
        <*  eolSpaceP)
   where
     textP = takeWhile1P Nothing $ not . isSpace
 
+    nameP :: Parser Text
+    nameP = textP <> (try (string " " <> nameP) <|> "")
+
     formP :: Parser Form
     formP = try (string "linear" $> Linear) <|> (string "circular" $> Circular)
 
@@ -108,7 +111,7 @@ commentP = string "COMMENT" *> (try emptyP <|> (many (char ' ') *> someLinesP))
 --------------------------------------------------------------------------------
 
 featuresP :: Parser [(Feature, Range)]
-featuresP = -- skip unknown fields and stop on line with "FEATURES" 
+featuresP = -- skip unknown fields and stop on line with "FEATURES"
           manyTill (textWithSpacesP <* eolSpaceP) (string "FEATURES") *> space
           *> textWithSpacesP <* eolSpaceP
           *> some (featureP <?> "Single feature parser")
@@ -128,8 +131,8 @@ featureP = do
     pure (Feature featureName' props, shiftRange (-1) range)
 
 rangeP :: Parser Range
-rangeP =  try spanP 
-      <|> try betweenP 
+rangeP =  try spanP
+      <|> try betweenP
       <|> try pointP
       <|> try joinP
       <|> complementP
@@ -141,8 +144,8 @@ rangeP =  try spanP
         _ <- string ".."
         upperBorderType <- option Precise (try $ char '>' *> pure Exceeded)
         upperBorderLocation <- decimal
-        pure $ Span (RangeBorder lowerBorderType lowerBorderLocation) (RangeBorder upperBorderType upperBorderLocation) 
-                
+        pure $ Span (RangeBorder lowerBorderType lowerBorderLocation) (RangeBorder upperBorderType upperBorderLocation)
+
     betweenP :: Parser Range
     betweenP = do
         before <- decimal
@@ -152,13 +155,13 @@ rangeP =  try spanP
 
     pointP :: Parser Range
     pointP = fmap Point decimal
-   
+
     joinP :: Parser Range
     joinP = string "join(" *> fmap Join (rangeP `sepBy1` char ',') <* char ')'
 
     complementP :: Parser Range
     complementP = fmap Complement $ string "complement(" *> rangeP <* char ')'
-        
+
 
 propsP :: Parser (Text, Text)
 propsP = do
@@ -178,17 +181,17 @@ propsP = do
     indLine = do
         _ <- string featureIndent2
         notFollowedBy (char '/')
-        text <- textWithSpacesP 
+        text <- textWithSpacesP
         eolSpaceP
         pure text
 
     multiLineProp :: Parser Text
     multiLineProp = do
-        fstText <- textWithSpacesP <* eolSpaceP 
+        fstText <- textWithSpacesP <* eolSpaceP
         rest <- many (try indLine)
-        pure $ T.concat (fstText : rest) 
+        pure $ T.concat (fstText : rest)
+
 
-    
 
 -- | First level of identation in FEATURES table file.
 --
diff --git a/src/Bio/MMTF/Decode/MessagePack.hs b/src/Bio/MMTF/Decode/MessagePack.hs
index 22b99ce..f302b60 100644
--- a/src/Bio/MMTF/Decode/MessagePack.hs
+++ b/src/Bio/MMTF/Decode/MessagePack.hs
@@ -43,7 +43,11 @@ asStr _ (ObjectStr s) = pure s
 asStr m _             = fail $ T.unpack m <> ": not a string data"
 
 asChar :: MonadFail m => Text -> Object -> m Char
-asChar m = (head . T.unpack <$>) . asStr m
+asChar txt obj = do
+  str <- asStr txt obj
+  case T.unpack str of
+    []      -> return ' '
+    (c : _) -> return c
 
 asInt :: (MonadFail m, Integral a) => Text -> Object -> m a
 asInt _ (ObjectInt i)  = pure (fromIntegral i)
diff --git a/src/Bio/PDB.hs b/src/Bio/PDB.hs
index 925c7ea..fe16b57 100644
--- a/src/Bio/PDB.hs
+++ b/src/Bio/PDB.hs
@@ -72,15 +72,13 @@ instance StructureModels PDB.PDB where
 
             mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
             mkResidue _ []    = error "Cound not make residue from empty list"
-            mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom)
-                                                     (PDB.atomResSeq firstResidueAtom)
-                                                     (PDB.atomICode firstResidueAtom)
+            mkResidue localBondsMap atoms'@(firstAtom : _) = Residue (T.strip $ PDB.atomResName firstAtom)
+                                                     (PDB.atomResSeq firstAtom)
+                                                     (PDB.atomICode firstAtom)
                                                      (V.fromList $ mkAtom <$> atoms')
-                                                     (localBondsMap M.!?! residueID firstResidueAtom)
+                                                     (localBondsMap M.!?! residueID firstAtom)
                                                      Undefined -- now we do not read secondary structure
                                                      ""        -- chemical component type?!
-              where
-                firstResidueAtom = head atoms'
 
             mkAtom :: PDB.Atom -> Atom
             mkAtom atom@PDB.Atom{..} = Atom (GlobalID $ atomToNilBasedIndex M.!?! atom)
diff --git a/src/Bio/PDB/BondRestoring.hs b/src/Bio/PDB/BondRestoring.hs
index fec3908..0f0287a 100644
--- a/src/Bio/PDB/BondRestoring.hs
+++ b/src/Bio/PDB/BondRestoring.hs
@@ -42,25 +42,28 @@ restoreChainLocalBonds' chainAtoms = residueIDToLocalBonds
     residueIDToLocalBonds = do
       (residueAtoms, residueBonds) <- zip chainAtomsGroupedByResidue intraResidueGlobalBonds
       let localBonds = V.fromList $ convertGlobalsToLocals residueAtoms residueBonds
-      let _residueID = residueID $ head residueAtoms
+      let _residueID =
+            case residueAtoms of
+              [] -> ""
+              (atom : _) -> residueID atom
       pure (_residueID, localBonds)
-    
+
     intraResidueGlobalBonds :: [[Bond PDB.Atom]]
     intraResidueGlobalBonds = fmap restoreIntraResidueBonds chainAtomsGroupedByResidue
-    
+
     chainAtomsGroupedByResidue :: [[PDB.Atom]]
     chainAtomsGroupedByResidue = groupChainByResidue chainAtoms
-    
+
     convertGlobalsToLocals :: [PDB.Atom] -> [Bond PDB.Atom] -> [Bond LocalID]
     convertGlobalsToLocals residueAtoms = map convertGlobalToLocal
       where
         convertGlobalToLocal :: Bond PDB.Atom -> Bond LocalID
-        convertGlobalToLocal (Bond from to order) = 
+        convertGlobalToLocal (Bond from to order) =
           Bond (LocalID $ atomToLocalIdMap ! from) (LocalID $ atomToLocalIdMap ! to) order
-        
+
         atomToLocalIdMap :: Map PDB.Atom Int
         atomToLocalIdMap = M.fromList $ zip sortedAtoms [0..]
-        
+
         sortedAtoms :: [PDB.Atom]
         sortedAtoms = sort residueAtoms
 
@@ -70,19 +73,19 @@ restoreModelGlobalBonds atomToNilBasedIndex chains = convertToGlobalIDs atomToNi
   where
     convertToGlobalIDs :: Map PDB.Atom Int -> Vector (Bond PDB.Atom) -> Vector (Bond GlobalID)
     convertToGlobalIDs mapping = reindexBonds (\atom -> GlobalID $ mapping ! atom)
-    
+
     reindexBonds :: (a -> b) -> Vector (Bond a) -> Vector (Bond b)
     reindexBonds convertID = fmap (\(Bond from to order) -> Bond (convertID from) (convertID to) order)
 
     chainAtomsGroupedByResidue :: Vector [[PDB.Atom]]
     chainAtomsGroupedByResidue = fmap groupChainByResidue chains
-    
+
     _intraResidueBonds :: [Bond PDB.Atom]
     _intraResidueBonds = concatMap restoreChainIntraResidueBonds chainAtomsGroupedByResidue
-    
+
     peptideBonds :: [Bond PDB.Atom]
     peptideBonds = concatMap restoreChainPeptideBonds chainAtomsGroupedByResidue
-    
+
     disulfideBonds :: [Bond PDB.Atom]
     disulfideBonds = restoreDisulfideBonds . concat $ V.toList chainAtomsGroupedByResidue
 
@@ -116,7 +119,7 @@ restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptide
     restoreChainPeptideBonds' :: [[PDB.Atom]] -> [Maybe (Bond PDB.Atom)] -> [Maybe (Bond PDB.Atom)]
     restoreChainPeptideBonds' [] acc = acc
     restoreChainPeptideBonds' [_] acc = acc
-    restoreChainPeptideBonds' (residue1:residue2:residues) acc = 
+    restoreChainPeptideBonds' (residue1:residue2:residues) acc =
       restoreChainPeptideBonds' (residue2:residues) (constructBond residue1 residue2 : acc)
 
     constructBond :: [PDB.Atom] -> [PDB.Atom] -> Maybe (Bond PDB.Atom)
@@ -129,7 +132,7 @@ restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptide
         guard $ distance (coords carbonAtom1) (coords nitrogenAtom2) < peptideBondMaxLength
 
         pure $ Bond carbonAtom1 nitrogenAtom2 1
-    
+
     getAtomByName :: [PDB.Atom] -> Text -> Maybe PDB.Atom
     getAtomByName atoms atomNameToFind = find ((atomNameToFind ==) . T.strip . PDB.atomName) atoms
 
@@ -137,20 +140,21 @@ restoreChainIntraResidueBonds :: [[PDB.Atom]] -> [Bond PDB.Atom]
 restoreChainIntraResidueBonds = concatMap restoreIntraResidueBonds
 
 restoreIntraResidueBonds :: [PDB.Atom] -> [Bond PDB.Atom]
-restoreIntraResidueBonds residueAtoms = catMaybes $ constructBond <$> residueBonds
+restoreIntraResidueBonds [] = []
+restoreIntraResidueBonds residueAtoms@(firstAtom : _) = catMaybes $ constructBond <$> residueBonds
   where
     -- TODO: support bond order somehow
     constructBond :: (Text, Text) -> Maybe (Bond PDB.Atom)
     constructBond (fromAtomName, toAtomName) = Bond <$> constructAtom fromAtomName <*> constructAtom toAtomName <*> Just 1
-    
+
     constructAtom :: Text -> Maybe PDB.Atom
     constructAtom atomName = atomNameToAtom !? atomName
-    
+
     atomNameToAtom :: Map Text PDB.Atom
     atomNameToAtom = M.fromList $ (\atom@PDB.Atom{..} -> (T.strip atomName, atom)) <$> residueAtoms
-    
+
     residueBonds :: [(Text, Text)]
-    residueBonds = intraResidueBonds . T.strip . PDB.atomResName $ head residueAtoms
+    residueBonds = intraResidueBonds . T.strip . PDB.atomResName $ firstAtom
 
 intraResidueBonds :: Text -> [(Text, Text)]
 intraResidueBonds "NMA" = [("CA", "N")]
diff --git a/src/Bio/PDB/Functions.hs b/src/Bio/PDB/Functions.hs
index f7fc3c6..0adefa2 100644
--- a/src/Bio/PDB/Functions.hs
+++ b/src/Bio/PDB/Functions.hs
@@ -7,20 +7,21 @@ import qualified Bio.Utils.Map as M ((!?!))
 
 import           Data.Map.Strict (Map)
 import qualified Data.Map.Strict as M (fromList)
-import           Data.List       (groupBy, 
+import           Data.List       (groupBy,
                                   sortOn)
 import           Data.Vector     (Vector)
 import qualified Data.Vector      as V (toList)
 import           Data.Char       (toUpper)
 
 groupChainByResidue :: Vector PDB.Atom -> [[PDB.Atom]]
-groupChainByResidue = sortOn (sortOnResidue . head) . groupBy atomsFromSameResidue . V.toList
-  where 
+groupChainByResidue = sortOn sortOnResidue . groupBy atomsFromSameResidue . V.toList
+  where
     atomsFromSameResidue :: PDB.Atom -> PDB.Atom -> Bool
     atomsFromSameResidue atom1 atom2 = PDB.atomResSeq atom1 == PDB.atomResSeq atom2 && PDB.atomICode atom1 == PDB.atomICode atom2
-    
-    sortOnResidue :: PDB.Atom -> Int
-    sortOnResidue PDB.Atom{..} = atomSerial * 100 + (insertionCodeSortingCorrections M.!?! toUpper atomICode)
-    
+
+    sortOnResidue :: [PDB.Atom] -> Int
+    sortOnResidue [] = -1000000
+    sortOnResidue (PDB.Atom{..} : _) = atomSerial * 100 + (insertionCodeSortingCorrections M.!?! toUpper atomICode)
+
     insertionCodeSortingCorrections :: Map Char Int
     insertionCodeSortingCorrections = M.fromList $ zip (' ':['A'..'Z']) [0..]
diff --git a/src/Bio/PDB/Reader.hs b/src/Bio/PDB/Reader.hs
index f79b97c..0625e2c 100644
--- a/src/Bio/PDB/Reader.hs
+++ b/src/Bio/PDB/Reader.hs
@@ -48,7 +48,7 @@ isMdlLine line = elem (T.take 6 line) modelStrings || elem (T.take 5 line) model
 
 checkRow :: [Int] -> Bool
 checkRow [] = True
-checkRow xs = last xs - head xs + 1 == L.length xs
+checkRow row@(x : _) = last row - x + 1 == L.length row
 
 checkMdlLines :: ([PDBWarnings], Text) -> Bool
 checkMdlLines warnings'n'text = checkRow mdlLineNumbers
diff --git a/src/Bio/Sequence/Functions/Marking.hs b/src/Bio/Sequence/Functions/Marking.hs
index a25017b..1430068 100644
--- a/src/Bio/Sequence/Functions/Marking.hs
+++ b/src/Bio/Sequence/Functions/Marking.hs
@@ -16,7 +16,7 @@ import           Data.List            (nub)
 import           Data.List.NonEmpty   (NonEmpty (..))
 import           Data.Text            (Text)
 import qualified Data.Vector          as V (toList)
-import           Prelude              hiding (drop, head, length, null, reverse, tail, take, (!!))
+import           Prelude              hiding (length)
 
 import Bio.NucleicAcid.Nucleotide      (Complementary (..))
 import Bio.Sequence.Class              (ContainsMarking, IsBareSequence, IsMarkedSequence,
diff --git a/src/Bio/Sequence/Functions/Weight.hs b/src/Bio/Sequence/Functions/Weight.hs
index bac8e8f..f9caadb 100644
--- a/src/Bio/Sequence/Functions/Weight.hs
+++ b/src/Bio/Sequence/Functions/Weight.hs
@@ -25,9 +25,7 @@ import           Data.Text                       (Text)
 import           Data.Vector                     (Vector)
 import qualified Data.Vector                     as V (drop, length, take,
                                                        toList, (!))
-import           Prelude                         hiding (drop, head, length,
-                                                  null, reverse, tail, take,
-                                                  (!!))
+import           Prelude                         hiding (drop, length, take, (!!))
 
 -- | Range of form [a, b].
 --
diff --git a/src/Bio/Uniprot/Parser.hs b/src/Bio/Uniprot/Parser.hs
index aa1d1ad..7854aa5 100644
--- a/src/Bio/Uniprot/Parser.hs
+++ b/src/Bio/Uniprot/Parser.hs
@@ -8,7 +8,7 @@
 module Bio.Uniprot.Parser where
 
 import           Prelude              hiding (null)
-import qualified Prelude              as P (concat, init, last, null, tail)
+import qualified Prelude              as P (concat, init, last)
 
 #if MIN_VERSION_base(4, 18, 0)
 import           Control.Applicative  ((<|>))
@@ -127,7 +127,9 @@ parseDE = do
             string "="
             pure ()
         result <- pack . P.init <$> many1 (satisfy (not . isEndOfLine))
-        pure . head $ " {ECO" `splitOn` result
+        case " {ECO" `splitOn` result of
+          [] -> pure ""
+          (txt : _) -> pure txt
 
     -- Parses internal DE entities
     parseInternal :: Text -> Parser DE
@@ -235,9 +237,11 @@ parseOH = do
     taxId <- pack <$> many1 (notChar ';')
     char ';'
     hostName' <- many' (satisfy $ not . isEndOfLine)
-    let hostName = pack $ if P.null hostName'
-                            then ""
-                            else P.tail . P.init $ hostName'
+    let hostName = pack $
+          case hostName' of
+            [] -> ""
+            [x] -> ""
+            (x : xs) -> init xs
     pure OH{..}
 
 -- | Parses RN, RP, RC, RX, RG, RA, RT and RL lines of UniProt-KB text file.
@@ -300,7 +304,11 @@ parseCC = do
     topic <- pack <$> many1 (notChar ':')
     char ':'
     (char ' ' $> ()) <|> (endOfLine >> string "CC" >> count 7 space $> ())
-    comment <- head . (" {ECO" `splitOn`) . pack <$> parseMultiLineComment "CC" 7
+    splitedComments <- (" {ECO" `splitOn`) . pack <$> parseMultiLineComment "CC" 7
+    let comment =
+          case splitedComments of
+            [] -> ""
+            (x : _) -> x
     pure CC{..}
 
 -- | UniProt-KB copyright comment
@@ -494,7 +502,10 @@ parseBreak txt = ((endOfLine >> string txt >> string "   ") <|> string " ") $> (
 parseDefItem :: Text -> Parser Text
 parseDefItem name = do
     string name >> char '='
-    head . (" {" `splitOn`) . pack <$> parseTillChar ';'
+    txt <- (" {" `splitOn`) . pack <$> parseTillChar ';'
+    case txt of
+      [] -> return ""
+      (x : _) -> return x
 
 -- | Parses line till specific char (e.g. semicolon or dot) before space/endOfLine/endOfInput.
 parseTillChar :: Char -> Parser String
@@ -524,8 +535,8 @@ hyphenConcat [x]      = x
 hyphenConcat (x:y:ys) = x ++ hyphenConcat (sy:ys)
   where
     sy :: String
-    sy | last x == '-'                  = tail y
-       | isAA (last x) && isAA (y !! 1) = tail y
+    sy | last x == '-'                  = drop 1 y
+       | isAA (last x) && isAA (y !! 1) = drop 1 y
        | otherwise                      = y
 
     isAA :: Char -> Bool
diff --git a/test/GB/pIntA-TRBV.gb b/test/GB/pIntA-TRBV.gb
index 5080990..6c7874c 100644
--- a/test/GB/pIntA-TRBV.gb
+++ b/test/GB/pIntA-TRBV.gb
@@ -1,6 +1,6 @@
-LOCUS       P2-32_pIntA-TRBV5-1_J1-1-Fc-lama-knob-EPEA 5960 bp   circular 27-NOV-2020
-SOURCE      
-  ORGANISM  
+LOCUS       P2-32_pIntA-TRBV5-1_J1-1-Fc-lama-knob-EPEA    5960 bp   circular 27-NOV-2020
+SOURCE
+  ORGANISM
 COMMENT     This file is created by Vector NTI
             http://www.invitrogen.com/
 COMMENT     ORIGDB|GenBank
@@ -9,7 +9,7 @@ COMMENT     VNTDBDATE|-13020134|
 COMMENT     LSOWNER|
 COMMENT     VNTNAME|pIntA-mIgK-GFP-stuffer-KpnI-EPEA|
 COMMENT     VNTAUTHORNAME|Demo User|
-BASE COUNT     1711 a      1577 c      1543 g      1830 t 
+BASE COUNT     1711 a      1577 c      1543 g      1830 t
 EMBL_ID     P1-79_pIntA-mIgK-GFP-stuffer-KpnI-EPEA 5945 bp   circular 14-OCT-2020
 EMBL_ID     P1-79_pIntA-mIgK-GFP-stuffer-KpnI-EPEA 6661 bp   circular 29-JAN-2020
 EMBL_ID     pIntA-mIgK-GFP-s        6661 bp    DNA     circular      4-OCT-2019
diff --git a/test/GB/spaces_in_locus.gb b/test/GB/spaces_in_locus.gb
new file mode 100644
index 0000000..13ab1f5
--- /dev/null
+++ b/test/GB/spaces_in_locus.gb
@@ -0,0 +1,20 @@
+LOCUS       BLABLA BLABLA BLABLA        100 bp DNA     circular SYN 11-SEP-2024
+DEFINITION  synthetic circular DNA.
+ACCESSION   .
+VERSION     .
+KEYWORDS    .
+SOURCE      synthetic DNA construct
+  ORGANISM  synthetic DNA construct
+REFERENCE   1  (bases 1 to 100)
+  AUTHORS   hhh
+  TITLE     Direct Submission
+  JOURNAL   Exported Sep 12, 2024 from SnapGene 6.1.0
+            https://www.snapgene.com
+FEATURES             Location/Qualifiers
+     source          1..100
+                     /mol_type="other DNA"
+                     /organism="plasmid"
+ORIGIN
+        1 cctacagcgt cctacagcgt cctacagcgt cctacagcgt cctacagcgt cctacagcgt
+       61 cctacagcgt cctacagcgt cctacagcgt cctacagcgt
+//
diff --git a/test/GBParserSpec.hs b/test/GBParserSpec.hs
index b27ee0e..225634f 100644
--- a/test/GBParserSpec.hs
+++ b/test/GBParserSpec.hs
@@ -20,50 +20,51 @@ gbParserSpec = describe "GenBank format parser." $ do
     dottedMetaSpecP "test/GB/pAAV-GFP-CellBioLab-dots.gb"
     unknownFieldsSpecP "test/GB/pIntA-TRBV.gb"
     baseCountWithSophisticatedRangesAndMultilineFeatures "test/GB/fromYanaWithLove.gb"
+    spacesInLocusName "test/GB/spaces_in_locus.gb"
 
 rangeTests :: Spec
 rangeTests = describe "Range parser" $ do
-    it "correctly parses a simple span" $ 
+    it "correctly parses a simple span" $
         greedyRangeP "69..420" `shouldBe` successful (Span (RangeBorder Precise 69) (RangeBorder Precise 420))
     it "correctly parses a span with the lower border exceeded" $
         greedyRangeP "<69..420" `shouldBe` successful (Span (RangeBorder Exceeded 69) (RangeBorder Precise 420))
     it "correctly parses a span with the upper border exceeded" $
         greedyRangeP "69..>420" `shouldBe` successful (Span (RangeBorder Precise 69) (RangeBorder Exceeded 420))
-    it "correctly parses a span with both border exceeded" $ 
+    it "correctly parses a span with both border exceeded" $
         greedyRangeP "<69..>420" `shouldBe` successful (Span (RangeBorder Exceeded 69) (RangeBorder Exceeded 420))
-    it "does not parse a span with the lower border exceeded incorrectly" $ 
+    it "does not parse a span with the lower border exceeded incorrectly" $
         greedyRangeP ">69..420" `shouldSatisfy` isFail
-    it "does not parse a span with the upper border exceeded incorrectly" $ 
+    it "does not parse a span with the upper border exceeded incorrectly" $
         greedyRangeP "69..<420" `shouldSatisfy` isFail
 
-    it "correctly parses a 'between' statement" $ 
+    it "correctly parses a 'between' statement" $
         greedyRangeP "41^42" `shouldBe` successful (Between 41 42)
-    it "does not parse a 'between' statement witn border excession marks" $ 
+    it "does not parse a 'between' statement witn border excession marks" $
         greedyRangeP "<41^42" `shouldSatisfy` isFail
 
-    it "correctly parses a single point feature" $ 
+    it "correctly parses a single point feature" $
         greedyRangeP "42" `shouldBe` successful (Point 42)
-    it "does not parse a single point feature with border excession marks" $ 
+    it "does not parse a single point feature with border excession marks" $
         greedyRangeP "<3" `shouldSatisfy` isFail
 
-    it "correctly parses a join() statement" $ 
+    it "correctly parses a join() statement" $
         greedyRangeP "join(2,12..56)" `shouldBe` successful (Join [Point 2, Span (RangeBorder Precise 12) (RangeBorder Precise 56)])
-    it "correctly parses a sophisticated join() statement" $ 
+    it "correctly parses a sophisticated join() statement" $
         greedyRangeP "join(2^3,<5..10,15,20..>28)" `shouldBe` successful (Join [Between 2 3, Span (RangeBorder Exceeded 5) (RangeBorder Precise 10), Point 15, Span (RangeBorder Precise 20) (RangeBorder Exceeded 28)])
 
-    it "correctly parses a complement() statement" $ 
+    it "correctly parses a complement() statement" $
         greedyRangeP "complement(69..>420)" `shouldBe` successful (Complement (Span (RangeBorder Precise 69) (RangeBorder Exceeded 420)))
-    it "correctly parses a join() incorporated into a complement()" $ 
+    it "correctly parses a join() incorporated into a complement()" $
         greedyRangeP "complement(join(2^3,<5..10,15,20..>28))" `shouldBe` successful (Complement (Join [Between 2 3, Span (RangeBorder Exceeded 5) (RangeBorder Precise 10), Point 15, Span (RangeBorder Precise 20) (RangeBorder Exceeded 28)]))
   where
     greedyRangeP :: Text -> Either String Range
     greedyRangeP = over _Left errorBundlePretty . parse (rangeP <* eof) ""
-    
+
     successful :: a -> Either String a
     successful = Right
 
     isFail :: Either String a -> Bool
-    isFail = null 
+    isFail = null
 
 pAAVGFPSpecP :: FilePath -> Spec
 pAAVGFPSpecP path = describe "pAAVGFP" $ do
@@ -101,7 +102,7 @@ baseCountWithSophisticatedRangesAndMultilineFeatures :: FilePath -> Spec
 baseCountWithSophisticatedRangesAndMultilineFeatures path = describe "" $ do
     it "correctly parses the 'BASE COUNT' line and features with sophisticated ranges" $ do
         gbS <- gbSeq <$> fromFile path
-        gbS `shouldBe`  unsafeMarkedSequence sophisticatedFeaturesSeq sophisticatedFeatures 
+        gbS `shouldBe`  unsafeMarkedSequence sophisticatedFeaturesSeq sophisticatedFeatures
 
 compPreciseSpan :: (Int, Int) -> Range
 compPreciseSpan = Complement . preciseSpan
@@ -218,20 +219,20 @@ sophisticatedFeaturesSeq :: String
 sophisticatedFeaturesSeq = "cctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaaacgccagcaacgcggcctttttacggttcctggccttttgctggccttttgctcacatgttctttcctgcgttatcccctgattctgtggataaccgtattaccgcctttgagtgagctgataccgctcgccgcagccgaacgaccgagcgcagcgagtcagtgagcgaggaagcgtacatttatattggctcatgtccaatatgaccgccatgttgacattgattattgactagaccgcgttacataacttacggtaaatggcccgcctggctgaccgcccaacgacccccgcccattgacgtcaataatgacgtatgttcccatagtaacgccaatagggactttccattgacgtcaatgggtggagtatttacggtaaactgcccacttggcagtacatcaagtgtatcatatgccaagtacgccccctattgacgtcaatgacggtaaatggcccgcctggcattatgcccagtacatgaccttatgggactttcctacttggcagtacatctacgtattagtcatcgctattaccatggtgatgcggttttggcagtacatcaatgggcgtggatagcggtttgactcacggggatttccaagtctccaccccattgacgtcaatgggagtttgttttggcaccaaaatcaacgggactttccaaaatgtcgtaacaactccgccccattgacgcaaatgggcggtaggcgtgtacggtgggaggtctatataagcagagctcgtttagtgaaccgtcagatcgcctggagacgccatccacgctgttttgacctccatagaagacaccgggaccgatccagcctccgcggccgggaacggtgcattggaacgcggattccccgtgccaagagtgacgtaagtaccgcctatagagtctataggcccacccccttggcttcttatgcatgctatactgtttttggcttggggtctatacacccccgcttcctcatgttataggtgatggtatagcttagcctataggtgtgggttattgaccattattgaccactcccctattggtgacgatactttccattactaatccataacatggctctttgccacaactctctttattggctatatgccaatacactgtccttcagagactgacacggactctgtatttttacaggatggggtctcatttattatttacaaattcacatatacaacaccaccgtccccagtgcccgcagtttttattaaacataacgtgggatctccacgcgaatctcgggtacgtgttccggacatgggctcatctccggtagcggcggagcttctacatccgagccctgctcccatgcctccagcgactcatggtcgctcggcagctccttgctcctaacagtggaggccagacttaggcacagcacgatgcccaccaccaccagtgtgccgcacaaggccgtggcggtagggtatgtgtctgaaaatgagctcggggagcgggcttgcaccgctgacgcatttggaagacttaaggcagcggcagaagaagatgcaggcagctgagttgttgtgttctgataagagtcagaggtaactcccgttgcggtgctgttaacggtggagggcagtgtagtctgagcagtactcgttgctgccgcgcgcgccaccagacataatagctgacagactaacagactgttcctttccatgggtcttttctgcagtcaccgtccttgacacgaagcttgccgccaccatggagaccgacaccctgctgctgtgggtgctgctgctgtgggtgcccgggtcgacgaagagctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggtcagtgttacaaccaattaaccaattctgaacattatcgcgagcccatttatacctgaatatggctcataacaccccttgtttgcctggcggcagtagcgcggtggtcccacctgaccccatgccgaactcagaagtgaaacgccgtagcgccgatggtagtgtggggactccccatgcgagagtagggaactgccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgcccgggctaattatggggtgtcgcccttggggtgagaccctcgagtgtacagaattcttactgatacgtgtccagatcaaccgctttcacgacctctaccagacacatgtgatcacggcgctcgtcgcggtctttgctcagtttggtgtggtaggtaatgtgatgataacgcgggatatgcactgccgcggagcccgccaacggacgattcatttggctgcatttggtaaccagtttttcggtcacaccttcaatatcgtacgcctggttgaactcaacgcggatgccattgttaacggtgtcaggcagaatatacagaatgcttggcgggcattggaatgcaacgttcttacgcagaatgtgaccgtctttcttaaagttctcaccagtcagcgtgacacgattgtagatagaaccgcgttcgtaggtaaccatagcacgcgtcttgtacacgccgtcgccttcgaagctgatggtacgctcttgggtataaccttccggcatggcgctcttaaagaaatccttgatgtggctcgggtacttgcgaaacactgaacaccgtagctcagggtgctcaccagggttgcccacgggacccggcaggtcgcccgtagtgcagatgtatttcgctttaatggtacccgtggtcgcgtcacccggtaccctcgcctttaatgataaatttcataccttcgacgtcgccttccagttcggtgatatacgggatctctttctcaaacagttttgcaccttccgtcaatgccgtcatatgtttacctcctaaggtctcgaaaagttaaacaaaattatttctaaagggaaaccgttgtggaattgtgagcgctcacaattccacatattataattgttatccgctcacaaagcaaataaatttttcatgatttcactgtgcatgaagctcgtaattgttatccgctcacaattaagggcgacacaaaatttattctaaatgataataaatactgataacatcttatagtttgtattatattttgtattatcgttgacatgtataattttgatatcaaaaactgattttccctttattattttcgagatttattttcttaattctctttaacaaactagaaatattgtatatacaaaaaatcataaataatagatgaatagtttaattataggtgttcatcaatcgaaaaagcaacgtatcttatttaaagtgcgttgcttttttctcatttataaggttaaataattctcatatatcaagcaaagtgacaggcgcccttaaatattctgacaaatgctctttccctaaactccccccataaaaaaacccgccgaagcgggtttttacgttatttgcggattaacgattactcgttatcagaaccgcccagggggcccgagcttaagactggccgtcgttttacaacacaagctcttccgtacggtggctgcaccatctgtcttcatcttcccgccatctgatgagcagttgaaatctggaactgcctctgttgtgtgcctgctgaataacttctatcccagagaggccaaagtacagtggaaggtggataacgccctccaatcgggtaactcccaggagagtgtcacagagcaggacagcaaggacagcacctacagcctcagcagcaccctgacgctgagcaaagcagactacgagaaacacaaagtctacgcctgcgaagtcacccatcagggcctgagctcgcccgtcacaaagagcttcaacaggggagagtgttaatagtctagacctaggtgatcataatcagccataccacatttgtagaggttttacttgctttaaaaaacctcccacacctccccctgaacctgaaacataaaatgaatgcaattgttgttgttaacttgtttattgcagcttataatggttacaaataaagcaatagcatcacaaatttcacaaataaagcatttttttcactgcattctagttgtggtttgtccaaactcatcaatgtatcttatcatgtctggagatctctagctagaggatcgatccccgccccggacgaactaaacctgactacgacatctctgccccttcttcgcggggcagtgcatgtaatcccttcagttggttggtacaacttgccaactgaaccctaaacgggtagcatatgcttcccgggtagtagtatatactatccagactaaccctaattcaatagcatatgttacccaacgggaagcatatgctatcgaattagggttagtaaaagggtcctaaggaacagcgatgtaggtgggcgggccaagataggggcgcgattgctgcgatctggaggacaaattacacacacttgcgcctgagcgccaagcacagggttgttggtcctcatattcacgaggtcgctgagagcacggtgggctaatgttgccatgggtagcatatactacccaaatatctggatagcatatgctatcctaatctatatctgggtagcataggctatcctaatctatatctgggtagcatatgctatcctaatctatatctgggtagtatatgctatcctaatttatatctgggtagcataggctatcctaatctatatctgggtagcatatgctatcctaatctatatctgggtagtatatgctatcctaatctgtatccgggtagcatatgctatcctaatagagattagggtagtatatgctatcctaatttatatctgggtagcatatactacccaaatatctggatagcatatgctatcctaatctatatctgggtagcatatgctatcctaatctatatctgggtagcataggctatcctaatctatatctgggtagcatatgctatcctaatctatatctgggtagtatatgctatcctaatttatatctgggtagcataggctatcctaatctatatctgggtagcatatgctatcctaatctatatctgggtagtatatgctatcctaatctgtatccgggtagcatatgctatcctcatgataagctgtcaaacatgagaattaattcttgaagacgaaagggcctcgtgatacgcctatttttataggttaatgtcatgataataatggtttcttagacgtcaggtggcacttttcggggaaatgtgcgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtgttgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgcagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtctcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaactgtcagaccaagtttactcatatatactttagattgatttaaaacttcatttttaatttaaaaggatctaggtgaagatcctttttgataatctcatgaccaaaatcccttaacgtgagttttcgttccactgagcgtcagaccccgtagaaaagatcaaaggatcttcttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagata"
 
 sophisticatedFeatures :: [(Feature, Range)]
-sophisticatedFeatures = 
-  [ (Feature "source" 
+sophisticatedFeatures =
+  [ (Feature "source"
       [ ("organism", "synthetic DNA construct")
       , ("mol_type", "other DNA")
       ]
     , preciseSpan (0, 6950))
 
-  , (Feature "rep_origin" 
+  , (Feature "rep_origin"
       [ ("label", "pUCorigin and also a multiline property")
       , ("note", "/vntifkey=33")
       ]
     , Join [Point 0, preciseSpan (6550, 6950)])
 
-  , (Feature "enhancer" 
+  , (Feature "enhancer"
       [ ("label", "cmv enhanser")
       , ("label", "cmv\\enhanser")
       , ("note", "/vntifkey=9")
@@ -251,7 +252,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (1011, 1918))
 
-  , (Feature "primer_bind" 
+  , (Feature "primer_bind"
       [ ("label", "inv olig1") ]
     , preciseSpan (1501, 1521))
 
@@ -269,7 +270,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (1953, 2009))
 
-  , (Feature "misc_feature"   
+  , (Feature "misc_feature"
       [ ("label", "START")
       , ("note", "START")
       , ("note", "/ugene_name=START")
@@ -277,7 +278,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (1953, 1955))
 
-  , (Feature "misc_feature"   
+  , (Feature "misc_feature"
       [ ("label", "GFP stuffer")
       , ("note", "GFP stuffer")
       , ("note", "/ugene_name=GFP\\ stuffer")
@@ -285,7 +286,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (2010, 3738))
 
-  , (Feature "misc_feature"   
+  , (Feature "misc_feature"
       [ ("label", "CK")
       , ("note", "CK")
       , ("note", "/ugene_name=CK")
@@ -293,7 +294,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (3739, 4059))
 
-  , (Feature "misc_feature"   
+  , (Feature "misc_feature"
       [ ("label", "STOP")
       , ("note", "STOP")
       , ("note", "/ugene_name=STOP")
@@ -301,7 +302,7 @@ sophisticatedFeatures =
       ]
     , preciseSpan (4060, 4065))
 
-  , (Feature "misc_feature"  
+  , (Feature "misc_feature"
       [ ("gene", "SV40_PA term")
       , ("label", "SV40_PA term")
       , ("label", "SV40_PA\\term")
@@ -309,18 +310,18 @@ sophisticatedFeatures =
       ]
     , preciseSpan (4078, 4316))
 
-  , (Feature "primer_bind"   
+  , (Feature "primer_bind"
       [ ("label", "pEE_Clab") ]
     , preciseSpan (4349, 4369))
 
-  , (Feature "rep_origin"     
+  , (Feature "rep_origin"
       [ ("label", "EBV ori")
       , ("label", "EBV\\ori")
       , ("note", "/vntifkey=33")
       ]
     , preciseSpan (4581, 4974))
 
-  , (Feature "CDS"          
+  , (Feature "CDS"
       [ ("codon_start", "1")
       , ("label", "AmpR")
       , ("note", "/vntifkey=4")
@@ -328,3 +329,10 @@ sophisticatedFeatures =
       ]
     , preciseSpan (5542, 6402))
   ]
+
+spacesInLocusName :: FilePath -> Spec
+spacesInLocusName path =
+  describe "should correct parse gb with spaces in plasmid name in locus" $
+  it "" $ do
+    mt <- meta <$> fromFile path
+    name (locus mt) `shouldBe` "BLABLA BLABLA BLABLA"
\ No newline at end of file