diff --git a/ChangeLog.md b/ChangeLog.md index 1130c2f..59dc1e6 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,6 +2,10 @@ ## [Unreleased] +## [0.1.4.2] - 2021-10-14 +### Changed +- More types of multiline properties are supported. + ## [0.1.4.1] - 2021-10-07 ### Changed - CI fix diff --git a/package.yaml b/package.yaml index 0d1a976..faf595e 100644 --- a/package.yaml +++ b/package.yaml @@ -1,5 +1,5 @@ name: cobot-io -version: 0.1.4.1 +version: 0.1.4.2 github: "biocad/cobot-io" license: BSD3 category: Bio diff --git a/src/Bio/GB/Parser.hs b/src/Bio/GB/Parser.hs index 3c2200f..3f3fc52 100644 --- a/src/Bio/GB/Parser.hs +++ b/src/Bio/GB/Parser.hs @@ -5,17 +5,20 @@ module Bio.GB.Parser , rangeP ) where -import Bio.GB.Type (Feature (..), Form (..), GenBankSequence (..), Locus (..), - Meta (..), Parser, Reference (..), Source (..), Version (..)) -import Bio.Sequence (Border (..), MarkedSequence, Range (..), RangeBorder (..), - markedSequence, shiftRange) -import Control.Monad.Combinators (many, manyTill, optional, some, (<|>)) -import Data.Char (isAlphaNum, isSpace, isUpper) -import Data.Functor (($>)) -import Data.Text (Text, intercalate, pack, splitOn, unpack) -import Text.Megaparsec (option, satisfy, sepBy1, takeWhile1P, takeWhileP, try, ()) -import Text.Megaparsec.Char (char, digitChar, eol, letterChar, string) -import Text.Megaparsec.Char.Lexer (decimal) +import Bio.GB.Type (Feature (..), Form (..), GenBankSequence (..), + Locus (..), Meta (..), Parser, Reference (..), + Source (..), Version (..)) +import Bio.Sequence (Border (..), MarkedSequence, Range (..), + RangeBorder (..), markedSequence, shiftRange) +import Control.Monad.Combinators (many, manyTill, optional, some, (<|>)) +import Data.Char (isAlphaNum, isSpace, isUpper) +import Data.Functor (($>)) +import Data.Text (Text, intercalate, pack, splitOn, unpack) +import qualified Data.Text as T +import Text.Megaparsec (notFollowedBy, option, satisfy, sepBy1, takeWhile1P, + takeWhileP, try, ()) +import Text.Megaparsec.Char (char, digitChar, eol, letterChar, string) +import Text.Megaparsec.Char.Lexer (decimal) -- | Parser of .gb file. -- @@ -164,13 +167,28 @@ propsP = do propName <- takeWhile1P Nothing (/= '=') _ <- char '=' - propText <- try ((char '\"' *> takeWhile1P Nothing (/= '\"') <* char '\"') - <|> textWithSpacesP) - <* eolSpaceP + propText <- try ((char '\"' *> takeWhile1P Nothing (/= '\"') <* char '\"' <* eolSpaceP) + <|> multiLineProp) let propTextCorrect = mconcat $ filter (/= featureIndent2) $ splitOn featureIndent2 propText pure (propName, propTextCorrect) + where + indLine :: Parser Text + indLine = do + _ <- string featureIndent2 + notFollowedBy (char '/') + text <- textWithSpacesP + eolSpaceP + pure text + + multiLineProp :: Parser Text + multiLineProp = do + fstText <- textWithSpacesP <* eolSpaceP + rest <- many (try indLine) + pure $ T.concat (fstText : rest) + + -- | First level of identation in FEATURES table file. -- diff --git a/test/GB/fromYanaWithLove.gb b/test/GB/fromYanaWithLove.gb index ac7eea2..09f5793 100644 --- a/test/GB/fromYanaWithLove.gb +++ b/test/GB/fromYanaWithLove.gb @@ -17,6 +17,7 @@ FEATURES Location/Qualifiers /mol_type="other DNA" rep_origin join(1,6551..6951) /label=pUCorigin + and also a multiline property /note="/vntifkey=33" enhancer 449..858 /label=cmv enhanser diff --git a/test/GBParserSpec.hs b/test/GBParserSpec.hs index cec39a7..b27ee0e 100644 --- a/test/GBParserSpec.hs +++ b/test/GBParserSpec.hs @@ -226,7 +226,7 @@ sophisticatedFeatures = , preciseSpan (0, 6950)) , (Feature "rep_origin" - [ ("label", "pUCorigin") + [ ("label", "pUCorigin and also a multiline property") , ("note", "/vntifkey=33") ] , Join [Point 0, preciseSpan (6550, 6950)])