Skip to content

Commit c521fd1

Browse files
authored
IO fixes, bond restoring (#32)
* Residue index supported in PDB * Residue number and insertion code: the proper way * HETATMs now parsed from PDB * Insertion code support in mae * PDBSpec renamed to PDBParserSpec * Bond restoring first version. Doesn't work correctly for now * Global bond restoring work correctly * Fixed text * Type * Bond restoring works correctly. Input data must be slightly fixed * Added missing disulfide bridges to mae structures * Local bond for PDB, with no tests yet * Fuck, nothing works * Everything works! * pedantation * Fetching from REST API avoided in MMTF test * Changelog, version * Removed some strange comments in a test * Changes after the review * Added atomInputIndex. GlobalID is 0-based now * Changelog and formatting
1 parent 94796ca commit c521fd1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+45491
-606
lines changed

ChangeLog.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@
22

33
## [Unreleased]
44

5+
## [0.1.3.0] - 2020-03-27
6+
### Added
7+
- Residue index in `Structure`.
8+
- Atom input index in `Structure`.
9+
- Bond restoring for PDB.
10+
- Tests for PDB -> Model conversion.
11+
### Changed
12+
- GlobalID now 0-based in mae, PDB, and MMTF.
13+
### Fixed
14+
- A lot of things.
15+
516
## [0.1.2.10] - 2020-03-27
617
### Added
718
- Lenses for `Structure`.

package.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: cobot-io
2-
version: 0.1.2.10
2+
version: 0.1.3.0
33
github: "less-wrong/cobot-io"
44
license: BSD3
55
category: Bio

src/Bio/MAE.hs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
{-# LANGUAGE AllowAmbiguousTypes #-}
2-
{-# LANGUAGE CPP #-}
1+
{-# LANGUAGE AllowAmbiguousTypes #-}
2+
{-# LANGUAGE CPP #-}
33
{-# OPTIONS_GHC -fno-warn-orphans #-}
44

55
module Bio.MAE
@@ -8,13 +8,15 @@ module Bio.MAE
88
, Table (..)
99
, fromFile
1010
, fromText
11+
, modelsFromMaeText
12+
, modelsFromMaeFile
1113
, maeP
1214
) where
1315

1416
import Bio.MAE.Parser
1517
import Bio.MAE.Type (Block (..), FromMaeValue (..),
1618
Mae (..), MaeValue (..), Table (..))
17-
import Bio.Structure (Atom (..), Bond (..), Chain (..),
19+
import Bio.Structure (Atom (..), Bond (..), Chain (..), Model (..),
1820
GlobalID (..), LocalID (..),
1921
Model (..), Residue (..),
2022
SecondaryStructure (..),
@@ -50,6 +52,12 @@ fromFile f = liftIO (TIO.readFile f) >>= either fail pure . parseOnly maeP
5052
fromText :: Text -> Either Text Mae
5153
fromText = first T.pack . parseOnly maeP
5254

55+
modelsFromMaeFile :: (MonadIO m) => FilePath -> m (Either Text (Vector Model))
56+
modelsFromMaeFile = liftIO . fmap modelsFromMaeText . TIO.readFile
57+
58+
modelsFromMaeText :: Text -> Either Text (Vector Model)
59+
modelsFromMaeText maeText = modelsOf <$> fromText maeText
60+
5361
instance StructureModels Mae where
5462
modelsOf Mae{..} = V.fromList $ fmap blockToModel blocks
5563
where
@@ -117,18 +125,23 @@ instance StructureModels Mae where
117125
groupedByResidues = toGroupsOn by group
118126
residues = V.fromList $ fmap groupToResidue groupedByResidues
119127

120-
by :: Int -> (Int, Text)
121-
by i = (unsafeGetFromContents "i_m_residue_number" i, getFromContents defaultChainName "s_m_insertion_code" i)
128+
by :: Int -> (Int, Char)
129+
by i = (unsafeGetFromContents "i_m_residue_number" i, getFromContents defaultInsertionCode "s_m_insertion_code" i)
122130

123131
defaultChainName :: Text
124132
defaultChainName = "A"
125133

134+
defaultInsertionCode :: Char
135+
defaultInsertionCode = ' '
136+
126137
groupToResidue :: [Int] -> Residue
127138
groupToResidue [] = error "Group that is result of List.groupBy can't be empty."
128-
groupToResidue group@(h : _) = Residue name atoms (V.fromList localBonds) secondary chemCompType
139+
groupToResidue group@(h : _) = Residue name residueNumber insertionCode atoms (V.fromList localBonds) secondary chemCompType
129140
where
130-
name = stripQuotes $ unsafeGetFromContents "s_m_pdb_residue_name" h
131-
atoms = V.fromList $ fmap indexToAtom group
141+
name = stripQuotes $ unsafeGetFromContents "s_m_pdb_residue_name" h
142+
residueNumber = unsafeGetFromContents "i_m_residue_number" h
143+
insertionCode = unsafeGetFromContents "s_m_insertion_code" h
144+
atoms = V.fromList $ fmap indexToAtom group
132145

133146
localInds = [0 .. length group - 1]
134147
globalToLocal = M.fromList $ zip group localInds
@@ -146,6 +159,7 @@ instance StructureModels Mae where
146159

147160
indexToAtom :: Int -> Atom
148161
indexToAtom i = Atom (GlobalID i)
162+
(i + 1)
149163
(stripQuotes $ getFromContentsI "s_m_pdb_atom_name")
150164
(elIndToElement M.! getFromContentsI "i_m_atomic_number")
151165
coords

src/Bio/MAE/Type.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module Bio.MAE.Type
99
import Data.Map.Strict (Map)
1010
import Data.Maybe (fromJust)
1111
import Data.Text (Text)
12+
import qualified Data.Text as T (head, null, dropAround)
1213

1314
data Mae = Mae { version :: Text
1415
, blocks :: [Block]
@@ -58,3 +59,11 @@ instance FromMaeValue Text where
5859
fromMaeValue :: MaeValue -> Maybe Text
5960
fromMaeValue (StringMaeValue t) = Just t
6061
fromMaeValue _ = Nothing
62+
63+
instance FromMaeValue Char where
64+
fromMaeValue :: MaeValue -> Maybe Char
65+
fromMaeValue (StringMaeValue t) = Just $ if T.null t then ' ' else T.head $ stripQuotes t
66+
fromMaeValue _ = Nothing
67+
68+
stripQuotes :: Text -> Text
69+
stripQuotes = T.dropAround (== '"')

src/Bio/MMTF.hs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ fetch pdbid = do let url = fromString $ "https://mmtf.rcsb.org/v1.0/full/" <> pd
4141
decode (getResponseBody resp)
4242

4343
instance StructureModels MMTF where
44+
-- TODO: add global bonds
4445
modelsOf m = l2v (flip Model empty . l2v <$> zipWith (zipWith Chain) chainNames chainResis)
4546
where
4647
chainsCnts = fromIntegral <$> toList (chainsPerModel (model m))
@@ -70,7 +71,8 @@ instance StructureModels MMTF where
7071
in (end, mkAtom <$> zip4 cl nl el ics)
7172

7273
mkResidue :: (GroupType, SecondaryStructure, [Atom]) -> Residue
73-
mkResidue (gt, ss, atoms') = Residue (gtGroupName gt) (l2v atoms')
74+
-- TODO: support residue number here
75+
mkResidue (gt, ss, atoms') = Residue (gtGroupName gt) (-1) ' ' (l2v atoms')
7476
(mkBonds (gtBondAtomList gt) (gtBondOrderList gt))
7577
ss (gtChemCompType gt)
7678

@@ -87,13 +89,14 @@ instance StructureModels MMTF where
8789
z = zCoordList (atom m)
8890
o = occupancyList (atom m)
8991
b = bFactorList (atom m)
90-
in Atom (GlobalID $ fromIntegral (i ! idx))
91-
n
92-
e
93-
(V3 (x ! idx) (y ! idx) (z ! idx))
94-
fc
95-
(b ! idx)
96-
(o ! idx)
92+
in Atom (GlobalID idx)
93+
(fromIntegral $ i ! idx)
94+
n
95+
e
96+
(V3 (x ! idx) (y ! idx) (z ! idx))
97+
fc
98+
(b ! idx)
99+
(o ! idx)
97100

98101
cutter :: [Int] -> [a] -> [[a]]
99102
cutter [] [] = []

src/Bio/PDB.hs

Lines changed: 71 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,82 @@
11
{-# OPTIONS_GHC -fno-warn-orphans #-}
22
module Bio.PDB
3-
(
3+
( modelsFromPDBText
4+
, modelsFromPDBFile
45
) where
56

6-
import qualified Bio.PDB.Type as PDB
7+
import qualified Bio.PDB.Type as PDB
8+
import Bio.PDB.Reader (fromTextPDB, PDBWarnings)
9+
import Bio.PDB.BondRestoring (restoreModelGlobalBonds, restoreChainLocalBonds, residueID)
10+
import Bio.PDB.Functions (groupChainByResidue)
711
import Bio.Structure
812

9-
import Control.Arrow ((&&&))
10-
import Data.Coerce (coerce)
11-
import Data.Foldable (Foldable (..))
12-
import Data.Text as T (Text, singleton, unpack)
13-
import qualified Data.Vector as V
14-
import Linear.V3 (V3 (..))
13+
import Control.Arrow ((&&&))
14+
import Control.Monad.IO.Class (MonadIO, liftIO)
15+
16+
import Data.Text as T (Text, singleton, unpack, strip)
17+
import Data.Text.IO as TIO (readFile)
18+
import Data.Map (Map)
19+
import qualified Data.Map as M ((!), fromList)
20+
import qualified Data.Vector as V
21+
import Data.List (sort)
22+
import Data.Maybe (fromMaybe)
23+
24+
import Text.Read (readMaybe)
25+
26+
import Linear.V3 (V3 (..))
1527

1628
instance StructureModels PDB.PDB where
1729
modelsOf PDB.PDB {..} = fmap mkModel models
1830
where
1931
mkModel :: PDB.Model -> Model
20-
mkModel = flip Model V.empty . fmap mkChain
21-
22-
mkChain :: PDB.Chain -> Chain
23-
mkChain = uncurry Chain . (mkChainName &&& mkChainResidues)
24-
25-
mkChainName :: PDB.Chain -> Text
26-
mkChainName = T.singleton . PDB.atomChainID . safeFirstAtom
27-
28-
mkChainResidues :: PDB.Chain -> V.Vector Residue
29-
mkChainResidues = V.fromList . fmap mkResidue . flip groupByResidue [] . pure . toList
30-
31-
-- can be rewritten with sortOn and groupBy
32-
groupByResidue :: [[PDB.Atom]] -> [PDB.Atom] -> [[PDB.Atom]]
33-
groupByResidue res [] = res
34-
groupByResidue [] (x : xs) = groupByResidue [[x]] xs
35-
groupByResidue res@(lastList : resultTail) (x : xs)
36-
| (PDB.atomResSeq x, PDB.atomICode x) == (PDB.atomResSeq (head lastList), PDB.atomICode (head lastList))
37-
= groupByResidue ((x : lastList) : resultTail) xs
38-
| otherwise = groupByResidue ([x] : res) xs
39-
40-
safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
41-
safeFirstAtom arr | V.length arr > 0 = arr V.! 0
42-
| otherwise = error "Could not pick first atom"
43-
44-
45-
mkResidue :: [PDB.Atom] -> Residue
46-
mkResidue [] = error "Cound not make residue from empty list"
47-
mkResidue atoms' = Residue (PDB.atomResName . head $ atoms')
48-
(V.fromList $ mkAtom <$> atoms')
49-
V.empty -- now we do not read bonds
50-
Undefined -- now we do not read secondary structure
51-
"" -- chemical component type?!
52-
53-
54-
mkAtom :: PDB.Atom -> Atom
55-
mkAtom PDB.Atom{..} = Atom (coerce atomSerial)
56-
atomName
57-
atomElement
58-
(V3 atomX atomY atomZ)
59-
(read $ T.unpack atomCharge)
60-
atomTempFactor
61-
atomOccupancy
32+
mkModel model = Model (fmap mkChain model) (restoreModelGlobalBonds atomSerialToNilBasedIndex model)
33+
where
34+
atomSerialToNilBasedIndex :: Map Int Int
35+
atomSerialToNilBasedIndex = M.fromList $ allModelAtomSerials `zip` [0..]
36+
37+
allModelAtomSerials :: [Int]
38+
allModelAtomSerials = sort . V.toList . fmap PDB.atomSerial . V.concat $ V.toList model
39+
40+
mkChain :: PDB.Chain -> Chain
41+
mkChain = uncurry Chain . (mkChainName &&& mkChainResidues)
42+
43+
mkChainName :: PDB.Chain -> Text
44+
mkChainName = T.singleton . PDB.atomChainID . safeFirstAtom
45+
46+
mkChainResidues :: PDB.Chain -> V.Vector Residue
47+
mkChainResidues chain = V.fromList . fmap (mkResidue (restoreChainLocalBonds chain)) $ groupChainByResidue chain
48+
49+
safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
50+
safeFirstAtom arr | V.length arr > 0 = arr V.! 0
51+
| otherwise = error "Could not pick first atom"
52+
53+
mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
54+
mkResidue _ [] = error "Cound not make residue from empty list"
55+
mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom)
56+
(PDB.atomResSeq firstResidueAtom)
57+
(PDB.atomICode firstResidueAtom)
58+
(V.fromList $ mkAtom <$> atoms')
59+
(localBondsMap M.! residueID firstResidueAtom)
60+
Undefined -- now we do not read secondary structure
61+
"" -- chemical component type?!
62+
where
63+
firstResidueAtom = head atoms'
64+
65+
mkAtom :: PDB.Atom -> Atom
66+
mkAtom PDB.Atom{..} = Atom (GlobalID $ atomSerialToNilBasedIndex M.! atomSerial)
67+
atomSerial
68+
(T.strip atomName)
69+
atomElement
70+
(V3 atomX atomY atomZ)
71+
(fromMaybe 0 . readMaybe $ T.unpack atomCharge)
72+
atomTempFactor
73+
atomOccupancy
74+
75+
modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], V.Vector Model))
76+
modelsFromPDBFile = liftIO . fmap modelsFromPDBText . TIO.readFile
77+
78+
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], V.Vector Model)
79+
modelsFromPDBText pdbText = do
80+
(warnings, parsedPDB) <- fromTextPDB pdbText
81+
let models = modelsOf parsedPDB
82+
pure (warnings, models)

0 commit comments

Comments
 (0)