|
1 | 1 | {-# OPTIONS_GHC -fno-warn-orphans #-}
|
2 | 2 | module Bio.PDB
|
3 |
| - ( |
| 3 | + ( modelsFromPDBText |
| 4 | + , modelsFromPDBFile |
4 | 5 | ) where
|
5 | 6 |
|
6 |
| -import qualified Bio.PDB.Type as PDB |
| 7 | +import qualified Bio.PDB.Type as PDB |
| 8 | +import Bio.PDB.Reader (fromTextPDB, PDBWarnings) |
| 9 | +import Bio.PDB.BondRestoring (restoreModelGlobalBonds, restoreChainLocalBonds, residueID) |
| 10 | +import Bio.PDB.Functions (groupChainByResidue) |
7 | 11 | import Bio.Structure
|
8 | 12 |
|
9 |
| -import Control.Arrow ((&&&)) |
10 |
| -import Data.Coerce (coerce) |
11 |
| -import Data.Foldable (Foldable (..)) |
12 |
| -import Data.Text as T (Text, singleton, unpack) |
13 |
| -import qualified Data.Vector as V |
14 |
| -import Linear.V3 (V3 (..)) |
| 13 | +import Control.Arrow ((&&&)) |
| 14 | +import Control.Monad.IO.Class (MonadIO, liftIO) |
| 15 | + |
| 16 | +import Data.Text as T (Text, singleton, unpack, strip) |
| 17 | +import Data.Text.IO as TIO (readFile) |
| 18 | +import Data.Map (Map) |
| 19 | +import qualified Data.Map as M ((!), fromList) |
| 20 | +import qualified Data.Vector as V |
| 21 | +import Data.List (sort) |
| 22 | +import Data.Maybe (fromMaybe) |
| 23 | + |
| 24 | +import Text.Read (readMaybe) |
| 25 | + |
| 26 | +import Linear.V3 (V3 (..)) |
15 | 27 |
|
16 | 28 | instance StructureModels PDB.PDB where
|
17 | 29 | modelsOf PDB.PDB {..} = fmap mkModel models
|
18 | 30 | where
|
19 | 31 | mkModel :: PDB.Model -> Model
|
20 |
| - mkModel = flip Model V.empty . fmap mkChain |
21 |
| - |
22 |
| - mkChain :: PDB.Chain -> Chain |
23 |
| - mkChain = uncurry Chain . (mkChainName &&& mkChainResidues) |
24 |
| - |
25 |
| - mkChainName :: PDB.Chain -> Text |
26 |
| - mkChainName = T.singleton . PDB.atomChainID . safeFirstAtom |
27 |
| - |
28 |
| - mkChainResidues :: PDB.Chain -> V.Vector Residue |
29 |
| - mkChainResidues = V.fromList . fmap mkResidue . flip groupByResidue [] . pure . toList |
30 |
| - |
31 |
| - -- can be rewritten with sortOn and groupBy |
32 |
| - groupByResidue :: [[PDB.Atom]] -> [PDB.Atom] -> [[PDB.Atom]] |
33 |
| - groupByResidue res [] = res |
34 |
| - groupByResidue [] (x : xs) = groupByResidue [[x]] xs |
35 |
| - groupByResidue res@(lastList : resultTail) (x : xs) |
36 |
| - | (PDB.atomResSeq x, PDB.atomICode x) == (PDB.atomResSeq (head lastList), PDB.atomICode (head lastList)) |
37 |
| - = groupByResidue ((x : lastList) : resultTail) xs |
38 |
| - | otherwise = groupByResidue ([x] : res) xs |
39 |
| - |
40 |
| - safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom |
41 |
| - safeFirstAtom arr | V.length arr > 0 = arr V.! 0 |
42 |
| - | otherwise = error "Could not pick first atom" |
43 |
| - |
44 |
| - |
45 |
| - mkResidue :: [PDB.Atom] -> Residue |
46 |
| - mkResidue [] = error "Cound not make residue from empty list" |
47 |
| - mkResidue atoms' = Residue (PDB.atomResName . head $ atoms') |
48 |
| - (V.fromList $ mkAtom <$> atoms') |
49 |
| - V.empty -- now we do not read bonds |
50 |
| - Undefined -- now we do not read secondary structure |
51 |
| - "" -- chemical component type?! |
52 |
| - |
53 |
| - |
54 |
| - mkAtom :: PDB.Atom -> Atom |
55 |
| - mkAtom PDB.Atom{..} = Atom (coerce atomSerial) |
56 |
| - atomName |
57 |
| - atomElement |
58 |
| - (V3 atomX atomY atomZ) |
59 |
| - (read $ T.unpack atomCharge) |
60 |
| - atomTempFactor |
61 |
| - atomOccupancy |
| 32 | + mkModel model = Model (fmap mkChain model) (restoreModelGlobalBonds atomSerialToNilBasedIndex model) |
| 33 | + where |
| 34 | + atomSerialToNilBasedIndex :: Map Int Int |
| 35 | + atomSerialToNilBasedIndex = M.fromList $ allModelAtomSerials `zip` [0..] |
| 36 | + |
| 37 | + allModelAtomSerials :: [Int] |
| 38 | + allModelAtomSerials = sort . V.toList . fmap PDB.atomSerial . V.concat $ V.toList model |
| 39 | + |
| 40 | + mkChain :: PDB.Chain -> Chain |
| 41 | + mkChain = uncurry Chain . (mkChainName &&& mkChainResidues) |
| 42 | + |
| 43 | + mkChainName :: PDB.Chain -> Text |
| 44 | + mkChainName = T.singleton . PDB.atomChainID . safeFirstAtom |
| 45 | + |
| 46 | + mkChainResidues :: PDB.Chain -> V.Vector Residue |
| 47 | + mkChainResidues chain = V.fromList . fmap (mkResidue (restoreChainLocalBonds chain)) $ groupChainByResidue chain |
| 48 | + |
| 49 | + safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom |
| 50 | + safeFirstAtom arr | V.length arr > 0 = arr V.! 0 |
| 51 | + | otherwise = error "Could not pick first atom" |
| 52 | + |
| 53 | + mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue |
| 54 | + mkResidue _ [] = error "Cound not make residue from empty list" |
| 55 | + mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom) |
| 56 | + (PDB.atomResSeq firstResidueAtom) |
| 57 | + (PDB.atomICode firstResidueAtom) |
| 58 | + (V.fromList $ mkAtom <$> atoms') |
| 59 | + (localBondsMap M.! residueID firstResidueAtom) |
| 60 | + Undefined -- now we do not read secondary structure |
| 61 | + "" -- chemical component type?! |
| 62 | + where |
| 63 | + firstResidueAtom = head atoms' |
| 64 | + |
| 65 | + mkAtom :: PDB.Atom -> Atom |
| 66 | + mkAtom PDB.Atom{..} = Atom (GlobalID $ atomSerialToNilBasedIndex M.! atomSerial) |
| 67 | + atomSerial |
| 68 | + (T.strip atomName) |
| 69 | + atomElement |
| 70 | + (V3 atomX atomY atomZ) |
| 71 | + (fromMaybe 0 . readMaybe $ T.unpack atomCharge) |
| 72 | + atomTempFactor |
| 73 | + atomOccupancy |
| 74 | + |
| 75 | +modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], V.Vector Model)) |
| 76 | +modelsFromPDBFile = liftIO . fmap modelsFromPDBText . TIO.readFile |
| 77 | + |
| 78 | +modelsFromPDBText :: Text -> Either Text ([PDBWarnings], V.Vector Model) |
| 79 | +modelsFromPDBText pdbText = do |
| 80 | + (warnings, parsedPDB) <- fromTextPDB pdbText |
| 81 | + let models = modelsOf parsedPDB |
| 82 | + pure (warnings, models) |
0 commit comments