Commit b228824

Reworked network stuff to use libcurl - it supports HTTPS and has easier error handling. Added error handling! Transform functions now return Maybe articles.
1 parent 6f024b0 commit b228824

File tree

5 files changed: +54 -49 lines
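A minimal standalone sketch of the error-handling shape this commit moves to: fetching a feed now yields a Maybe, failures are reported and then dropped with catMaybes, so one dead feed no longer aborts the whole run. The function body and URLs below are invented stand-ins, not the project's real fetchFeed or grabUrl.

import Data.Maybe (catMaybes)

-- Invented stand-in for fetchFeed/grabUrl: the "bad" URL simulates a network failure.
fetchFeed :: String -> IO (Maybe String)
fetchFeed url
  | url == "http://bad.example" = do
      putStrLn ("Error fetching " ++ url)     -- report the failure...
      return Nothing                          -- ...but return Nothing instead of crashing
  | otherwise = return (Just ("<rss> feed from " ++ url ++ " </rss>"))

main :: IO ()
main = do
  results <- mapM fetchFeed ["http://good.example", "http://bad.example"]
  let feeds = catMaybes results               -- failed fetches are simply dropped
  putStrLn (show (length feeds) ++ " of " ++ show (length results) ++ " feeds fetched")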

PipeFeed.cabal

+3 -3

@@ -11,14 +11,14 @@ executable PipeFeed
     base >= 4,
     configurator >=0.2 && <0.3,
     text >=0.11.3 && <0.12,
-    network >=2.4.1 && <2.5,
-    HTTP >=4000.2.8 && <4000.3,
     regex-tdfa >=1.1.8 && <1.2,
     feed >=0.3.9 && <0.4,
     hashable >=1.2.0 && <1.3,
     directory >=1.2.0 && <1.3,
     strict >=0.3.2 && <0.4,
-    xml >=1.3.13 && <1.4
+    xml >=1.3.13 && <1.4,
+    download-curl >=0.1.4 && <0.2,
+    network >=2.4.1 && <2.5
   ghc-options: -fglasgow-exts
   extensions:
     NoMonomorphismRestriction,

src/PipeFeed.hs

+25 -25

@@ -2,6 +2,7 @@ module Main where

 import Config as Conf
 import Types as T
+import Util
 import Transforms

 import Text.Feed.Import
@@ -13,9 +14,7 @@ import Text.Feed.Types as FT
 import Text.Feed.Export(xmlFeed)
 import Text.XML.Light.Output(showTopElement)

-import Network.URI
-import Network.HTTP(simpleHTTP,getRequest,getResponseBody)
-import Data.Maybe(fromMaybe,maybe)
+import Data.Maybe(fromMaybe,maybe,catMaybes)
 import Data.Hashable(hash)
 import System.Directory
 import System.Environment
@@ -35,7 +34,7 @@ main = do
   print $ "Using " ++ cfgLoc ++ " as config file."
   config <- Conf.configure cfgLoc
   feeds <- mapM fetchFeed (feeds config)
-  feeds <- mapM (loadCache config . hashFeed) feeds
+  feeds <- mapM (loadCache config . hashFeed) (catMaybes feeds)
   feeds <- mapM transform feeds
   mapM_ (writeCache config) feeds
   mapM_ (deleteCache config) feeds
@@ -45,22 +44,24 @@ main = do

   return ()

-fetchFeed::T.Feed -> IO T.Feed
+fetchFeed::T.Feed -> IO (Maybe T.Feed)
 fetchFeed feedcfg = do
-  let url = feedurl feedcfg
-  rsp <- simpleHTTP (getRequest url) -- >>= fmap (take 100) . getResponseBody
-  feedText <- getResponseBody rsp
-  --print feedText
-  let feed = fromMaybe (error "Can't get feed") (parseFeedString feedText)
+  rsp <- grabUrl $ feedurl feedcfg
+  case rsp of
+    Nothing -> do
+      print $ "Error fetching " ++ feedurl feedcfg
+      return Nothing
+    Just feedText -> do
+      let feed = fromMaybe (error "Can't get feed") (parseFeedString feedText)

-  let items = Query.feedItems feed
-  let title = getFeedTitle feed
+      let items = Query.feedItems feed
+      let title = getFeedTitle feed

-  let author = fromMaybe "unknown" (getFeedAuthor feed)
+      let author = fromMaybe "unknown" (getFeedAuthor feed)

-  print $ "Fetched " ++ show (length items) ++ " items from " ++ name feedcfg
+      print $ "Fetched " ++ show (length items) ++ " items from " ++ name feedcfg

-  let articles = map (\item ->
+      let articles = map (\item ->
         Article{title=fromMaybe "Unknown title" (getItemTitle item)
                , body=maybe "nowt" fst3 (getItemEnclosure item)
                , itemurl=fromMaybe "Unknown url" (getItemLink item)
@@ -69,11 +70,7 @@ fetchFeed feedcfg = do
                , itemRec=item
                , bodyhash=Nothing} ) items

-  return ( feedcfg{ items=articles, feedRec=feed } )
-
---haskell doesn't have basic triple-or-above manip funcs, wtf?
-fst3::(a,b,c)->a
-fst3 (a,b,c) = a
+      return ( Just $ feedcfg{ items=articles, feedRec=feed } )

 --any on disk should be loaded
 --this loads body into matching article, and marks that article as transformed
@@ -134,19 +131,22 @@ hashFeed feed = feed{items=map (\(a,h) -> a{bodyhash=Just h})

 --apply the transforms in order
 --also marks transformed
+--todo one day implement some sort of "error article" in case of error, so that feed consumer can see issue in feed
 transform :: T.Feed -> IO T.Feed
 transform feed = do
   articles<-mapM (\article ->
        if transformed article
-       then return article{transformed=True}
+       then return $ Just $ article{transformed=True}
        else applyTransforms article
     ) (items feed)

-  return $ updateFeedItems feed articles
+  return $ updateFeedItems feed (catMaybes articles)

-  where applyTransforms :: Article -> IO Article
-        applyTransforms article = foldM (\acc f -> (f acc)) article
-                                        (transforms feed)
+  where applyTransforms :: Article -> IO (Maybe Article)
+        applyTransforms article = foldM (\acc f -> case acc of
+                                                     Nothing -> return Nothing
+                                                     Just art -> f art) (Just article)
+                                        (transforms feed)
 --write the resulting feed
 serialiseFeed:: T.Config -> T.Feed -> IO()
 serialiseFeed cfg feed = do
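The applyTransforms fold above is the heart of this file's change: the accumulator is a Maybe Article, so once any transform returns Nothing the remaining transforms become no-ops and the article is later dropped by catMaybes. A standalone sketch of the same foldM pattern, with toy String transforms standing in for fetchfull and regexsnip:

import Control.Monad (foldM)

-- Toy stand-ins for the Article transforms; each step may fail with Nothing.
stripSpaces, rejectEmpty :: String -> IO (Maybe String)
stripSpaces s = return (Just (filter (/= ' ') s))
rejectEmpty s = return (if null s then Nothing else Just s)

-- Same shape as applyTransforms: thread the accumulator through each step,
-- skipping the rest once it has become Nothing.
runAll :: [String -> IO (Maybe String)] -> String -> IO (Maybe String)
runAll fs x = foldM step (Just x) fs
  where step Nothing  _ = return Nothing
        step (Just v) f = f v

main :: IO ()
main = do
  runAll [stripSpaces, rejectEmpty] "a b c" >>= print   -- Just "abc"
  runAll [stripSpaces, rejectEmpty] "   "   >>= print   -- Nothing

The same chaining could be written with MaybeT from the transformers package; folding over a Maybe accumulator keeps it dependency-free.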

src/Transforms.hs

+7 -7

@@ -1,25 +1,25 @@
 module Transforms where

 import Text.Regex.TDFA ((=~))
---import Network.URI
---import Network.HTTP(simpleHTTP,getRequest,getResponseBody)
 import Util
 import Types
 import Control.Concurrent

 --fetches full length version of article and packs into body
-fetchfull::Article -> IO Article
+fetchfull::Article -> IO (Maybe Article)
 fetchfull article = do
   print $ "Fetching full article from " ++ itemurl article
-  newbody <- grabUrl $ itemurl article
+  rsp <- grabUrl $ itemurl article
   threadDelay $ 10*1000*1000
-  return $ updateArticle article newbody
+  return $ case rsp of
+             Nothing -> Nothing
+             Just newbody -> Just $ updateArticle article newbody

 --new body only has content between start_re and end_re
-regexsnip::String -> String -> Article -> IO Article
+regexsnip::String -> String -> Article -> IO (Maybe Article)
 regexsnip start_re end_re article = do
   let (_,_,post) = body article =~ start_re :: (String, String, String)
   let (newbody,_,_) = post =~ end_re :: (String,String,String)
-  return $ updateArticle article newbody
+  return $ Just $ updateArticle article newbody

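regexsnip leans on regex-tdfa's (=~) at result type (String, String, String), which splits the subject into (before, match, after). Matching start_re and keeping the third component, then matching end_re on that and keeping the first, snips out the text in between. A small self-contained example of the same two-step trick (the tags and input are made up):

import Text.Regex.TDFA ((=~))

-- Keep only the text between the first match of startRe and the next match of endRe,
-- using the same two (=~) triples that regexsnip uses.
snipBetween :: String -> String -> String -> String
snipBetween startRe endRe s =
  let (_, _, post)  = s    =~ startRe :: (String, String, String)  -- drop everything up to and including startRe
      (inner, _, _) = post =~ endRe   :: (String, String, String)  -- drop endRe and everything after it
  in inner

main :: IO ()
main = putStrLn (snipBetween "<article>" "</article>"
                             "<html><article>hello world</article><footer/></html>")
  -- prints: hello world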

src/Types.hs

+1 -2

@@ -1,6 +1,5 @@
 module Types where

-import Network.URI
 import Data.List(intercalate)
 import Text.Feed.Types as F
 import Text.RSS.Syntax as RSS2
@@ -22,7 +21,7 @@ data Feed = Feed { name :: String
                  , items :: [Article]
                  , feedurl :: String
                  , feedRec :: F.Feed
-                 , transforms :: [Article -> IO Article]
+                 , transforms :: [Article -> IO (Maybe Article)]
                  }

 data Config = Config { feeds :: [Types.Feed]

src/Util.hs

+18 -12

@@ -1,16 +1,22 @@
 module Util where

-import Network.Browser
-import Network.HTTP.Base (rspBody)
-import Network.HTTP (getRequest)
+import Network.Curl.Download
+import Data.Either

-grabUrl :: String -> IO String
-grabUrl url = fmap (rspBody . snd) . browse $ do
-    -- Disable logging output
-    setErrHandler $ const (return ())
-    setOutHandler $ const (return ())
-
-    setAllowRedirects True
-    request $ getRequest url
+grabUrl :: String -> IO (Maybe String)
+grabUrl url = do
+    result <- openURIString url
+    case result of
+      Left error -> do
+        print $ "Error fetching " ++ url ++ " Error: " ++ error
+        return Nothing
+      Right body ->
+        return $ Just body

-
+--for error handling Eithers - crap haskell stdlib again?
+isLeft (Left _) = True
+isLeft _ = False
+
+--haskell doesn't have basic triple-or-above manip funcs, wtf?
+fst3::(a,b,c)->a
+fst3 (a,b,c) = a
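grabUrl is now a thin wrapper over download-curl's openURIString, which performs the request through libcurl (hence the HTTPS support) and returns IO (Either String String), with Left carrying the curl error message. A rough sketch of calling it directly; the URL is just a placeholder:

import Network.Curl.Download (openURIString)

main :: IO ()
main = do
  -- openURIString :: String -> IO (Either String String)
  result <- openURIString "https://example.com/feed.xml"
  case result of
    Left err   -> putStrLn ("curl error: " ++ err)
    Right body -> putStrLn (take 80 body)   -- first 80 characters of the response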

0 commit comments