I want to use xml-conduit, specifically Text.XML.Stream.Parse in order to lazily extract a list of objects from a large XML file.
As a test case, I use the recently
I made an edit to bring the insightful example from M. Snoyman up to date, but it was tossed by mediocre power-trippers, so I am posting the updated version here instead.
The original no longer compiles and produces many deprecation warnings (legacy syntax).
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
import Control.Applicative ((<*))
import Control.Concurrent (threadDelay)
import Control.Monad (forever, void)
import Control.Monad.Catch (MonadThrow)
import Control.Monad.IO.Class (MonadIO (liftIO))
import Data.ByteString (ByteString)
import Data.Conduit
import qualified Data.Conduit.List as CL
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8)
import Data.XML.Types (Event)
import Text.XML.Stream.Parse
-- | Endless simulated XML input, used for testing instead of shipping a
-- large input data file.
--
-- Emits the opening @<users>@ tag once, then one @<row>@ element per second,
-- forever. The closing tag is never sent.
--
-- NOTE(review): the XML payload of the string literals was lost from this
-- listing; the values below are a minimal reconstruction that matches what
-- 'parseUsers' (@users@ element) and 'parseUserRow' (@row@ element with a
-- @DisplayName@ attribute) look for. Confirm against the original example.
infiniteInput :: MonadIO m => ConduitT () ByteString m ()
infiniteInput = do
    -- Open the root element; rows are streamed inside it.
    yield "<users>"
    forever $ do
        yield $ encodeUtf8
            "<row Id=\"-1\" DisplayName=\"Community\"/>\n"
        -- threadDelay takes microseconds: pause one second between rows.
        liftIO $ threadDelay 1000000
    -- yield "</users>" -- will never be reached
-- | A user extracted from the XML stream.
data User = User
    { name :: Text  -- ^ Filled from the @DisplayName@ attribute by 'parseUserRow'.
    }
    deriving (Show)
-- | Parse one @row@ element into a 'User'.
--
-- The @DisplayName@ attribute is required and becomes the user's 'name';
-- all other attributes on the element are ignored. The 'Maybe' in the result
-- comes from 'tag'' (presumably 'Nothing' when the next element is not a
-- @row@ -- confirm against the xml-conduit documentation).
parseUserRow :: MonadThrow m => forall o. ConduitT Event o m (Maybe User)
parseUserRow = tag' "row" displayNameAttr (pure . User)
  where
    -- Take the one attribute we care about, then discard the rest.
    displayNameAttr = requireAttr "DisplayName" <* ignoreAttrs
-- | Stream every 'User' parsed from inside the attribute-less @users@ element.
--
-- Rows are handed to 'manyYield', which yields each parsed 'User' downstream.
-- The 'Maybe' result of 'tagNoAttr' is discarded.
parseUsers :: MonadThrow m => ConduitT Event User m ()
parseUsers = () <$ tagNoAttr "users" (manyYield parseUserRow)
-- yieldWhileJust below is a hand-written alternative to manyYield, which
-- Text.XML.Stream.Parse now provides (and which parseUsers uses above).
-- | Run @consumer@ repeatedly, yielding each 'Just' result downstream, and
-- stop as soon as it returns 'Nothing'.
yieldWhileJust :: Monad m
               => ConduitT a b m (Maybe b)
               -> ConduitT a b m ()
yieldWhileJust consumer = go
  where
    -- One iteration: run the consumer, then either finish or emit and repeat.
    go = consumer >>= maybe (pure ()) emitThenContinue
    emitThenContinue item = yield item >> go
-- | Wire the simulated input through the XML event parser and the user
-- parser, printing each 'User' as it is produced.
main :: IO ()
main = runConduit pipeline
  where
    pipeline =
        infiniteInput
            .| parseBytes def
            .| parseUsers
            .| CL.mapM_ print
ghc 8.6.5, xml-conduit 1.9.0.0