问题
I've been struggling with this Conduit code for a while, any help would be extremely appreciated. It is sort of like this code has been evolving by random mutation while the type checker is enforcing natural selection. Here is one of the fittest candidates I have so far:
import Conduit
import qualified Data.Conduit.Combinators as DCC
import Data.CSV.Conduit
import Data.Function ((&))
import Data.List.Split (splitOn)
import Data.Map as DM
import Data.Text (Text)
import qualified Data.Text as Txt
import qualified Data.Text.IO as DTIO
import Data.Vector (Vector)
import qualified Data.Vector as DV
import Path
import System.FilePath.Posix
retrieveSmaXtec :: Path Abs Dir -> IO (Vector (MapRow Text))
retrieveSmaXtec sxDir = do
files <- sourceDirectoryDeep False (fromAbsDir sxDir) & return
fileVector <- return $ runConduit $ files .| sinkVector
csvRowsByFile <- runConduit ((yieldM fileVector) .| DCC.mapM processCSV .| sinkVector)
fNameRows <- readFnameData $ yieldM fileVector
(pairFill fNameRows csvRowsByFile)
& fmap (uncurry DM.union)
& return
where
fileList :: Path Abs Dir -> IO (Vector FilePath)
fileList dir = sourceDirectoryDeep False (fromAbsDir sxDir) .| sinkVector & runConduit
expandZip :: MapRow Text -> Vector (MapRow Text) -> Vector (MapRow Text, MapRow Text)
expandZip one many = zip (replicate mlen one) many
where
mlen = length many
pairFill :: Vector (MapRow Text) -> Vector (Vector (MapRow Text)) -> Vector (MapRow Text, MapRow Text)
pairFill ones manies = join $ fmap (uncurry expandZip) (zip ones manies)
processCSV :: FilePath -> IO (Vector (MapRow Text))
processCSV fp = sourceFile fp
.| intoCSV defCSVSettings
.| sinkVector
& runConduitRes
readFnameData :: (MonadThrow m, MonadResource m, PrimMonad m) => ConduitT () FilePath m () -> m (Vector (MapRow Text))
readFnameData files = runConduit $ files .| processFileName .| sinkVector
processFileName :: (MonadResource m, MonadThrow m, PrimMonad m) =>
ConduitT FilePath (MapRow Text) m ()
processFileName = mapC go
where
go :: FilePath -> MapRow Text
go fp = takeFileName fp
& takeWhile (/= '.')
& splitOn "_"
& fmap Txt.pack
& zip colNames
& DM.fromList
colNames = [markKey, idKey]
The current point of confusion that occurs in both errors below is that [FilePath]
is popping up, when I expect everything to just be FilePath
. Now, even if this is fixed, I wouldn't doubt other errors could pop up, so if there's a solution for getting this going that involves a bit of a rework, I'd be happy to try it.
* Couldn't match type `Char' with `[Char]'
Expected type: ConduitM
[FilePath] Void IO (Vector (Vector (MapRow Text)))
Actual type: ConduitM
FilePath Void IO (Vector (Vector (MapRow Text)))
* In the second argument of `(.|)', namely
`DCC.mapM processCSV .| sinkVector'
In the first argument of `runConduit', namely
`((yieldM fileVector) .| DCC.mapM processCSV .| sinkVector)'
In a stmt of a 'do' block:
csvRowsByFile <- runConduit
((yieldM fileVector) .| DCC.mapM processCSV .| sinkVector)
|
40 | csvRowsByFile <- runConduit ((yieldM fileVector) .| DCC.mapM processCSV .| sinkVector)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* Couldn't match type `[Char]' with `Char'
Expected type: ConduitT () FilePath IO ()
Actual type: ConduitT () [FilePath] IO ()
* In the second argument of `($)', namely `yieldM fileVector'
In a stmt of a 'do' block:
fNameRows <- readFnameData $ yieldM fileVector
In the expression:
do files <- sourceDirectoryDeep False (fromAbsDir sxDir) & return
fileVector <- return $ runConduit $ files .| sinkVector
csvRowsByFile <- runConduit
((yieldM fileVector) .| DCC.mapM processCSV .| sinkVector)
fNameRows <- readFnameData $ yieldM fileVector
....
|
41 | fNameRows <- readFnameData $ yieldM fileVector
| ^^^^^^^^^^^^^^^^^
This question started in an alternative form at How to merge one-to-one and one-to-many input:output relationships in conduit? but now I'm just trying to get it to work, somehow, anyhow.
回答1:
I came up with a solution after getting some sleep and spending some more time on it. I still don't quite understand why some things I tried didn't work, but I'm reasonably happy with the end result (if not the path I took to get there, but learning is pain, at least sometimes). The major difference here is that I decided re-use the sourceDirectoryDeep
conduit (files
now) instead of trying to turn it into a vector directly. I also had to be a little more clever with how I wrote processCSV
, which did involve one false turn that still confuses me (Why can one sometimes get "No instance for CSV Text Text arising from a use of `intoCSV`" when using csv-conduit?).
retrieveSmaXtec :: Path Abs Dir -> IO (Vector SxRecord)
retrieveSmaXtec sxDir = do
csvRows <- getCsvRows
fnameRows <- getFileNameRows
rows <- return $ pairFill fnameRows csvRows & fmap (uncurry DM.union)
print rows
rows & fmap fromRow & catMaybes & return
where
getCsvRows :: IO (Vector (Vector (MapRow Text)))
getCsvRows = files .| processCSV & runConduitRes
getFileNameRows :: IO (Vector (MapRow Text))
getFileNameRows = files .| processFileName & runConduitRes
files :: MonadResource m => ConduitT () FilePath m ()
files = sourceDirectoryDeep False (fromAbsDir sxDir)
expandZip :: MapRow Text -> Vector (MapRow Text) -> Vector (MapRow Text, MapRow Text)
expandZip one many_ = zip (replicate mlen one) many_
where
mlen = length many_
pairFill :: Vector (MapRow Text) -> Vector (Vector (MapRow Text)) -> Vector (MapRow Text, MapRow Text)
pairFill ones manies = join $ fmap (uncurry expandZip) (zip ones manies)
processCSV :: (MonadResource m, MonadThrow m, PrimMonad m) =>
ConduitT FilePath Void m (Vector (Vector (MapRow Text)))
processCSV = mapMC (readCSVFile defCSVSettings) .| sinkVector
processFileName :: (MonadResource m, MonadThrow m, PrimMonad m) =>
ConduitT FilePath Void m (Vector (MapRow Text))
processFileName = mapC go
.| sinkVector
where
go :: FilePath -> MapRow Text
go fp = takeFileName fp
& takeWhile (/= '.')
& splitOn "_"
& fmap Txt.pack
& zip colNames
& DM.fromList
colNames = [markKey, idKey]
来源:https://stackoverflow.com/questions/54357690/how-to-fix-this-conduit-code-invovling-the-appearance-of-a-list-type-where-i-do