{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections     #-}

module Poseidon.CLI.Jannocoalesce where

import           Poseidon.Janno         (JannoRow (..), JannoRows (..),
                                         readJannoFile, writeJannoFile)
import           Poseidon.Package       (PackageReadOptions (..),
                                         defaultPackageReadOptions,
                                         getJointJanno,
                                         readPoseidonPackageCollection)
import           Poseidon.Utils         (PoseidonException (..), PoseidonIO,
                                         logDebug, logInfo, logWarning)

import           Control.Monad          (filterM, forM_, when)
import           Control.Monad.Catch    (MonadThrow, throwM)
import           Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8  as BSC
import qualified Data.Csv               as Csv
import qualified Data.HashMap.Strict    as HM
import qualified Data.IORef             as R
import           Data.List              ((\\))
import           Data.Text              (pack, replace, unpack)
import           System.Directory       (createDirectoryIfMissing)
import           System.FilePath        (takeDirectory)
import           Text.Regex.TDFA        ((=~))

-- the source can be a single janno file, or a set of base directories as usual.
data JannoSourceSpec = JannoSourceSingle FilePath | JannoSourceBaseDirs [FilePath]

data CoalesceJannoColumnSpec =
      AllJannoColumns
    | IncludeJannoColumns [BSC.ByteString]
    | ExcludeJannoColumns [BSC.ByteString]

data JannoCoalesceOptions = JannoCoalesceOptions
    { JannoCoalesceOptions -> JannoSourceSpec
_jannocoalesceSource           :: JannoSourceSpec
    , JannoCoalesceOptions -> [Char]
_jannocoalesceTarget           :: FilePath
    , JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceOutSpec          :: Maybe FilePath -- Nothing means "in place"
    , JannoCoalesceOptions -> CoalesceJannoColumnSpec
_jannocoalesceJannoColumns     :: CoalesceJannoColumnSpec
    , JannoCoalesceOptions -> Bool
_jannocoalesceOverwriteColumns :: Bool
    , JannoCoalesceOptions -> [Char]
_jannocoalesceSourceKey        :: String -- by default set to "Poseidon_ID"
    , JannoCoalesceOptions -> [Char]
_jannocoalesceTargetKey        :: String -- by default set to "Poseidon_ID"
    , JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceIdStrip          :: Maybe String -- an optional regex to strip from target and source keys
    }

runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce (JannoCoalesceOptions JannoSourceSpec
sourceSpec [Char]
target Maybe [Char]
outSpec CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip) = do
    JannoRows [JannoRow]
sourceRows <- case JannoSourceSpec
sourceSpec of
        JannoSourceSingle [Char]
sourceFile -> [Char] -> ReaderT Env IO JannoRows
readJannoFile [Char]
sourceFile
        JannoSourceBaseDirs [[Char]]
sourceDirs -> do
            let pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
                      _readOptIgnoreChecksums :: Bool
_readOptIgnoreChecksums      = Bool
True
                    , _readOptGenoCheck :: Bool
_readOptGenoCheck            = Bool
False
                    , _readOptIgnoreGeno :: Bool
_readOptIgnoreGeno           = Bool
True
                    , _readOptOnlyLatest :: Bool
_readOptOnlyLatest           = Bool
True
                }
            [PoseidonPackage] -> JannoRows
getJointJanno ([PoseidonPackage] -> JannoRows)
-> ReaderT Env IO [PoseidonPackage] -> ReaderT Env IO JannoRows
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> PackageReadOptions -> [[Char]] -> ReaderT Env IO [PoseidonPackage]
readPoseidonPackageCollection PackageReadOptions
pacReadOpts [[Char]]
sourceDirs
    JannoRows [JannoRow]
targetRows <- [Char] -> ReaderT Env IO JannoRows
readJannoFile [Char]
target

    [JannoRow]
newJanno <- [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip

    let outPath :: [Char]
outPath = [Char] -> ([Char] -> [Char]) -> Maybe [Char] -> [Char]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Char]
target [Char] -> [Char]
forall a. a -> a
id Maybe [Char]
outSpec
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Writing to file (directory will be created if missing): " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outPath
    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
        Bool -> [Char] -> IO ()
createDirectoryIfMissing Bool
True ([Char] -> [Char]
takeDirectory [Char]
outPath)
        [Char] -> JannoRows -> IO ()
writeJannoFile [Char]
outPath ([JannoRow] -> JannoRows
JannoRows [JannoRow]
newJanno)

type CounterMismatches = R.IORef Int
type CounterCopied     = R.IORef Int

makeNewJannoRows :: [JannoRow] -> [JannoRow] -> CoalesceJannoColumnSpec -> Bool -> String -> String -> Maybe String -> PoseidonIO [JannoRow]
makeNewJannoRows :: [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip = do
    [Char] -> PoseidonIO ()
logInfo [Char]
"Starting to coalesce..."
    IORef Int
counterMismatches <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
    IORef Int
counterCopied <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
    [JannoRow]
newRows <- (JannoRow -> ReaderT Env IO JannoRow)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
counterMismatches IORef Int
counterCopied) [JannoRow]
targetRows
    Int
counterCopiedVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterCopied
    Int
counterMismatchesVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterMismatches
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Copied " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterCopiedVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" values"
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
counterMismatchesVal Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$
        [Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Failed to find matches for " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterMismatchesVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" target rows in source"
    [JannoRow] -> PoseidonIO [JannoRow]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [JannoRow]
newRows
    where
        makeNewJannoRow :: CounterMismatches -> CounterCopied -> JannoRow -> PoseidonIO JannoRow
        makeNewJannoRow :: IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
cm IORef Int
cp JannoRow
targetRow = do
            [Char]
posId <- JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
targetRow [Char]
tKey
            [JannoRow]
sourceRowCandidates <- (JannoRow -> ReaderT Env IO Bool)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM (\JannoRow
r -> (Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeStrip [Char]
posId) ([Char] -> Bool) -> ReaderT Env IO [Char] -> ReaderT Env IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
r [Char]
sKey) [JannoRow]
sourceRows
            case [JannoRow]
sourceRowCandidates of
                [] -> do
                    [Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"no match for target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" in source"
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cm (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
targetRow
                [JannoRow
keyRow] -> IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
keyRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey
                [JannoRow]
_ -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> PoseidonException -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char] -> PoseidonException) -> [Char] -> PoseidonException
forall a b. (a -> b) -> a -> b
$ [Char]
"source file contains multiple rows with key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId

getKeyFromJanno :: (MonadThrow m) => JannoRow -> String -> m String
getKeyFromJanno :: forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
jannoRow [Char]
key = do
    let jannoRowDict :: NamedRecord
jannoRowDict = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
jannoRow
    case NamedRecord
jannoRowDict NamedRecord -> ByteString -> Maybe ByteString
forall k v. (Eq k, Hashable k) => HashMap k v -> k -> Maybe v
HM.!? ([Char] -> ByteString
BSC.pack [Char]
key) of
        Maybe ByteString
Nothing -> PoseidonException -> m [Char]
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> m [Char]) -> PoseidonException -> m [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char]
"Key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
key [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" not present in .janno file")
        Just ByteString
r -> [Char] -> m [Char]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> m [Char]) -> [Char] -> m [Char]
forall a b. (a -> b) -> a -> b
$ ByteString -> [Char]
BSC.unpack ByteString
r

matchWithOptionalStrip :: (Maybe String) -> String -> String -> Bool
matchWithOptionalStrip :: Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeRegex [Char]
id1 [Char]
id2 =
    case Maybe [Char]
maybeRegex of
        Maybe [Char]
Nothing -> [Char]
id1 [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2
        Just [Char]
r ->
            let id1stripped :: [Char]
id1stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id1
                id2stripped :: [Char]
id2stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id2
            in  [Char]
id1stripped [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2stripped
  where
    stripR :: String -> String -> String
    stripR :: [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
s =
        let match :: [Char]
match = [Char]
s [Char] -> [Char] -> [Char]
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
 RegexContext Regex source1 target) =>
source1 -> source -> target
=~ [Char]
r
        in  if [Char] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Char]
match then [Char]
s else Text -> [Char]
unpack (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
replace ([Char] -> Text
pack [Char]
match) Text
"" ([Char] -> Text
pack [Char]
s)

mergeRow :: CounterCopied -> JannoRow -> JannoRow -> CoalesceJannoColumnSpec -> Bool -> String -> String -> PoseidonIO JannoRow
mergeRow :: IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
sourceRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey = do
    let sourceKeys :: [ByteString]
sourceKeys        = NamedRecord -> [ByteString]
forall k v. HashMap k v -> [k]
HM.keys NamedRecord
sourceRowRecord
        sourceKeysDesired :: [ByteString]
sourceKeysDesired = [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
sourceKeys CoalesceJannoColumnSpec
fields
        -- fill in the target row with dummy values for desired fields that might not be present yet
        targetComplete :: NamedRecord
targetComplete    = NamedRecord -> NamedRecord -> NamedRecord
forall k v.
(Eq k, Hashable k) =>
HashMap k v -> HashMap k v -> HashMap k v
HM.union NamedRecord
targetRowRecord ([(ByteString, ByteString)] -> NamedRecord
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList ([(ByteString, ByteString)] -> NamedRecord)
-> [(ByteString, ByteString)] -> NamedRecord
forall a b. (a -> b) -> a -> b
$ (ByteString -> (ByteString, ByteString))
-> [ByteString] -> [(ByteString, ByteString)]
forall a b. (a -> b) -> [a] -> [b]
map (, ByteString
BSC.empty) [ByteString]
sourceKeysDesired)
        newRowRecord :: NamedRecord
newRowRecord      = (ByteString -> ByteString -> ByteString)
-> NamedRecord -> NamedRecord
forall k v1 v2. (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2
HM.mapWithKey ByteString -> ByteString -> ByteString
fillFromSource NamedRecord
targetComplete
        parseResult :: Either [Char] JannoRow
parseResult       = Parser JannoRow -> Either [Char] JannoRow
forall a. Parser a -> Either [Char] a
Csv.runParser (Parser JannoRow -> Either [Char] JannoRow)
-> (NamedRecord -> Parser JannoRow)
-> NamedRecord
-> Either [Char] JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NamedRecord -> Parser JannoRow
forall a. FromNamedRecord a => NamedRecord -> Parser a
Csv.parseNamedRecord (NamedRecord -> Either [Char] JannoRow)
-> NamedRecord -> Either [Char] JannoRow
forall a b. (a -> b) -> a -> b
$ NamedRecord
newRowRecord
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"matched target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
targetComplete  NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
tKey) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
              [Char]
" with source "   [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
sourceRowRecord NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
sKey)
    case Either [Char] JannoRow
parseResult of
        Left [Char]
err -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> ([Char] -> PoseidonException)
-> [Char]
-> ReaderT Env IO JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenericException ([Char] -> ReaderT Env IO JannoRow)
-> [Char] -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char]
".janno row-merge error: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
err
        Right JannoRow
r  -> do
            let newFields :: NamedRecord
newFields = (ByteString -> ByteString -> Maybe ByteString)
-> NamedRecord -> NamedRecord -> NamedRecord
forall k v w.
(Eq k, Hashable k) =>
(v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v
HM.differenceWith (\ByteString
v1 ByteString
v2 -> if ByteString
v1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
v2 then Maybe ByteString
forall a. Maybe a
Nothing else ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
v1) NamedRecord
newRowRecord NamedRecord
targetComplete
            if NamedRecord -> Bool
forall k v. HashMap k v -> Bool
HM.null NamedRecord
newFields then do
                [Char] -> PoseidonIO ()
logDebug [Char]
"-- no changes"
            else do
                [(ByteString, ByteString)]
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (NamedRecord -> [(ByteString, ByteString)]
forall k v. HashMap k v -> [(k, v)]
HM.toList NamedRecord
newFields) (((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ())
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \(ByteString
key, ByteString
val) -> do
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cp (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    [Char] -> PoseidonIO ()
logDebug ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"-- copied \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
val [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\" from column " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
key
            JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
r
  where
    targetRowRecord :: Csv.NamedRecord
    targetRowRecord :: NamedRecord
targetRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
targetRow
    sourceRowRecord :: Csv.NamedRecord
    sourceRowRecord :: NamedRecord
sourceRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
sourceRow
    determineDesiredSourceKeys :: [BSC.ByteString] -> CoalesceJannoColumnSpec -> [BSC.ByteString]
    determineDesiredSourceKeys :: [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
keys  CoalesceJannoColumnSpec
AllJannoColumns               = [ByteString]
keys
    determineDesiredSourceKeys [ByteString]
_    (IncludeJannoColumns [ByteString]
included) = [ByteString]
included
    determineDesiredSourceKeys [ByteString]
keys (ExcludeJannoColumns [ByteString]
excluded) = [ByteString]
keys [ByteString] -> [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a] -> [a]
\\ [ByteString]
excluded
    fillFromSource :: BSC.ByteString -> BSC.ByteString -> BSC.ByteString
    fillFromSource :: ByteString -> ByteString -> ByteString
fillFromSource ByteString
key ByteString
targetVal =
           -- don't overwrite key
        if ByteString
key ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Char] -> ByteString
BSC.pack [Char]
tKey
           -- overwrite field only if it's requested
           Bool -> Bool -> Bool
&& ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
key CoalesceJannoColumnSpec
fields
           -- overwrite only empty fields, except overwrite is set
           Bool -> Bool -> Bool
&& (ByteString
targetVal ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString
"n/a", ByteString
"", ByteString
BSC.empty] Bool -> Bool -> Bool
|| Bool
overwrite)
        then ByteString -> ByteString -> NamedRecord -> ByteString
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
HM.findWithDefault ByteString
"" ByteString
key NamedRecord
sourceRowRecord
        else ByteString
targetVal
    includeField :: BSC.ByteString -> CoalesceJannoColumnSpec -> Bool
    includeField :: ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
_    CoalesceJannoColumnSpec
AllJannoColumns         = Bool
True
    includeField ByteString
key (IncludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
xs
    includeField ByteString
key (ExcludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [ByteString]
xs