{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE OverloadedStrings #-}

module Poseidon.CLI.Validate where

import           Poseidon.BibFile          (readBibTeXFile)
import           Poseidon.GenotypeData     (GenotypeDataSpec (..),
                                            GenotypeFileSpec (..))
import           Poseidon.Janno            (JannoRows (..), readJannoFile)
import           Poseidon.Package          (PackageReadOptions (..),
                                            PoseidonException (..),
                                            PoseidonYamlStruct (..),
                                            defaultPackageReadOptions,
                                            getJointIndividualInfo,
                                            makePseudoPackageFromGenotypeData,
                                            readPoseidonPackageCollectionWithSkipIndicator,
                                            validateGeno)
import           Poseidon.SequencingSource (SeqSourceRows (..),
                                            readSeqSourceFile)
import           Poseidon.Utils            (PoseidonIO, logError, logInfo)

import           Control.Monad             (unless)
import           Control.Monad.Catch       (throwM)
import           Control.Monad.IO.Class    (liftIO)
import           Control.Monad.List        (forM_)
import qualified Data.ByteString           as B
import           Data.List                 (groupBy, intercalate, sortOn)
import           Data.Yaml                 (decodeEither')
import           Poseidon.EntityTypes      (IndividualInfo (..))
import           System.Exit               (exitFailure, exitSuccess)

-- | A datatype representing command line options for the validate command
data ValidateOptions = ValidateOptions
    { ValidateOptions -> ValidatePlan
_validatePlan       :: ValidatePlan
    , ValidateOptions -> Bool
_validateNoExitCode :: Bool
    , ValidateOptions -> Bool
_validateOnlyLatest :: Bool
    }

data ValidatePlan =
      ValPlanBaseDirs {
          ValidatePlan -> [[Char]]
_valPlanBaseDirs         :: [FilePath]
        , ValidatePlan -> Bool
_valPlanIgnoreGeno       :: Bool
        , ValidatePlan -> Bool
_valPlanFullGeno         :: Bool
        , ValidatePlan -> Bool
_valPlanIgnoreDuplicates :: Bool
        , ValidatePlan -> Bool
_valPlanIgnoreChecksums  :: Bool
        , ValidatePlan -> Bool
_valPlanIgnorePosVersion :: Bool
      }
    | ValPlanPoseidonYaml FilePath
    | ValPlanGeno GenotypeDataSpec
    | ValPlanJanno FilePath
    | ValPlanSSF FilePath
    | ValPlanBib FilePath

runValidate :: ValidateOptions -> PoseidonIO ()
runValidate :: ValidateOptions -> PoseidonIO ()
runValidate (ValidateOptions
    (ValPlanBaseDirs [[Char]]
baseDirs Bool
ignoreGeno Bool
fullGeno Bool
ignoreDup Bool
ignoreChecksums Bool
ignorePosVersion)
    Bool
noExitCode Bool
onlyLatest) = do
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " [[Char]]
baseDirs
    let pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
          _readOptIgnoreChecksums :: Bool
_readOptIgnoreChecksums  = Bool
ignoreChecksums
        , _readOptGenoCheck :: Bool
_readOptGenoCheck        = Bool
True
        , _readOptIgnoreGeno :: Bool
_readOptIgnoreGeno       = Bool
ignoreGeno
        , _readOptFullGeno :: Bool
_readOptFullGeno         = Bool
fullGeno
        , _readOptIgnorePosVersion :: Bool
_readOptIgnorePosVersion = Bool
ignorePosVersion
        , _readOptOnlyLatest :: Bool
_readOptOnlyLatest       = Bool
onlyLatest
        }
    -- load all packages
    ([PoseidonPackage]
allPackages, Bool
packagesSkipped) <- PackageReadOptions
-> [[Char]] -> PoseidonIO ([PoseidonPackage], Bool)
readPoseidonPackageCollectionWithSkipIndicator PackageReadOptions
pacReadOpts [[Char]]
baseDirs
    -- stop on duplicates
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
ignoreDup (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
        ([IndividualInfo]
allInds, [Bool]
_) <- [PoseidonPackage] -> ReaderT Env IO ([IndividualInfo], [Bool])
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m ([IndividualInfo], [Bool])
getJointIndividualInfo [PoseidonPackage]
allPackages
        let duplicateGroups :: [[IndividualInfo]]
duplicateGroups =   ([IndividualInfo] -> Bool)
-> [[IndividualInfo]] -> [[IndividualInfo]]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
1) (Int -> Bool)
-> ([IndividualInfo] -> Int) -> [IndividualInfo] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IndividualInfo] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length)
                              ([[IndividualInfo]] -> [[IndividualInfo]])
-> ([IndividualInfo] -> [[IndividualInfo]])
-> [IndividualInfo]
-> [[IndividualInfo]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IndividualInfo -> IndividualInfo -> Bool)
-> [IndividualInfo] -> [[IndividualInfo]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\IndividualInfo
a IndividualInfo
b -> IndividualInfo -> [Char]
indInfoName IndividualInfo
a [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== IndividualInfo -> [Char]
indInfoName IndividualInfo
b)
                              ([IndividualInfo] -> [[IndividualInfo]])
-> ([IndividualInfo] -> [IndividualInfo])
-> [IndividualInfo]
-> [[IndividualInfo]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IndividualInfo -> [Char]) -> [IndividualInfo] -> [IndividualInfo]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn IndividualInfo -> [Char]
indInfoName ([IndividualInfo] -> [[IndividualInfo]])
-> [IndividualInfo] -> [[IndividualInfo]]
forall a b. (a -> b) -> a -> b
$ [IndividualInfo]
allInds
        Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([[IndividualInfo]] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[IndividualInfo]]
duplicateGroups) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
            [Char] -> PoseidonIO ()
logError [Char]
"There are duplicated individuals in this package collection. \
                     \Set --ignoreDuplicates to ignore this issue."
            [[IndividualInfo]]
-> ([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [[IndividualInfo]]
duplicateGroups (([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ())
-> ([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \[IndividualInfo]
xs -> do
                [Char] -> PoseidonIO ()
logError ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Duplicate individual " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
forall a. Show a => a -> [Char]
show (IndividualInfo -> [Char]
indInfoName (IndividualInfo -> [Char]) -> IndividualInfo -> [Char]
forall a b. (a -> b) -> a -> b
$ [IndividualInfo] -> IndividualInfo
forall a. HasCallStack => [a] -> a
head [IndividualInfo]
xs)
                [IndividualInfo]
-> (IndividualInfo -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [IndividualInfo]
xs ((IndividualInfo -> PoseidonIO ()) -> PoseidonIO ())
-> (IndividualInfo -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \IndividualInfo
x -> do
                    [Char] -> PoseidonIO ()
logError ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"  " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ IndividualInfo -> [Char]
forall a. Show a => a -> [Char]
show IndividualInfo
x
            PoseidonException -> PoseidonIO ()
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> PoseidonIO ())
-> ([Char] -> PoseidonException) -> [Char] -> PoseidonIO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonCollectionException ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Detected duplicate individuals."
    -- fail the validation if not all POSEIDON.yml files yielded a clean package
    Bool -> Bool -> PoseidonIO ()
conclude (Bool -> Bool
not Bool
packagesSkipped) Bool
noExitCode
runValidate (ValidateOptions (ValPlanPoseidonYaml [Char]
path) Bool
noExitCode Bool
_) = do
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
    ByteString
bs <- IO ByteString -> ReaderT Env IO ByteString
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> ReaderT Env IO ByteString)
-> IO ByteString -> ReaderT Env IO ByteString
forall a b. (a -> b) -> a -> b
$ [Char] -> IO ByteString
B.readFile [Char]
path
    PoseidonYamlStruct
yml <- case ByteString -> Either ParseException PoseidonYamlStruct
forall a. FromJSON a => ByteString -> Either ParseException a
decodeEither' ByteString
bs of
        Left ParseException
err  -> PoseidonException -> ReaderT Env IO PoseidonYamlStruct
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> ReaderT Env IO PoseidonYamlStruct)
-> PoseidonException -> ReaderT Env IO PoseidonYamlStruct
forall a b. (a -> b) -> a -> b
$ [Char] -> ParseException -> PoseidonException
PoseidonYamlParseException [Char]
path ParseException
err
        Right PoseidonYamlStruct
pac -> PoseidonYamlStruct -> ReaderT Env IO PoseidonYamlStruct
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (PoseidonYamlStruct
pac :: PoseidonYamlStruct)
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Read .yml file of package " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ PoseidonYamlStruct -> [Char]
_posYamlTitle PoseidonYamlStruct
yml
    Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanGeno GenotypeDataSpec
geno) Bool
noExitCode Bool
_) = do
    let gFile :: [Char]
gFile = case GenotypeDataSpec -> GenotypeFileSpec
genotypeFileSpec GenotypeDataSpec
geno of
            GenotypeEigenstrat [Char]
gf Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ -> [Char]
gf
            GenotypePlink      [Char]
gf Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ -> [Char]
gf
            GenotypeVCF        [Char]
gf Maybe [Char]
_         -> [Char]
gf
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
gFile
    PoseidonPackage
pac <- GenotypeDataSpec -> PoseidonIO PoseidonPackage
makePseudoPackageFromGenotypeData GenotypeDataSpec
geno
    PoseidonPackage -> Bool -> PoseidonIO ()
validateGeno PoseidonPackage
pac Bool
True
    Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanJanno [Char]
path) Bool
noExitCode Bool
_) = do
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
    (JannoRows [JannoRow]
entries) <- [Char] -> PoseidonIO JannoRows
readJannoFile [Char]
path
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([JannoRow] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [JannoRow]
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
    Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanSSF [Char]
path) Bool
noExitCode Bool
_) = do
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
    (SeqSourceRows [SeqSourceRow]
entries) <- [Char] -> PoseidonIO SeqSourceRows
readSeqSourceFile [Char]
path
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([SeqSourceRow] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SeqSourceRow]
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
    Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanBib [Char]
path) Bool
noExitCode Bool
_) = do
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
    BibTeX
entries <- IO BibTeX -> ReaderT Env IO BibTeX
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO BibTeX -> ReaderT Env IO BibTeX)
-> IO BibTeX -> ReaderT Env IO BibTeX
forall a b. (a -> b) -> a -> b
$ [Char] -> IO BibTeX
readBibTeXFile [Char]
path
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (BibTeX -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length BibTeX
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
    Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode

conclude :: Bool -> Bool -> PoseidonIO ()
conclude :: Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode = do
    [Char] -> PoseidonIO ()
logInfo [Char]
"Validation passed"
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
noExitCode (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO ()
forall a. IO a
exitSuccess
conclude Bool
False Bool
noExitCode = do
    [Char] -> PoseidonIO ()
logError [Char]
"Validation failed"
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
noExitCode (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO ()
forall a. IO a
exitFailure