{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Poseidon.CLI.Validate where
import Poseidon.BibFile (readBibTeXFile)
import Poseidon.GenotypeData (GenotypeDataSpec (..),
GenotypeFileSpec (..))
import Poseidon.Janno (JannoRows (..), readJannoFile)
import Poseidon.Package (PackageReadOptions (..),
PoseidonException (..),
PoseidonYamlStruct (..),
defaultPackageReadOptions,
getJointIndividualInfo,
makePseudoPackageFromGenotypeData,
readPoseidonPackageCollectionWithSkipIndicator,
validateGeno)
import Poseidon.SequencingSource (SeqSourceRows (..),
readSeqSourceFile)
import Poseidon.Utils (PoseidonIO, logError, logInfo)
import Control.Monad (unless)
import Control.Monad.Catch (throwM)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.List (forM_)
import qualified Data.ByteString as B
import Data.List (groupBy, intercalate, sortOn)
import Data.Yaml (decodeEither')
import Poseidon.EntityTypes (IndividualInfo (..))
import System.Exit (exitFailure, exitSuccess)
data ValidateOptions = ValidateOptions
{ ValidateOptions -> ValidatePlan
_validatePlan :: ValidatePlan
, ValidateOptions -> Bool
_validateNoExitCode :: Bool
, ValidateOptions -> Bool
_validateOnlyLatest :: Bool
}
data ValidatePlan =
ValPlanBaseDirs {
ValidatePlan -> [[Char]]
_valPlanBaseDirs :: [FilePath]
, ValidatePlan -> Bool
_valPlanIgnoreGeno :: Bool
, ValidatePlan -> Bool
_valPlanFullGeno :: Bool
, ValidatePlan -> Bool
_valPlanIgnoreDuplicates :: Bool
, ValidatePlan -> Bool
_valPlanIgnoreChecksums :: Bool
, ValidatePlan -> Bool
_valPlanIgnorePosVersion :: Bool
}
| ValPlanPoseidonYaml FilePath
| ValPlanGeno GenotypeDataSpec
| ValPlanJanno FilePath
| ValPlanSSF FilePath
| ValPlanBib FilePath
runValidate :: ValidateOptions -> PoseidonIO ()
runValidate :: ValidateOptions -> PoseidonIO ()
runValidate (ValidateOptions
(ValPlanBaseDirs [[Char]]
baseDirs Bool
ignoreGeno Bool
fullGeno Bool
ignoreDup Bool
ignoreChecksums Bool
ignorePosVersion)
Bool
noExitCode Bool
onlyLatest) = do
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " [[Char]]
baseDirs
let pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
_readOptIgnoreChecksums :: Bool
_readOptIgnoreChecksums = Bool
ignoreChecksums
, _readOptGenoCheck :: Bool
_readOptGenoCheck = Bool
True
, _readOptIgnoreGeno :: Bool
_readOptIgnoreGeno = Bool
ignoreGeno
, _readOptFullGeno :: Bool
_readOptFullGeno = Bool
fullGeno
, _readOptIgnorePosVersion :: Bool
_readOptIgnorePosVersion = Bool
ignorePosVersion
, _readOptOnlyLatest :: Bool
_readOptOnlyLatest = Bool
onlyLatest
}
([PoseidonPackage]
allPackages, Bool
packagesSkipped) <- PackageReadOptions
-> [[Char]] -> PoseidonIO ([PoseidonPackage], Bool)
readPoseidonPackageCollectionWithSkipIndicator PackageReadOptions
pacReadOpts [[Char]]
baseDirs
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
ignoreDup (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
([IndividualInfo]
allInds, [Bool]
_) <- [PoseidonPackage] -> ReaderT Env IO ([IndividualInfo], [Bool])
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m ([IndividualInfo], [Bool])
getJointIndividualInfo [PoseidonPackage]
allPackages
let duplicateGroups :: [[IndividualInfo]]
duplicateGroups = ([IndividualInfo] -> Bool)
-> [[IndividualInfo]] -> [[IndividualInfo]]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
1) (Int -> Bool)
-> ([IndividualInfo] -> Int) -> [IndividualInfo] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IndividualInfo] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length)
([[IndividualInfo]] -> [[IndividualInfo]])
-> ([IndividualInfo] -> [[IndividualInfo]])
-> [IndividualInfo]
-> [[IndividualInfo]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IndividualInfo -> IndividualInfo -> Bool)
-> [IndividualInfo] -> [[IndividualInfo]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\IndividualInfo
a IndividualInfo
b -> IndividualInfo -> [Char]
indInfoName IndividualInfo
a [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== IndividualInfo -> [Char]
indInfoName IndividualInfo
b)
([IndividualInfo] -> [[IndividualInfo]])
-> ([IndividualInfo] -> [IndividualInfo])
-> [IndividualInfo]
-> [[IndividualInfo]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IndividualInfo -> [Char]) -> [IndividualInfo] -> [IndividualInfo]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn IndividualInfo -> [Char]
indInfoName ([IndividualInfo] -> [[IndividualInfo]])
-> [IndividualInfo] -> [[IndividualInfo]]
forall a b. (a -> b) -> a -> b
$ [IndividualInfo]
allInds
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([[IndividualInfo]] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[IndividualInfo]]
duplicateGroups) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
[Char] -> PoseidonIO ()
logError [Char]
"There are duplicated individuals in this package collection. \
\Set --ignoreDuplicates to ignore this issue."
[[IndividualInfo]]
-> ([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [[IndividualInfo]]
duplicateGroups (([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ())
-> ([IndividualInfo] -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \[IndividualInfo]
xs -> do
[Char] -> PoseidonIO ()
logError ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Duplicate individual " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
forall a. Show a => a -> [Char]
show (IndividualInfo -> [Char]
indInfoName (IndividualInfo -> [Char]) -> IndividualInfo -> [Char]
forall a b. (a -> b) -> a -> b
$ [IndividualInfo] -> IndividualInfo
forall a. HasCallStack => [a] -> a
head [IndividualInfo]
xs)
[IndividualInfo]
-> (IndividualInfo -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [IndividualInfo]
xs ((IndividualInfo -> PoseidonIO ()) -> PoseidonIO ())
-> (IndividualInfo -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \IndividualInfo
x -> do
[Char] -> PoseidonIO ()
logError ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
" " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ IndividualInfo -> [Char]
forall a. Show a => a -> [Char]
show IndividualInfo
x
PoseidonException -> PoseidonIO ()
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> PoseidonIO ())
-> ([Char] -> PoseidonException) -> [Char] -> PoseidonIO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonCollectionException ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Detected duplicate individuals."
Bool -> Bool -> PoseidonIO ()
conclude (Bool -> Bool
not Bool
packagesSkipped) Bool
noExitCode
runValidate (ValidateOptions (ValPlanPoseidonYaml [Char]
path) Bool
noExitCode Bool
_) = do
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
ByteString
bs <- IO ByteString -> ReaderT Env IO ByteString
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> ReaderT Env IO ByteString)
-> IO ByteString -> ReaderT Env IO ByteString
forall a b. (a -> b) -> a -> b
$ [Char] -> IO ByteString
B.readFile [Char]
path
PoseidonYamlStruct
yml <- case ByteString -> Either ParseException PoseidonYamlStruct
forall a. FromJSON a => ByteString -> Either ParseException a
decodeEither' ByteString
bs of
Left ParseException
err -> PoseidonException -> ReaderT Env IO PoseidonYamlStruct
forall e a. Exception e => e -> ReaderT Env IO a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> ReaderT Env IO PoseidonYamlStruct)
-> PoseidonException -> ReaderT Env IO PoseidonYamlStruct
forall a b. (a -> b) -> a -> b
$ [Char] -> ParseException -> PoseidonException
PoseidonYamlParseException [Char]
path ParseException
err
Right PoseidonYamlStruct
pac -> PoseidonYamlStruct -> ReaderT Env IO PoseidonYamlStruct
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (PoseidonYamlStruct
pac :: PoseidonYamlStruct)
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Read .yml file of package " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ PoseidonYamlStruct -> [Char]
_posYamlTitle PoseidonYamlStruct
yml
Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanGeno GenotypeDataSpec
geno) Bool
noExitCode Bool
_) = do
let gFile :: [Char]
gFile = case GenotypeDataSpec -> GenotypeFileSpec
genotypeFileSpec GenotypeDataSpec
geno of
GenotypeEigenstrat [Char]
gf Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ -> [Char]
gf
GenotypePlink [Char]
gf Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ -> [Char]
gf
GenotypeVCF [Char]
gf Maybe [Char]
_ -> [Char]
gf
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
gFile
PoseidonPackage
pac <- GenotypeDataSpec -> PoseidonIO PoseidonPackage
makePseudoPackageFromGenotypeData GenotypeDataSpec
geno
PoseidonPackage -> Bool -> PoseidonIO ()
validateGeno PoseidonPackage
pac Bool
True
Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanJanno [Char]
path) Bool
noExitCode Bool
_) = do
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
(JannoRows [JannoRow]
entries) <- [Char] -> PoseidonIO JannoRows
readJannoFile [Char]
path
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([JannoRow] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [JannoRow]
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanSSF [Char]
path) Bool
noExitCode Bool
_) = do
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
(SeqSourceRows [SeqSourceRow]
entries) <- [Char] -> PoseidonIO SeqSourceRows
readSeqSourceFile [Char]
path
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([SeqSourceRow] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SeqSourceRow]
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
runValidate (ValidateOptions (ValPlanBib [Char]
path) Bool
noExitCode Bool
_) = do
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Validating: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
path
BibTeX
entries <- IO BibTeX -> ReaderT Env IO BibTeX
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO BibTeX -> ReaderT Env IO BibTeX)
-> IO BibTeX -> ReaderT Env IO BibTeX
forall a b. (a -> b) -> a -> b
$ [Char] -> IO BibTeX
readBibTeXFile [Char]
path
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"All " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (BibTeX -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length BibTeX
entries) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" entries are valid"
Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode
conclude :: Bool -> Bool -> PoseidonIO ()
conclude :: Bool -> Bool -> PoseidonIO ()
conclude Bool
True Bool
noExitCode = do
[Char] -> PoseidonIO ()
logInfo [Char]
"Validation passed"
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
noExitCode (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO ()
forall a. IO a
exitSuccess
conclude Bool
False Bool
noExitCode = do
[Char] -> PoseidonIO ()
logError [Char]
"Validation failed"
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
noExitCode (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO ()
forall a. IO a
exitFailure