{-# LANGUAGE OverloadedStrings #-}

module Poseidon.CLI.Forge where

import           Poseidon.BibFile            (BibEntry (..), BibTeX,
                                              writeBibTeXFile)
import           Poseidon.EntityTypes        (EntityInput,
                                              PacNameAndVersion (..),
                                              PoseidonEntity (..),
                                              SignedEntity (..),
                                              checkIfAllEntitiesExist,
                                              isLatestInCollection,
                                              makePacNameAndVersion,
                                              readEntityInputs,
                                              resolveUniqueEntityIndices)
import           Poseidon.GenotypeData       (GenoDataSource (..),
                                              GenotypeDataSpec (..),
                                              GenotypeFileSpec (..),
                                              SNPSetSpec (..),
                                              printSNPCopyProgress,
                                              selectIndices, snpSetMergeList)
import           Poseidon.Janno              (JannoRow (..), JannoRows (..),
                                              ListColumn (..),
                                              getMaybeListColumn,
                                              jannoRows2EigenstratIndEntries,
                                              writeJannoFile)
import           Poseidon.Package            (PackageReadOptions (..),
                                              PoseidonPackage (..),
                                              defaultPackageReadOptions,
                                              filterToRelevantPackages,
                                              getJointGenotypeData,
                                              getJointIndividualInfo,
                                              getJointJanno,
                                              makePseudoPackageFromGenotypeData,
                                              newMinimalPackageTemplate,
                                              newPackageTemplate,
                                              readPoseidonPackageCollection,
                                              writePoseidonPackage)
import           Poseidon.SequencingSource   (SeqSourceRow (..),
                                              SeqSourceRows (..),
                                              writeSeqSourceFile)
import           Poseidon.Utils              (PoseidonException (..),
                                              PoseidonIO, checkFile,
                                              determinePackageOutName,
                                              envErrorLength, envLogAction,
                                              logInfo, logWarning, uniqueRO)

import           Control.Exception           (catch, throwIO)
import           Control.Monad               (filterM, forM, forM_, unless,
                                              when)
import           Data.List                   (intercalate, nub)
import           Data.Maybe                  (mapMaybe)
import           Data.Time                   (getCurrentTime)
import qualified Data.Vector                 as V
import qualified Data.Vector.Unboxed         as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import           Pipes                       (MonadIO (liftIO), cat, (>->))
import qualified Pipes.Prelude               as P
import           Pipes.Safe                  (SafeT, runSafeT)
import           Poseidon.ColumnTypes        (JannoNrSNPs (..))
import           SequenceFormats.Eigenstrat  (EigenstratSnpEntry (..),
                                              GenoEntry (..), GenoLine,
                                              writeEigenstrat)
import           SequenceFormats.Plink       (PlinkPopNameMode,
                                              eigenstratInd2PlinkFam,
                                              writePlink)
import           System.Directory            (copyFile,
                                              createDirectoryIfMissing)
import           System.FilePath             (dropTrailingPathSeparator, (<.>),
                                              (</>))

-- | A datatype representing command line options for the survey command
data ForgeOptions = ForgeOptions
    { ForgeOptions -> [GenoDataSource]
_forgeGenoSources        :: [GenoDataSource]
    -- Empty list = forge all packages
    , ForgeOptions -> [EntityInput SignedEntity]
_forgeEntityInput        :: [EntityInput SignedEntity] -- Empty list = forge all packages
    , ForgeOptions -> Maybe [Char]
_forgeSnpFile            :: Maybe FilePath
    , ForgeOptions -> Bool
_forgeIntersect          :: Bool
    , ForgeOptions -> [Char]
_forgeOutFormat          :: String
    , ForgeOptions -> ForgeOutMode
_forgeOutMode            :: ForgeOutMode
    , ForgeOptions -> Bool
_forgeOutZip             :: Bool
    , ForgeOptions -> [Char]
_forgeOutPacPath         :: FilePath
    , ForgeOptions -> Maybe [Char]
_forgeOutPacName         :: Maybe String
    , ForgeOptions -> Bool
_forgePackageWise        :: Bool
    , ForgeOptions -> PlinkPopNameMode
_forgeOutputPlinkPopMode :: PlinkPopNameMode
    , ForgeOptions -> Bool
_forgeOutputOrdered      :: Bool
    }

-- | Different output modes ordered from more minimal to more complete
data ForgeOutMode =
      GenoOut
    | MinimalOut
    | PreservePymlOut
    | NormalOut

preservePyml :: ForgeOutMode -> Bool
preservePyml :: ForgeOutMode -> Bool
preservePyml ForgeOutMode
PreservePymlOut = Bool
True
preservePyml ForgeOutMode
_               = Bool
False

pacReadOpts :: PackageReadOptions
pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
      _readOptIgnoreChecksums :: Bool
_readOptIgnoreChecksums  = Bool
True
    , _readOptIgnoreGeno :: Bool
_readOptIgnoreGeno       = Bool
False
    , _readOptGenoCheck :: Bool
_readOptGenoCheck        = Bool
True
    }

-- | The main function running the forge command
runForge :: ForgeOptions -> PoseidonIO ()
runForge :: ForgeOptions -> PoseidonIO ()
runForge (
    ForgeOptions [GenoDataSource]
genoSources
                 [EntityInput SignedEntity]
entityInputs Maybe [Char]
maybeSnpFile Bool
intersect_
                 [Char]
outFormat ForgeOutMode
outMode Bool
outZip [Char]
outPathRaw Maybe [Char]
maybeOutName
                 Bool
packageWise PlinkPopNameMode
outPlinkPopMode
                 Bool
outputOrdered
    ) = do

    -- load packages --
    [PoseidonPackage]
properPackages <- PackageReadOptions -> [[Char]] -> PoseidonIO [PoseidonPackage]
readPoseidonPackageCollection PackageReadOptions
pacReadOpts ([[Char]] -> PoseidonIO [PoseidonPackage])
-> [[Char]] -> PoseidonIO [PoseidonPackage]
forall a b. (a -> b) -> a -> b
$ [GenoDataSource -> [Char]
getPacBaseDir GenoDataSource
x | x :: GenoDataSource
x@PacBaseDir {} <- [GenoDataSource]
genoSources]
    [PoseidonPackage]
pseudoPackages <- (GenotypeDataSpec -> ReaderT Env IO PoseidonPackage)
-> [GenotypeDataSpec] -> PoseidonIO [PoseidonPackage]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM GenotypeDataSpec -> ReaderT Env IO PoseidonPackage
makePseudoPackageFromGenotypeData [GenoDataSource -> GenotypeDataSpec
getGenoDirect GenoDataSource
x | x :: GenoDataSource
x@GenoDirect {} <- [GenoDataSource]
genoSources]
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Unpackaged genotype data files loaded: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PoseidonPackage]
pseudoPackages)
    let allPackages :: [PoseidonPackage]
allPackages = [PoseidonPackage]
properPackages [PoseidonPackage] -> [PoseidonPackage] -> [PoseidonPackage]
forall a. [a] -> [a] -> [a]
++ [PoseidonPackage]
pseudoPackages

    -- compile entities
    [SignedEntity]
entitiesUser <- [EntityInput SignedEntity] -> ReaderT Env IO [SignedEntity]
forall (m :: * -> *) a.
(MonadIO m, EntitySpec a, Eq a) =>
[EntityInput a] -> m [a]
readEntityInputs [EntityInput SignedEntity]
entityInputs

    [PoseidonPackage]
allLatestPackages <- (PoseidonPackage -> ReaderT Env IO Bool)
-> [PoseidonPackage] -> PoseidonIO [PoseidonPackage]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM ([PoseidonPackage] -> PoseidonPackage -> ReaderT Env IO Bool
forall (m :: * -> *) a.
(MonadThrow m, HasNameAndVersion a) =>
[a] -> a -> m Bool
isLatestInCollection [PoseidonPackage]
allPackages) [PoseidonPackage]
allPackages
    [SignedEntity]
entities <- case [SignedEntity]
entitiesUser of
        [] -> do
            [Char] -> PoseidonIO ()
logInfo [Char]
"No requested entities. Implicitly forging all packages."
            [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([SignedEntity] -> ReaderT Env IO [SignedEntity])
-> [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
allLatestPackages
        (Include PoseidonEntity
_:[SignedEntity]
_) -> do
            [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [SignedEntity]
entitiesUser
        (Exclude PoseidonEntity
_:[SignedEntity]
_) -> do
            -- fill entitiesToInclude with all packages, if entitiesInput starts with an Exclude
            [Char] -> PoseidonIO ()
logInfo [Char]
"forge entities begin with exclude, so implicitly adding all packages \
                \(latest versions) as includes before applying excludes."
            -- add all latest packages to the front of the list
            [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([SignedEntity] -> ReaderT Env IO [SignedEntity])
-> [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
allLatestPackages [SignedEntity] -> [SignedEntity] -> [SignedEntity]
forall a. [a] -> [a] -> [a]
++ [SignedEntity]
entitiesUser
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Forging with the following entity-list: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " ([[Char]] -> [Char])
-> ([SignedEntity] -> [[Char]]) -> [SignedEntity] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (SignedEntity -> [Char]) -> [SignedEntity] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map SignedEntity -> [Char]
forall a. Show a => a -> [Char]
show ([SignedEntity] -> [[Char]])
-> ([SignedEntity] -> [SignedEntity]) -> [SignedEntity] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [SignedEntity] -> [SignedEntity]
forall a. Int -> [a] -> [a]
take Int
10) [SignedEntity]
entities [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
        if [SignedEntity] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SignedEntity]
entities Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10 then [Char]
" and " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([SignedEntity] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SignedEntity]
entities Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
10) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" more" else [Char]
""

    -- check if all entities can be found. This function reports an error and throws and exception
    [PoseidonPackage] -> ReaderT Env IO IndividualInfoCollection
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m IndividualInfoCollection
getJointIndividualInfo [PoseidonPackage]
allPackages ReaderT Env IO IndividualInfoCollection
-> (IndividualInfoCollection -> PoseidonIO ()) -> PoseidonIO ()
forall a b.
ReaderT Env IO a -> (a -> ReaderT Env IO b) -> ReaderT Env IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SignedEntity] -> IndividualInfoCollection -> PoseidonIO ()
forall a.
EntitySpec a =>
[a] -> IndividualInfoCollection -> PoseidonIO ()
checkIfAllEntitiesExist [SignedEntity]
entities
    -- determine relevant packages
    [PoseidonPackage]
relevantPackages <- [SignedEntity] -> [PoseidonPackage] -> PoseidonIO [PoseidonPackage]
forall (m :: * -> *) a.
(MonadThrow m, EntitySpec a) =>
[a] -> [PoseidonPackage] -> m [PoseidonPackage]
filterToRelevantPackages [SignedEntity]
entities [PoseidonPackage]
allPackages
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ (Int -> [Char]
forall a. Show a => a -> [Char]
show (Int -> [Char])
-> ([PoseidonPackage] -> Int) -> [PoseidonPackage] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([PoseidonPackage] -> [Char]) -> [PoseidonPackage] -> [Char]
forall a b. (a -> b) -> a -> b
$ [PoseidonPackage]
relevantPackages) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" packages contain data for this forging operation"
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([PoseidonPackage] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [PoseidonPackage]
relevantPackages) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO PoseidonException
PoseidonEmptyForgeException
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PoseidonPackage]
relevantPackages Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& ForgeOutMode -> Bool
preservePyml ForgeOutMode
outMode) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO PoseidonException
PoseidonCantPreserveException

    -- get all individuals from the relevant packages
    IndividualInfoCollection
indInfoCollection <- [PoseidonPackage] -> ReaderT Env IO IndividualInfoCollection
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m IndividualInfoCollection
getJointIndividualInfo [PoseidonPackage]
relevantPackages

    -- set entities to only packages, if --packagewise is set
    let relevantEntities :: [SignedEntity]
relevantEntities =
            if Bool
packageWise
            then (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
relevantPackages
            else [SignedEntity]
entities

    -- determine indizes of relevant individuals
    [Int]
relevantIndices <- Bool
-> [SignedEntity] -> IndividualInfoCollection -> PoseidonIO [Int]
forall a.
EntitySpec a =>
Bool -> [a] -> IndividualInfoCollection -> PoseidonIO [Int]
resolveUniqueEntityIndices Bool
outputOrdered [SignedEntity]
relevantEntities IndividualInfoCollection
indInfoCollection

    -- collect data --
    -- janno
    let (JannoRows [JannoRow]
jannoRows) = [PoseidonPackage] -> JannoRows
getJointJanno [PoseidonPackage]
relevantPackages
        newJanno :: JannoRows
newJanno@(JannoRows [JannoRow]
relevantJannoRows) = [JannoRow] -> JannoRows
JannoRows ([JannoRow] -> JannoRows) -> [JannoRow] -> JannoRows
forall a b. (a -> b) -> a -> b
$ (Int -> JannoRow) -> [Int] -> [JannoRow]
forall a b. (a -> b) -> [a] -> [b]
map ([JannoRow]
jannoRows [JannoRow] -> Int -> JannoRow
forall a. HasCallStack => [a] -> Int -> a
!!) [Int]
relevantIndices

    -- seqSource
    let seqSourceRows :: SeqSourceRows
seqSourceRows = [SeqSourceRows] -> SeqSourceRows
forall a. Monoid a => [a] -> a
mconcat ([SeqSourceRows] -> SeqSourceRows)
-> [SeqSourceRows] -> SeqSourceRows
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SeqSourceRows)
-> [PoseidonPackage] -> [SeqSourceRows]
forall a b. (a -> b) -> [a] -> [b]
map PoseidonPackage -> SeqSourceRows
posPacSeqSource [PoseidonPackage]
relevantPackages
        relevantSeqSourceRows :: SeqSourceRows
relevantSeqSourceRows = JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows JannoRows
newJanno SeqSourceRows
seqSourceRows

    -- bib
    let bibEntries :: [BibEntry]
bibEntries = [BibEntry] -> [BibEntry]
forall a. Ord a => [a] -> [a]
uniqueRO ([BibEntry] -> [BibEntry]) -> [BibEntry] -> [BibEntry]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> [BibEntry]) -> [PoseidonPackage] -> [BibEntry]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PoseidonPackage -> [BibEntry]
posPacBib [PoseidonPackage]
relevantPackages
        relevantBibEntries :: [BibEntry]
relevantBibEntries = JannoRows -> [BibEntry] -> [BibEntry]
filterBibEntries JannoRows
newJanno [BibEntry]
bibEntries

    -- create new package --
    let outPath :: [Char]
outPath = [Char] -> [Char]
dropTrailingPathSeparator [Char]
outPathRaw
    [Char]
outName <- IO [Char] -> ReaderT Env IO [Char]
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Char] -> ReaderT Env IO [Char])
-> IO [Char] -> ReaderT Env IO [Char]
forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> [Char] -> IO [Char]
determinePackageOutName Maybe [Char]
maybeOutName [Char]
outPath
    -- create new directory
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Writing to directory (will be created if missing): " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outPath
    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ Bool -> [Char] -> IO ()
createDirectoryIfMissing Bool
True [Char]
outPath
    -- output warning if any snpSet is set to Other
    [SNPSetSpec]
snpSetList <- [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets [PoseidonPackage]
relevantPackages
    let newSNPSet :: SNPSetSpec
newSNPSet = case
            Maybe [Char]
maybeSnpFile of
                Maybe [Char]
Nothing -> [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList [SNPSetSpec]
snpSetList Bool
intersect_
                Just [Char]
_  -> SNPSetSpec
SNPSetOther
    -- compile genotype data structure
    let gz :: [Char]
gz = if Bool
outZip then [Char]
"gz" else [Char]
""
    GenotypeFileSpec
genotypeFileData <- case [Char]
outFormat of
            [Char]
"EIGENSTRAT" -> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
                [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypeEigenstrat ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".geno" [Char] -> [Char] -> [Char]
<.> [Char]
gz)  Maybe [Char]
forall a. Maybe a
Nothing
                                   ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".snp" [Char] -> [Char] -> [Char]
<.> [Char]
gz)  Maybe [Char]
forall a. Maybe a
Nothing
                                   ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".ind") Maybe [Char]
forall a. Maybe a
Nothing
            [Char]
"PLINK"      -> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
                [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypePlink      ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".bed" [Char] -> [Char] -> [Char]
<.> [Char]
gz)  Maybe [Char]
forall a. Maybe a
Nothing
                                   ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".bim" [Char] -> [Char] -> [Char]
<.> [Char]
gz)  Maybe [Char]
forall a. Maybe a
Nothing
                                   ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".fam")  Maybe [Char]
forall a. Maybe a
Nothing
            [Char]
_  -> IO GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> (PoseidonException -> IO GenotypeFileSpec)
-> PoseidonException
-> ReaderT Env IO GenotypeFileSpec
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenotypeFileSpec
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> ReaderT Env IO GenotypeFileSpec)
-> PoseidonException -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
                [Char] -> PoseidonException
PoseidonGenericException ([Char]
"Illegal outFormat " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outFormat [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
". Only Outformats EIGENSTRAT or PLINK are allowed at the moment")
    let genotypeData :: GenotypeDataSpec
genotypeData = GenotypeFileSpec -> Maybe SNPSetSpec -> GenotypeDataSpec
GenotypeDataSpec GenotypeFileSpec
genotypeFileData (SNPSetSpec -> Maybe SNPSetSpec
forall a. a -> Maybe a
Just SNPSetSpec
newSNPSet)

    -- assemble and write result depending on outMode --
    [Char] -> PoseidonIO ()
logInfo [Char]
"Creating new package entity"
    let pacSource :: PoseidonPackage
pacSource = [PoseidonPackage] -> PoseidonPackage
forall a. HasCallStack => [a] -> a
head [PoseidonPackage]
relevantPackages
    case ForgeOutMode
outMode of
        ForgeOutMode
GenoOut -> do
            IOVector Int
_ <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
            () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        ForgeOutMode
MinimalOut -> do
            PoseidonPackage
pac <- [Char]
-> [Char] -> GenotypeDataSpec -> ReaderT Env IO PoseidonPackage
forall (m :: * -> *).
MonadThrow m =>
[Char] -> [Char] -> GenotypeDataSpec -> m PoseidonPackage
newMinimalPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
            PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
            IOVector Int
_ <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
            () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        ForgeOutMode
PreservePymlOut -> do
            PoseidonPackage
normalPac <- [Char]
-> [Char]
-> GenotypeDataSpec
-> Maybe (Either [EigenstratIndEntry] JannoRows)
-> SeqSourceRows
-> [BibEntry]
-> ReaderT Env IO PoseidonPackage
newPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
                    (Either [EigenstratIndEntry] JannoRows
-> Maybe (Either [EigenstratIndEntry] JannoRows)
forall a. a -> Maybe a
Just (JannoRows -> Either [EigenstratIndEntry] JannoRows
forall a b. b -> Either a b
Right JannoRows
newJanno)) SeqSourceRows
relevantSeqSourceRows [BibEntry]
relevantBibEntries
            let pac :: PoseidonPackage
pac = PoseidonPackage
normalPac {
                    posPacNameAndVersion :: PacNameAndVersion
posPacNameAndVersion = (PoseidonPackage -> PacNameAndVersion
posPacNameAndVersion PoseidonPackage
pacSource) {panavName :: [Char]
panavName = [Char]
outName}
                ,   posPacDescription :: Maybe [Char]
posPacDescription    = PoseidonPackage -> Maybe [Char]
posPacDescription PoseidonPackage
pacSource
                ,   posPacLastModified :: Maybe Day
posPacLastModified   = PoseidonPackage -> Maybe Day
posPacLastModified PoseidonPackage
pacSource
                ,   posPacContributor :: [ContributorSpec]
posPacContributor    = PoseidonPackage -> [ContributorSpec]
posPacContributor PoseidonPackage
pacSource
                ,   posPacReadmeFile :: Maybe [Char]
posPacReadmeFile     = PoseidonPackage -> Maybe [Char]
posPacReadmeFile PoseidonPackage
pacSource
                ,   posPacChangelogFile :: Maybe [Char]
posPacChangelogFile  = PoseidonPackage -> Maybe [Char]
posPacChangelogFile PoseidonPackage
pacSource
                }
            PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
            [Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
relevantSeqSourceRows
            [Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
relevantBibEntries
            [Char] -> PoseidonPackage -> PoseidonIO ()
copyREADMEFile [Char]
outPath PoseidonPackage
pacSource
            [Char] -> PoseidonPackage -> PoseidonIO ()
copyCHANGELOGFile [Char]
outPath PoseidonPackage
pacSource
            IOVector Int
newNrSnps <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
            [Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSnps [JannoRow]
relevantJannoRows
        ForgeOutMode
NormalOut  -> do
            PoseidonPackage
pac <- [Char]
-> [Char]
-> GenotypeDataSpec
-> Maybe (Either [EigenstratIndEntry] JannoRows)
-> SeqSourceRows
-> [BibEntry]
-> ReaderT Env IO PoseidonPackage
newPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
                    (Either [EigenstratIndEntry] JannoRows
-> Maybe (Either [EigenstratIndEntry] JannoRows)
forall a. a -> Maybe a
Just (JannoRows -> Either [EigenstratIndEntry] JannoRows
forall a b. b -> Either a b
Right JannoRows
newJanno)) SeqSourceRows
relevantSeqSourceRows [BibEntry]
relevantBibEntries
            PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
            [Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
relevantSeqSourceRows
            [Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
relevantBibEntries
            IOVector Int
newNrSnps <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
            [Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSnps [JannoRow]
relevantJannoRows

    where
        -- individual writer functions --
        writePoseidonYmlFile :: PoseidonPackage -> PoseidonIO ()
        writePoseidonYmlFile :: PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac = do
            [Char] -> PoseidonIO ()
logInfo [Char]
"Creating POSEIDON.yml"
            IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonPackage -> IO ()
writePoseidonPackage PoseidonPackage
pac
        writeSSFile :: FilePath -> String -> SeqSourceRows -> PoseidonIO ()
        writeSSFile :: [Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
rows = do
            Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([SeqSourceRow] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (SeqSourceRows -> [SeqSourceRow]
getSeqSourceRowList SeqSourceRows
rows)) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
                [Char] -> PoseidonIO ()
logInfo [Char]
"Creating .ssf file"
                IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> SeqSourceRows -> IO ()
writeSeqSourceFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"ssf") SeqSourceRows
rows
        writeBibFile :: FilePath -> String -> BibTeX -> PoseidonIO ()
        writeBibFile :: [Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
entries = do
            Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([BibEntry] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [BibEntry]
entries) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
                [Char] -> PoseidonIO ()
logInfo [Char]
"Creating .bib file"
                IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [BibEntry] -> IO ()
writeBibTeXFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"bib") [BibEntry]
entries
        copyREADMEFile :: FilePath -> PoseidonPackage -> PoseidonIO ()
        copyREADMEFile :: [Char] -> PoseidonPackage -> PoseidonIO ()
copyREADMEFile [Char]
outPath PoseidonPackage
pacSource = do
            case PoseidonPackage -> Maybe [Char]
posPacReadmeFile PoseidonPackage
pacSource of
                Maybe [Char]
Nothing -> () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                (Just [Char]
path) -> do
                    [Char] -> PoseidonIO ()
logInfo [Char]
"Copying README file from source package"
                    let fullSourcePath :: [Char]
fullSourcePath = PoseidonPackage -> [Char]
posPacBaseDir PoseidonPackage
pacSource [Char] -> [Char] -> [Char]
</> [Char]
path
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile [Char]
fullSourcePath Maybe [Char]
forall a. Maybe a
Nothing
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> IO ()
copyFile [Char]
fullSourcePath ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
path
        copyCHANGELOGFile :: FilePath -> PoseidonPackage -> PoseidonIO ()
        copyCHANGELOGFile :: [Char] -> PoseidonPackage -> PoseidonIO ()
copyCHANGELOGFile [Char]
outPath PoseidonPackage
pacSource = do
            case PoseidonPackage -> Maybe [Char]
posPacChangelogFile PoseidonPackage
pacSource of
                Maybe [Char]
Nothing -> () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                (Just [Char]
path) -> do
                    [Char] -> PoseidonIO ()
logInfo [Char]
"Copying CHANGELOG file from source package"
                    let fullSourcePath :: [Char]
fullSourcePath = PoseidonPackage -> [Char]
posPacBaseDir PoseidonPackage
pacSource [Char] -> [Char] -> [Char]
</> [Char]
path
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile [Char]
fullSourcePath Maybe [Char]
forall a. Maybe a
Nothing
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> IO ()
copyFile [Char]
fullSourcePath ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
path
        compileGenotypeData :: FilePath -> GenotypeFileSpec -> [PoseidonPackage] -> [Int] -> PoseidonIO (VUM.IOVector Int)
        compileGenotypeData :: [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
gFileSpec [PoseidonPackage]
relevantPackages [Int]
relevantIndices = do
            [Char] -> PoseidonIO ()
logInfo [Char]
"Compiling genotype data"
            [Char] -> PoseidonIO ()
logInfo [Char]
"Processing SNPs..."
            LogA
logA <- PoseidonIO LogA
envLogAction
            UTCTime
currentTime <- IO UTCTime -> ReaderT Env IO UTCTime
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO UTCTime
getCurrentTime
            ErrorLength
errLength <- PoseidonIO ErrorLength
envErrorLength
            IOVector Int
newNrSNPs <- IO (IOVector Int) -> PoseidonIO (IOVector Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IOVector Int) -> PoseidonIO (IOVector Int))
-> IO (IOVector Int) -> PoseidonIO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ IO (IOVector Int)
-> (SomeException -> IO (IOVector Int)) -> IO (IOVector Int)
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
catch (
                SafeT IO (IOVector Int) -> IO (IOVector Int)
forall (m :: * -> *) r.
(MonadMask m, MonadIO m) =>
SafeT m r -> m r
runSafeT (SafeT IO (IOVector Int) -> IO (IOVector Int))
-> SafeT IO (IOVector Int) -> IO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ do
                    Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
eigenstratProd <- LogA
-> Bool
-> [PoseidonPackage]
-> Maybe [Char]
-> SafeT IO (Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
forall (m :: * -> *).
MonadSafe m =>
LogA
-> Bool
-> [PoseidonPackage]
-> Maybe [Char]
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
getJointGenotypeData LogA
logA Bool
intersect_ [PoseidonPackage]
relevantPackages Maybe [Char]
maybeSnpFile
                    let eigenstratIndEntries :: [EigenstratIndEntry]
eigenstratIndEntries = JannoRows -> [EigenstratIndEntry]
jannoRows2EigenstratIndEntries (JannoRows -> [EigenstratIndEntry])
-> ([PoseidonPackage] -> JannoRows)
-> [PoseidonPackage]
-> [EigenstratIndEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PoseidonPackage] -> JannoRows
getJointJanno ([PoseidonPackage] -> [EigenstratIndEntry])
-> [PoseidonPackage] -> [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ [PoseidonPackage]
relevantPackages
                    let newEigenstratIndEntries :: [EigenstratIndEntry]
newEigenstratIndEntries = (Int -> EigenstratIndEntry) -> [Int] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map ([EigenstratIndEntry]
eigenstratIndEntries [EigenstratIndEntry] -> Int -> EigenstratIndEntry
forall a. HasCallStack => [a] -> Int -> a
!!) [Int]
relevantIndices
                    let outConsumer :: Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
outConsumer = case GenotypeFileSpec
gFileSpec of
                            GenotypeEigenstrat [Char]
outG Maybe [Char]
_ [Char]
outS Maybe [Char]
_ [Char]
outI Maybe [Char]
_ ->
                                [Char]
-> [Char]
-> [Char]
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeEigenstrat ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outG) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outS) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outI) [EigenstratIndEntry]
newEigenstratIndEntries
                            GenotypePlink      [Char]
outG Maybe [Char]
_ [Char]
outS Maybe [Char]
_ [Char]
outI Maybe [Char]
_ ->
                                [Char]
-> [Char]
-> [Char]
-> [PlinkFamEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> [PlinkFamEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writePlink      ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outG) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outS) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outI) ((EigenstratIndEntry -> PlinkFamEntry)
-> [EigenstratIndEntry] -> [PlinkFamEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> EigenstratIndEntry -> PlinkFamEntry
eigenstratInd2PlinkFam PlinkPopNameMode
outPlinkPopMode) [EigenstratIndEntry]
newEigenstratIndEntries)
                            GenotypeFileSpec
_  -> IO () -> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall a.
IO a -> Proxy () (EigenstratSnpEntry, GenoLine) () X (SafeT IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
-> (PoseidonException -> IO ())
-> PoseidonException
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (PoseidonException
 -> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
-> PoseidonException
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall a b. (a -> b) -> a -> b
$
                                [Char] -> PoseidonException
PoseidonGenericException [Char]
"only Outformats EIGENSTRAT or PLINK are allowed at the moment"
                    let extractPipe :: Pipe
  (EigenstratSnpEntry, GenoLine)
  (EigenstratSnpEntry, GenoLine)
  (SafeT IO)
  r
extractPipe = if Bool
packageWise then Pipe
  (EigenstratSnpEntry, GenoLine)
  (EigenstratSnpEntry, GenoLine)
  (SafeT IO)
  r
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat else ((EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine))
-> Pipe
     (EigenstratSnpEntry, GenoLine)
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map ([Int]
-> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices [Int]
relevantIndices)
                    -- define main forge pipe including file output.
                    -- The final tee forwards the results to be used in the snpCounting-fold
                    let forgePipe :: Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forgePipe = Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
eigenstratProd Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
     ()
     (EigenstratSnpEntry, GenoLine)
     ()
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     ()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
                            LogA
-> UTCTime
-> Proxy
     ()
     (EigenstratSnpEntry, GenoLine)
     ()
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     ()
forall (m :: * -> *) a.
MonadIO m =>
LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress LogA
logA UTCTime
currentTime Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
     ()
     (EigenstratSnpEntry, GenoLine)
     ()
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     ()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
                            Proxy
  ()
  (EigenstratSnpEntry, GenoLine)
  ()
  (EigenstratSnpEntry, GenoLine)
  (SafeT IO)
  ()
forall {r}.
Pipe
  (EigenstratSnpEntry, GenoLine)
  (EigenstratSnpEntry, GenoLine)
  (SafeT IO)
  r
extractPipe Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
     ()
     (EigenstratSnpEntry, GenoLine)
     ()
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     ()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
                            Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
     ()
     (EigenstratSnpEntry, GenoLine)
     ()
     (EigenstratSnpEntry, GenoLine)
     (SafeT IO)
     ()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
outConsumer
                    let startAcc :: SafeT IO (IOVector Int)
startAcc = IO (IOVector Int) -> SafeT IO (IOVector Int)
forall a. IO a -> SafeT IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IOVector Int) -> SafeT IO (IOVector Int))
-> IO (IOVector Int) -> SafeT IO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IO (MVector (PrimState IO) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate ([EigenstratIndEntry] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EigenstratIndEntry]
newEigenstratIndEntries) Int
0
                    (IOVector Int
 -> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int))
-> SafeT IO (IOVector Int)
-> (IOVector Int -> SafeT IO (IOVector Int))
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> SafeT IO (IOVector Int)
forall (m :: * -> *) x a b.
Monad m =>
(x -> a -> m x) -> m x -> (x -> m b) -> Producer a m () -> m b
P.foldM IOVector Int
-> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int)
sumNonMissingSNPs SafeT IO (IOVector Int)
startAcc IOVector Int -> SafeT IO (IOVector Int)
forall a. a -> SafeT IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forgePipe
                ) (PoseidonException -> IO (IOVector Int)
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO (IOVector Int))
-> (SomeException -> PoseidonException)
-> SomeException
-> IO (IOVector Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorLength -> SomeException -> PoseidonException
PoseidonGenotypeExceptionForward ErrorLength
errLength)
            [Char] -> PoseidonIO ()
logInfo [Char]
"Done"
            IOVector Int -> PoseidonIO (IOVector Int)
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector Int
newNrSNPs
        writingJannoFile :: FilePath -> String -> VUM.MVector VUM.RealWorld Int -> [JannoRow] -> PoseidonIO ()
        writingJannoFile :: [Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSNPs [JannoRow]
rows = do
            [Char] -> PoseidonIO ()
logInfo [Char]
"Creating .janno file"
            Vector Int
snpList <- IO (Vector Int) -> ReaderT Env IO (Vector Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Vector Int) -> ReaderT Env IO (Vector Int))
-> IO (Vector Int) -> ReaderT Env IO (Vector Int)
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.freeze IOVector Int
MVector (PrimState IO) Int
newNrSNPs
            let jannoRowsWithNewSNPNumbers :: [JannoRow]
jannoRowsWithNewSNPNumbers =
                    (JannoRow -> Int -> JannoRow) -> [JannoRow] -> [Int] -> [JannoRow]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\JannoRow
x Int
y -> JannoRow
x {jNrSNPs :: Maybe JannoNrSNPs
jNrSNPs = JannoNrSNPs -> Maybe JannoNrSNPs
forall a. a -> Maybe a
Just (Int -> JannoNrSNPs
JannoNrSNPs Int
y)}) [JannoRow]
rows (Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Int
snpList)
            IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> JannoRows -> IO ()
writeJannoFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"janno") ([JannoRow] -> JannoRows
JannoRows [JannoRow]
jannoRowsWithNewSNPNumbers)

sumNonMissingSNPs :: VUM.IOVector Int -> (EigenstratSnpEntry, GenoLine) -> SafeT IO (VUM.IOVector Int)
sumNonMissingSNPs :: IOVector Int
-> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int)
sumNonMissingSNPs IOVector Int
accumulator (EigenstratSnpEntry
_, GenoLine
geno) = do
    [(GenoEntry, Int)]
-> ((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([GenoEntry] -> [Int] -> [(GenoEntry, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
V.toList GenoLine
geno) [Int
0..]) (((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ())
-> ((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ()
forall a b. (a -> b) -> a -> b
$ \(GenoEntry
g, Int
i) -> do
        let x :: Int
x = GenoEntry -> Int
nonMissingToInt GenoEntry
g
        MVector (PrimState (SafeT IO)) Int
-> (Int -> Int) -> Int -> SafeT IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.modify IOVector Int
MVector (PrimState (SafeT IO)) Int
accumulator (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
x) Int
i
    IOVector Int -> SafeT IO (IOVector Int)
forall a. a -> SafeT IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector Int
accumulator
  where
    nonMissingToInt :: GenoEntry -> Int
    nonMissingToInt :: GenoEntry -> Int
nonMissingToInt GenoEntry
x
        | GenoEntry
x GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
== GenoEntry
Missing = Int
0
        | Bool
otherwise = Int
1

filterSeqSourceRows :: JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows :: JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows (JannoRows [JannoRow]
jRows) (SeqSourceRows [SeqSourceRow]
sRows) =
    let desiredPoseidonIDs :: [[Char]]
desiredPoseidonIDs = (JannoRow -> [Char]) -> [JannoRow] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map JannoRow -> [Char]
jPoseidonID [JannoRow]
jRows
    in [SeqSourceRow] -> SeqSourceRows
SeqSourceRows ([SeqSourceRow] -> SeqSourceRows)
-> [SeqSourceRow] -> SeqSourceRows
forall a b. (a -> b) -> a -> b
$ (SeqSourceRow -> Bool) -> [SeqSourceRow] -> [SeqSourceRow]
forall a. (a -> Bool) -> [a] -> [a]
filter ([[Char]] -> SeqSourceRow -> Bool
hasAPoseidonID [[Char]]
desiredPoseidonIDs) [SeqSourceRow]
sRows
    where
        hasAPoseidonID :: [String] -> SeqSourceRow -> Bool
        hasAPoseidonID :: [[Char]] -> SeqSourceRow -> Bool
hasAPoseidonID [[Char]]
jIDs SeqSourceRow
seqSourceRow =
            let sIDs :: [[Char]]
sIDs = Maybe (ListColumn [Char]) -> [[Char]]
forall a. Maybe (ListColumn a) -> [a]
getMaybeListColumn (Maybe (ListColumn [Char]) -> [[Char]])
-> Maybe (ListColumn [Char]) -> [[Char]]
forall a b. (a -> b) -> a -> b
$ SeqSourceRow -> Maybe (ListColumn [Char])
sPoseidonID SeqSourceRow
seqSourceRow
            in ([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ([Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
jIDs) [[Char]]
sIDs

filterBibEntries :: JannoRows -> BibTeX -> BibTeX
filterBibEntries :: JannoRows -> [BibEntry] -> [BibEntry]
filterBibEntries (JannoRows [JannoRow]
rows) [BibEntry]
references_ =
    let relevantPublications :: [[Char]]
relevantPublications = (JannoPublication -> [Char]) -> [JannoPublication] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map JannoPublication -> [Char]
forall a. Show a => a -> [Char]
show ([JannoPublication] -> [[Char]])
-> ([JannoRow] -> [JannoPublication]) -> [JannoRow] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [JannoPublication] -> [JannoPublication]
forall a. Eq a => [a] -> [a]
nub ([JannoPublication] -> [JannoPublication])
-> ([JannoRow] -> [JannoPublication])
-> [JannoRow]
-> [JannoPublication]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ListColumn JannoPublication -> [JannoPublication])
-> [ListColumn JannoPublication] -> [JannoPublication]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ListColumn JannoPublication -> [JannoPublication]
forall a. ListColumn a -> [a]
getListColumn ([ListColumn JannoPublication] -> [JannoPublication])
-> ([JannoRow] -> [ListColumn JannoPublication])
-> [JannoRow]
-> [JannoPublication]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (JannoRow -> Maybe (ListColumn JannoPublication))
-> [JannoRow] -> [ListColumn JannoPublication]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe JannoRow -> Maybe (ListColumn JannoPublication)
jPublication ([JannoRow] -> [[Char]]) -> [JannoRow] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [JannoRow]
rows
    in (BibEntry -> Bool) -> [BibEntry] -> [BibEntry]
forall a. (a -> Bool) -> [a] -> [a]
filter (\BibEntry
x-> BibEntry -> [Char]
bibEntryId BibEntry
x [Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
relevantPublications) [BibEntry]
references_

fillMissingSnpSets :: [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets :: [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets [PoseidonPackage]
packages = [PoseidonPackage]
-> (PoseidonPackage -> ReaderT Env IO SNPSetSpec)
-> PoseidonIO [SNPSetSpec]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [PoseidonPackage]
packages ((PoseidonPackage -> ReaderT Env IO SNPSetSpec)
 -> PoseidonIO [SNPSetSpec])
-> (PoseidonPackage -> ReaderT Env IO SNPSetSpec)
-> PoseidonIO [SNPSetSpec]
forall a b. (a -> b) -> a -> b
$ \PoseidonPackage
pac -> do
    let pac_ :: PacNameAndVersion
pac_ = PoseidonPackage -> PacNameAndVersion
posPacNameAndVersion PoseidonPackage
pac
        maybeSnpSet :: Maybe SNPSetSpec
maybeSnpSet = GenotypeDataSpec -> Maybe SNPSetSpec
genotypeSnpSet (GenotypeDataSpec -> Maybe SNPSetSpec)
-> (PoseidonPackage -> GenotypeDataSpec)
-> PoseidonPackage
-> Maybe SNPSetSpec
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> GenotypeDataSpec
posPacGenotypeData (PoseidonPackage -> Maybe SNPSetSpec)
-> PoseidonPackage -> Maybe SNPSetSpec
forall a b. (a -> b) -> a -> b
$ PoseidonPackage
pac
    case Maybe SNPSetSpec
maybeSnpSet of
        Just SNPSetSpec
s -> SNPSetSpec -> ReaderT Env IO SNPSetSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SNPSetSpec
s
        Maybe SNPSetSpec
Nothing -> do
            [Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Warning for package " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ PacNameAndVersion -> [Char]
forall a. Show a => a -> [Char]
show PacNameAndVersion
pac_ [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": field \"snpSet\" \
                \is not set. I will interpret this as \"snpSet: Other\""
            SNPSetSpec -> ReaderT Env IO SNPSetSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SNPSetSpec
SNPSetOther