{-# LANGUAGE OverloadedStrings #-}
module Poseidon.CLI.Forge where
import Poseidon.BibFile (BibEntry (..), BibTeX,
writeBibTeXFile)
import Poseidon.EntityTypes (EntityInput,
PacNameAndVersion (..),
PoseidonEntity (..),
SignedEntity (..),
checkIfAllEntitiesExist,
isLatestInCollection,
makePacNameAndVersion,
readEntityInputs,
resolveUniqueEntityIndices)
import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFileSpec (..),
SNPSetSpec (..),
printSNPCopyProgress,
selectIndices, snpSetMergeList)
import Poseidon.Janno (JannoRow (..), JannoRows (..),
ListColumn (..),
getMaybeListColumn,
jannoRows2EigenstratIndEntries,
writeJannoFile)
import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
filterToRelevantPackages,
getJointGenotypeData,
getJointIndividualInfo,
getJointJanno,
makePseudoPackageFromGenotypeData,
newMinimalPackageTemplate,
newPackageTemplate,
readPoseidonPackageCollection,
writePoseidonPackage)
import Poseidon.SequencingSource (SeqSourceRow (..),
SeqSourceRows (..),
writeSeqSourceFile)
import Poseidon.Utils (PoseidonException (..),
PoseidonIO, checkFile,
determinePackageOutName,
envErrorLength, envLogAction,
logInfo, logWarning, uniqueRO)
import Control.Exception (catch, throwIO)
import Control.Monad (filterM, forM, forM_, unless,
when)
import Data.List (intercalate, nub)
import Data.Maybe (mapMaybe)
import Data.Time (getCurrentTime)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import Pipes (MonadIO (liftIO), cat, (>->))
import qualified Pipes.Prelude as P
import Pipes.Safe (SafeT, runSafeT)
import Poseidon.ColumnTypes (JannoNrSNPs (..))
import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..),
GenoEntry (..), GenoLine,
writeEigenstrat)
import SequenceFormats.Plink (PlinkPopNameMode,
eigenstratInd2PlinkFam,
writePlink)
import System.Directory (copyFile,
createDirectoryIfMissing)
import System.FilePath (dropTrailingPathSeparator, (<.>),
(</>))
data ForgeOptions = ForgeOptions
{ ForgeOptions -> [GenoDataSource]
_forgeGenoSources :: [GenoDataSource]
, ForgeOptions -> [EntityInput SignedEntity]
_forgeEntityInput :: [EntityInput SignedEntity]
, ForgeOptions -> Maybe [Char]
_forgeSnpFile :: Maybe FilePath
, ForgeOptions -> Bool
_forgeIntersect :: Bool
, ForgeOptions -> [Char]
_forgeOutFormat :: String
, ForgeOptions -> ForgeOutMode
_forgeOutMode :: ForgeOutMode
, ForgeOptions -> Bool
_forgeOutZip :: Bool
, ForgeOptions -> [Char]
_forgeOutPacPath :: FilePath
, ForgeOptions -> Maybe [Char]
_forgeOutPacName :: Maybe String
, ForgeOptions -> Bool
_forgePackageWise :: Bool
, ForgeOptions -> PlinkPopNameMode
_forgeOutputPlinkPopMode :: PlinkPopNameMode
, ForgeOptions -> Bool
_forgeOutputOrdered :: Bool
}
data ForgeOutMode =
GenoOut
| MinimalOut
| PreservePymlOut
| NormalOut
preservePyml :: ForgeOutMode -> Bool
preservePyml :: ForgeOutMode -> Bool
preservePyml ForgeOutMode
PreservePymlOut = Bool
True
preservePyml ForgeOutMode
_ = Bool
False
pacReadOpts :: PackageReadOptions
pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
_readOptIgnoreChecksums :: Bool
_readOptIgnoreChecksums = Bool
True
, _readOptIgnoreGeno :: Bool
_readOptIgnoreGeno = Bool
False
, _readOptGenoCheck :: Bool
_readOptGenoCheck = Bool
True
}
runForge :: ForgeOptions -> PoseidonIO ()
runForge :: ForgeOptions -> PoseidonIO ()
runForge (
ForgeOptions [GenoDataSource]
genoSources
[EntityInput SignedEntity]
entityInputs Maybe [Char]
maybeSnpFile Bool
intersect_
[Char]
outFormat ForgeOutMode
outMode Bool
outZip [Char]
outPathRaw Maybe [Char]
maybeOutName
Bool
packageWise PlinkPopNameMode
outPlinkPopMode
Bool
outputOrdered
) = do
[PoseidonPackage]
properPackages <- PackageReadOptions -> [[Char]] -> PoseidonIO [PoseidonPackage]
readPoseidonPackageCollection PackageReadOptions
pacReadOpts ([[Char]] -> PoseidonIO [PoseidonPackage])
-> [[Char]] -> PoseidonIO [PoseidonPackage]
forall a b. (a -> b) -> a -> b
$ [GenoDataSource -> [Char]
getPacBaseDir GenoDataSource
x | x :: GenoDataSource
x@PacBaseDir {} <- [GenoDataSource]
genoSources]
[PoseidonPackage]
pseudoPackages <- (GenotypeDataSpec -> ReaderT Env IO PoseidonPackage)
-> [GenotypeDataSpec] -> PoseidonIO [PoseidonPackage]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM GenotypeDataSpec -> ReaderT Env IO PoseidonPackage
makePseudoPackageFromGenotypeData [GenoDataSource -> GenotypeDataSpec
getGenoDirect GenoDataSource
x | x :: GenoDataSource
x@GenoDirect {} <- [GenoDataSource]
genoSources]
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Unpackaged genotype data files loaded: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PoseidonPackage]
pseudoPackages)
let allPackages :: [PoseidonPackage]
allPackages = [PoseidonPackage]
properPackages [PoseidonPackage] -> [PoseidonPackage] -> [PoseidonPackage]
forall a. [a] -> [a] -> [a]
++ [PoseidonPackage]
pseudoPackages
[SignedEntity]
entitiesUser <- [EntityInput SignedEntity] -> ReaderT Env IO [SignedEntity]
forall (m :: * -> *) a.
(MonadIO m, EntitySpec a, Eq a) =>
[EntityInput a] -> m [a]
readEntityInputs [EntityInput SignedEntity]
entityInputs
[PoseidonPackage]
allLatestPackages <- (PoseidonPackage -> ReaderT Env IO Bool)
-> [PoseidonPackage] -> PoseidonIO [PoseidonPackage]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM ([PoseidonPackage] -> PoseidonPackage -> ReaderT Env IO Bool
forall (m :: * -> *) a.
(MonadThrow m, HasNameAndVersion a) =>
[a] -> a -> m Bool
isLatestInCollection [PoseidonPackage]
allPackages) [PoseidonPackage]
allPackages
[SignedEntity]
entities <- case [SignedEntity]
entitiesUser of
[] -> do
[Char] -> PoseidonIO ()
logInfo [Char]
"No requested entities. Implicitly forging all packages."
[SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([SignedEntity] -> ReaderT Env IO [SignedEntity])
-> [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
allLatestPackages
(Include PoseidonEntity
_:[SignedEntity]
_) -> do
[SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [SignedEntity]
entitiesUser
(Exclude PoseidonEntity
_:[SignedEntity]
_) -> do
[Char] -> PoseidonIO ()
logInfo [Char]
"forge entities begin with exclude, so implicitly adding all packages \
\(latest versions) as includes before applying excludes."
[SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([SignedEntity] -> ReaderT Env IO [SignedEntity])
-> [SignedEntity] -> ReaderT Env IO [SignedEntity]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
allLatestPackages [SignedEntity] -> [SignedEntity] -> [SignedEntity]
forall a. [a] -> [a] -> [a]
++ [SignedEntity]
entitiesUser
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Forging with the following entity-list: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " ([[Char]] -> [Char])
-> ([SignedEntity] -> [[Char]]) -> [SignedEntity] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (SignedEntity -> [Char]) -> [SignedEntity] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map SignedEntity -> [Char]
forall a. Show a => a -> [Char]
show ([SignedEntity] -> [[Char]])
-> ([SignedEntity] -> [SignedEntity]) -> [SignedEntity] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [SignedEntity] -> [SignedEntity]
forall a. Int -> [a] -> [a]
take Int
10) [SignedEntity]
entities [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
if [SignedEntity] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SignedEntity]
entities Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10 then [Char]
" and " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show ([SignedEntity] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SignedEntity]
entities Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
10) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" more" else [Char]
""
[PoseidonPackage] -> ReaderT Env IO IndividualInfoCollection
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m IndividualInfoCollection
getJointIndividualInfo [PoseidonPackage]
allPackages ReaderT Env IO IndividualInfoCollection
-> (IndividualInfoCollection -> PoseidonIO ()) -> PoseidonIO ()
forall a b.
ReaderT Env IO a -> (a -> ReaderT Env IO b) -> ReaderT Env IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SignedEntity] -> IndividualInfoCollection -> PoseidonIO ()
forall a.
EntitySpec a =>
[a] -> IndividualInfoCollection -> PoseidonIO ()
checkIfAllEntitiesExist [SignedEntity]
entities
[PoseidonPackage]
relevantPackages <- [SignedEntity] -> [PoseidonPackage] -> PoseidonIO [PoseidonPackage]
forall (m :: * -> *) a.
(MonadThrow m, EntitySpec a) =>
[a] -> [PoseidonPackage] -> m [PoseidonPackage]
filterToRelevantPackages [SignedEntity]
entities [PoseidonPackage]
allPackages
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ (Int -> [Char]
forall a. Show a => a -> [Char]
show (Int -> [Char])
-> ([PoseidonPackage] -> Int) -> [PoseidonPackage] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([PoseidonPackage] -> [Char]) -> [PoseidonPackage] -> [Char]
forall a b. (a -> b) -> a -> b
$ [PoseidonPackage]
relevantPackages) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" packages contain data for this forging operation"
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([PoseidonPackage] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [PoseidonPackage]
relevantPackages) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO PoseidonException
PoseidonEmptyForgeException
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([PoseidonPackage] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PoseidonPackage]
relevantPackages Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& ForgeOutMode -> Bool
preservePyml ForgeOutMode
outMode) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO PoseidonException
PoseidonCantPreserveException
IndividualInfoCollection
indInfoCollection <- [PoseidonPackage] -> ReaderT Env IO IndividualInfoCollection
forall (m :: * -> *).
MonadThrow m =>
[PoseidonPackage] -> m IndividualInfoCollection
getJointIndividualInfo [PoseidonPackage]
relevantPackages
let relevantEntities :: [SignedEntity]
relevantEntities =
if Bool
packageWise
then (PoseidonPackage -> SignedEntity)
-> [PoseidonPackage] -> [SignedEntity]
forall a b. (a -> b) -> [a] -> [b]
map (PoseidonEntity -> SignedEntity
Include (PoseidonEntity -> SignedEntity)
-> (PoseidonPackage -> PoseidonEntity)
-> PoseidonPackage
-> SignedEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PacNameAndVersion -> PoseidonEntity
Pac (PacNameAndVersion -> PoseidonEntity)
-> (PoseidonPackage -> PacNameAndVersion)
-> PoseidonPackage
-> PoseidonEntity
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> PacNameAndVersion
forall a. HasNameAndVersion a => a -> PacNameAndVersion
makePacNameAndVersion) [PoseidonPackage]
relevantPackages
else [SignedEntity]
entities
[Int]
relevantIndices <- Bool
-> [SignedEntity] -> IndividualInfoCollection -> PoseidonIO [Int]
forall a.
EntitySpec a =>
Bool -> [a] -> IndividualInfoCollection -> PoseidonIO [Int]
resolveUniqueEntityIndices Bool
outputOrdered [SignedEntity]
relevantEntities IndividualInfoCollection
indInfoCollection
let (JannoRows [JannoRow]
jannoRows) = [PoseidonPackage] -> JannoRows
getJointJanno [PoseidonPackage]
relevantPackages
newJanno :: JannoRows
newJanno@(JannoRows [JannoRow]
relevantJannoRows) = [JannoRow] -> JannoRows
JannoRows ([JannoRow] -> JannoRows) -> [JannoRow] -> JannoRows
forall a b. (a -> b) -> a -> b
$ (Int -> JannoRow) -> [Int] -> [JannoRow]
forall a b. (a -> b) -> [a] -> [b]
map ([JannoRow]
jannoRows [JannoRow] -> Int -> JannoRow
forall a. HasCallStack => [a] -> Int -> a
!!) [Int]
relevantIndices
let seqSourceRows :: SeqSourceRows
seqSourceRows = [SeqSourceRows] -> SeqSourceRows
forall a. Monoid a => [a] -> a
mconcat ([SeqSourceRows] -> SeqSourceRows)
-> [SeqSourceRows] -> SeqSourceRows
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> SeqSourceRows)
-> [PoseidonPackage] -> [SeqSourceRows]
forall a b. (a -> b) -> [a] -> [b]
map PoseidonPackage -> SeqSourceRows
posPacSeqSource [PoseidonPackage]
relevantPackages
relevantSeqSourceRows :: SeqSourceRows
relevantSeqSourceRows = JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows JannoRows
newJanno SeqSourceRows
seqSourceRows
let bibEntries :: [BibEntry]
bibEntries = [BibEntry] -> [BibEntry]
forall a. Ord a => [a] -> [a]
uniqueRO ([BibEntry] -> [BibEntry]) -> [BibEntry] -> [BibEntry]
forall a b. (a -> b) -> a -> b
$ (PoseidonPackage -> [BibEntry]) -> [PoseidonPackage] -> [BibEntry]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PoseidonPackage -> [BibEntry]
posPacBib [PoseidonPackage]
relevantPackages
relevantBibEntries :: [BibEntry]
relevantBibEntries = JannoRows -> [BibEntry] -> [BibEntry]
filterBibEntries JannoRows
newJanno [BibEntry]
bibEntries
let outPath :: [Char]
outPath = [Char] -> [Char]
dropTrailingPathSeparator [Char]
outPathRaw
[Char]
outName <- IO [Char] -> ReaderT Env IO [Char]
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Char] -> ReaderT Env IO [Char])
-> IO [Char] -> ReaderT Env IO [Char]
forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> [Char] -> IO [Char]
determinePackageOutName Maybe [Char]
maybeOutName [Char]
outPath
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Writing to directory (will be created if missing): " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outPath
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ Bool -> [Char] -> IO ()
createDirectoryIfMissing Bool
True [Char]
outPath
[SNPSetSpec]
snpSetList <- [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets [PoseidonPackage]
relevantPackages
let newSNPSet :: SNPSetSpec
newSNPSet = case
Maybe [Char]
maybeSnpFile of
Maybe [Char]
Nothing -> [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList [SNPSetSpec]
snpSetList Bool
intersect_
Just [Char]
_ -> SNPSetSpec
SNPSetOther
let gz :: [Char]
gz = if Bool
outZip then [Char]
"gz" else [Char]
""
GenotypeFileSpec
genotypeFileData <- case [Char]
outFormat of
[Char]
"EIGENSTRAT" -> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
[Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypeEigenstrat ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".geno" [Char] -> [Char] -> [Char]
<.> [Char]
gz) Maybe [Char]
forall a. Maybe a
Nothing
([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".snp" [Char] -> [Char] -> [Char]
<.> [Char]
gz) Maybe [Char]
forall a. Maybe a
Nothing
([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".ind") Maybe [Char]
forall a. Maybe a
Nothing
[Char]
"PLINK" -> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
[Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypePlink ([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".bed" [Char] -> [Char] -> [Char]
<.> [Char]
gz) Maybe [Char]
forall a. Maybe a
Nothing
([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".bim" [Char] -> [Char] -> [Char]
<.> [Char]
gz) Maybe [Char]
forall a. Maybe a
Nothing
([Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
".fam") Maybe [Char]
forall a. Maybe a
Nothing
[Char]
_ -> IO GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenotypeFileSpec -> ReaderT Env IO GenotypeFileSpec)
-> (PoseidonException -> IO GenotypeFileSpec)
-> PoseidonException
-> ReaderT Env IO GenotypeFileSpec
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenotypeFileSpec
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> ReaderT Env IO GenotypeFileSpec)
-> PoseidonException -> ReaderT Env IO GenotypeFileSpec
forall a b. (a -> b) -> a -> b
$
[Char] -> PoseidonException
PoseidonGenericException ([Char]
"Illegal outFormat " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outFormat [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
". Only Outformats EIGENSTRAT or PLINK are allowed at the moment")
let genotypeData :: GenotypeDataSpec
genotypeData = GenotypeFileSpec -> Maybe SNPSetSpec -> GenotypeDataSpec
GenotypeDataSpec GenotypeFileSpec
genotypeFileData (SNPSetSpec -> Maybe SNPSetSpec
forall a. a -> Maybe a
Just SNPSetSpec
newSNPSet)
[Char] -> PoseidonIO ()
logInfo [Char]
"Creating new package entity"
let pacSource :: PoseidonPackage
pacSource = [PoseidonPackage] -> PoseidonPackage
forall a. HasCallStack => [a] -> a
head [PoseidonPackage]
relevantPackages
case ForgeOutMode
outMode of
ForgeOutMode
GenoOut -> do
IOVector Int
_ <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
() -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
ForgeOutMode
MinimalOut -> do
PoseidonPackage
pac <- [Char]
-> [Char] -> GenotypeDataSpec -> ReaderT Env IO PoseidonPackage
forall (m :: * -> *).
MonadThrow m =>
[Char] -> [Char] -> GenotypeDataSpec -> m PoseidonPackage
newMinimalPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
IOVector Int
_ <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
() -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
ForgeOutMode
PreservePymlOut -> do
PoseidonPackage
normalPac <- [Char]
-> [Char]
-> GenotypeDataSpec
-> Maybe (Either [EigenstratIndEntry] JannoRows)
-> SeqSourceRows
-> [BibEntry]
-> ReaderT Env IO PoseidonPackage
newPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
(Either [EigenstratIndEntry] JannoRows
-> Maybe (Either [EigenstratIndEntry] JannoRows)
forall a. a -> Maybe a
Just (JannoRows -> Either [EigenstratIndEntry] JannoRows
forall a b. b -> Either a b
Right JannoRows
newJanno)) SeqSourceRows
relevantSeqSourceRows [BibEntry]
relevantBibEntries
let pac :: PoseidonPackage
pac = PoseidonPackage
normalPac {
posPacNameAndVersion :: PacNameAndVersion
posPacNameAndVersion = (PoseidonPackage -> PacNameAndVersion
posPacNameAndVersion PoseidonPackage
pacSource) {panavName :: [Char]
panavName = [Char]
outName}
, posPacDescription :: Maybe [Char]
posPacDescription = PoseidonPackage -> Maybe [Char]
posPacDescription PoseidonPackage
pacSource
, posPacLastModified :: Maybe Day
posPacLastModified = PoseidonPackage -> Maybe Day
posPacLastModified PoseidonPackage
pacSource
, posPacContributor :: [ContributorSpec]
posPacContributor = PoseidonPackage -> [ContributorSpec]
posPacContributor PoseidonPackage
pacSource
, posPacReadmeFile :: Maybe [Char]
posPacReadmeFile = PoseidonPackage -> Maybe [Char]
posPacReadmeFile PoseidonPackage
pacSource
, posPacChangelogFile :: Maybe [Char]
posPacChangelogFile = PoseidonPackage -> Maybe [Char]
posPacChangelogFile PoseidonPackage
pacSource
}
PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
[Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
relevantSeqSourceRows
[Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
relevantBibEntries
[Char] -> PoseidonPackage -> PoseidonIO ()
copyREADMEFile [Char]
outPath PoseidonPackage
pacSource
[Char] -> PoseidonPackage -> PoseidonIO ()
copyCHANGELOGFile [Char]
outPath PoseidonPackage
pacSource
IOVector Int
newNrSnps <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
[Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSnps [JannoRow]
relevantJannoRows
ForgeOutMode
NormalOut -> do
PoseidonPackage
pac <- [Char]
-> [Char]
-> GenotypeDataSpec
-> Maybe (Either [EigenstratIndEntry] JannoRows)
-> SeqSourceRows
-> [BibEntry]
-> ReaderT Env IO PoseidonPackage
newPackageTemplate [Char]
outPath [Char]
outName GenotypeDataSpec
genotypeData
(Either [EigenstratIndEntry] JannoRows
-> Maybe (Either [EigenstratIndEntry] JannoRows)
forall a. a -> Maybe a
Just (JannoRows -> Either [EigenstratIndEntry] JannoRows
forall a b. b -> Either a b
Right JannoRows
newJanno)) SeqSourceRows
relevantSeqSourceRows [BibEntry]
relevantBibEntries
PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac
[Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
relevantSeqSourceRows
[Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
relevantBibEntries
IOVector Int
newNrSnps <- [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
genotypeFileData [PoseidonPackage]
relevantPackages [Int]
relevantIndices
[Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSnps [JannoRow]
relevantJannoRows
where
writePoseidonYmlFile :: PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile :: PoseidonPackage -> PoseidonIO ()
writePoseidonYmlFile PoseidonPackage
pac = do
[Char] -> PoseidonIO ()
logInfo [Char]
"Creating POSEIDON.yml"
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ PoseidonPackage -> IO ()
writePoseidonPackage PoseidonPackage
pac
writeSSFile :: FilePath -> String -> SeqSourceRows -> PoseidonIO ()
writeSSFile :: [Char] -> [Char] -> SeqSourceRows -> PoseidonIO ()
writeSSFile [Char]
outPath [Char]
outName SeqSourceRows
rows = do
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([SeqSourceRow] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (SeqSourceRows -> [SeqSourceRow]
getSeqSourceRowList SeqSourceRows
rows)) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
[Char] -> PoseidonIO ()
logInfo [Char]
"Creating .ssf file"
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> SeqSourceRows -> IO ()
writeSeqSourceFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"ssf") SeqSourceRows
rows
writeBibFile :: FilePath -> String -> BibTeX -> PoseidonIO ()
writeBibFile :: [Char] -> [Char] -> [BibEntry] -> PoseidonIO ()
writeBibFile [Char]
outPath [Char]
outName [BibEntry]
entries = do
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([BibEntry] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [BibEntry]
entries) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
[Char] -> PoseidonIO ()
logInfo [Char]
"Creating .bib file"
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [BibEntry] -> IO ()
writeBibTeXFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"bib") [BibEntry]
entries
copyREADMEFile :: FilePath -> PoseidonPackage -> PoseidonIO ()
copyREADMEFile :: [Char] -> PoseidonPackage -> PoseidonIO ()
copyREADMEFile [Char]
outPath PoseidonPackage
pacSource = do
case PoseidonPackage -> Maybe [Char]
posPacReadmeFile PoseidonPackage
pacSource of
Maybe [Char]
Nothing -> () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
(Just [Char]
path) -> do
[Char] -> PoseidonIO ()
logInfo [Char]
"Copying README file from source package"
let fullSourcePath :: [Char]
fullSourcePath = PoseidonPackage -> [Char]
posPacBaseDir PoseidonPackage
pacSource [Char] -> [Char] -> [Char]
</> [Char]
path
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile [Char]
fullSourcePath Maybe [Char]
forall a. Maybe a
Nothing
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> IO ()
copyFile [Char]
fullSourcePath ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
path
copyCHANGELOGFile :: FilePath -> PoseidonPackage -> PoseidonIO ()
copyCHANGELOGFile :: [Char] -> PoseidonPackage -> PoseidonIO ()
copyCHANGELOGFile [Char]
outPath PoseidonPackage
pacSource = do
case PoseidonPackage -> Maybe [Char]
posPacChangelogFile PoseidonPackage
pacSource of
Maybe [Char]
Nothing -> () -> PoseidonIO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
(Just [Char]
path) -> do
[Char] -> PoseidonIO ()
logInfo [Char]
"Copying CHANGELOG file from source package"
let fullSourcePath :: [Char]
fullSourcePath = PoseidonPackage -> [Char]
posPacBaseDir PoseidonPackage
pacSource [Char] -> [Char] -> [Char]
</> [Char]
path
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile [Char]
fullSourcePath Maybe [Char]
forall a. Maybe a
Nothing
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> IO ()
copyFile [Char]
fullSourcePath ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
path
compileGenotypeData :: FilePath -> GenotypeFileSpec -> [PoseidonPackage] -> [Int] -> PoseidonIO (VUM.IOVector Int)
compileGenotypeData :: [Char]
-> GenotypeFileSpec
-> [PoseidonPackage]
-> [Int]
-> PoseidonIO (IOVector Int)
compileGenotypeData [Char]
outPath GenotypeFileSpec
gFileSpec [PoseidonPackage]
relevantPackages [Int]
relevantIndices = do
[Char] -> PoseidonIO ()
logInfo [Char]
"Compiling genotype data"
[Char] -> PoseidonIO ()
logInfo [Char]
"Processing SNPs..."
LogA
logA <- PoseidonIO LogA
envLogAction
UTCTime
currentTime <- IO UTCTime -> ReaderT Env IO UTCTime
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO UTCTime
getCurrentTime
ErrorLength
errLength <- PoseidonIO ErrorLength
envErrorLength
IOVector Int
newNrSNPs <- IO (IOVector Int) -> PoseidonIO (IOVector Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IOVector Int) -> PoseidonIO (IOVector Int))
-> IO (IOVector Int) -> PoseidonIO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ IO (IOVector Int)
-> (SomeException -> IO (IOVector Int)) -> IO (IOVector Int)
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
catch (
SafeT IO (IOVector Int) -> IO (IOVector Int)
forall (m :: * -> *) r.
(MonadMask m, MonadIO m) =>
SafeT m r -> m r
runSafeT (SafeT IO (IOVector Int) -> IO (IOVector Int))
-> SafeT IO (IOVector Int) -> IO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ do
Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
eigenstratProd <- LogA
-> Bool
-> [PoseidonPackage]
-> Maybe [Char]
-> SafeT IO (Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
forall (m :: * -> *).
MonadSafe m =>
LogA
-> Bool
-> [PoseidonPackage]
-> Maybe [Char]
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
getJointGenotypeData LogA
logA Bool
intersect_ [PoseidonPackage]
relevantPackages Maybe [Char]
maybeSnpFile
let eigenstratIndEntries :: [EigenstratIndEntry]
eigenstratIndEntries = JannoRows -> [EigenstratIndEntry]
jannoRows2EigenstratIndEntries (JannoRows -> [EigenstratIndEntry])
-> ([PoseidonPackage] -> JannoRows)
-> [PoseidonPackage]
-> [EigenstratIndEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PoseidonPackage] -> JannoRows
getJointJanno ([PoseidonPackage] -> [EigenstratIndEntry])
-> [PoseidonPackage] -> [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ [PoseidonPackage]
relevantPackages
let newEigenstratIndEntries :: [EigenstratIndEntry]
newEigenstratIndEntries = (Int -> EigenstratIndEntry) -> [Int] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map ([EigenstratIndEntry]
eigenstratIndEntries [EigenstratIndEntry] -> Int -> EigenstratIndEntry
forall a. HasCallStack => [a] -> Int -> a
!!) [Int]
relevantIndices
let outConsumer :: Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
outConsumer = case GenotypeFileSpec
gFileSpec of
GenotypeEigenstrat [Char]
outG Maybe [Char]
_ [Char]
outS Maybe [Char]
_ [Char]
outI Maybe [Char]
_ ->
[Char]
-> [Char]
-> [Char]
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeEigenstrat ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outG) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outS) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outI) [EigenstratIndEntry]
newEigenstratIndEntries
GenotypePlink [Char]
outG Maybe [Char]
_ [Char]
outS Maybe [Char]
_ [Char]
outI Maybe [Char]
_ ->
[Char]
-> [Char]
-> [Char]
-> [PlinkFamEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> [PlinkFamEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writePlink ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outG) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outS) ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outI) ((EigenstratIndEntry -> PlinkFamEntry)
-> [EigenstratIndEntry] -> [PlinkFamEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> EigenstratIndEntry -> PlinkFamEntry
eigenstratInd2PlinkFam PlinkPopNameMode
outPlinkPopMode) [EigenstratIndEntry]
newEigenstratIndEntries)
GenotypeFileSpec
_ -> IO () -> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall a.
IO a -> Proxy () (EigenstratSnpEntry, GenoLine) () X (SafeT IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
-> (PoseidonException -> IO ())
-> PoseidonException
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (PoseidonException
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ())
-> PoseidonException
-> Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall a b. (a -> b) -> a -> b
$
[Char] -> PoseidonException
PoseidonGenericException [Char]
"only Outformats EIGENSTRAT or PLINK are allowed at the moment"
let extractPipe :: Pipe
(EigenstratSnpEntry, GenoLine)
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
r
extractPipe = if Bool
packageWise then Pipe
(EigenstratSnpEntry, GenoLine)
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
r
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat else ((EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine))
-> Pipe
(EigenstratSnpEntry, GenoLine)
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map ([Int]
-> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices [Int]
relevantIndices)
let forgePipe :: Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forgePipe = Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
eigenstratProd Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
LogA
-> UTCTime
-> Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
forall (m :: * -> *) a.
MonadIO m =>
LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress LogA
logA UTCTime
currentTime Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
forall {r}.
Pipe
(EigenstratSnpEntry, GenoLine)
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
r
extractPipe Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> Proxy
()
(EigenstratSnpEntry, GenoLine)
()
(EigenstratSnpEntry, GenoLine)
(SafeT IO)
()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee Consumer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
outConsumer
let startAcc :: SafeT IO (IOVector Int)
startAcc = IO (IOVector Int) -> SafeT IO (IOVector Int)
forall a. IO a -> SafeT IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IOVector Int) -> SafeT IO (IOVector Int))
-> IO (IOVector Int) -> SafeT IO (IOVector Int)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IO (MVector (PrimState IO) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate ([EigenstratIndEntry] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EigenstratIndEntry]
newEigenstratIndEntries) Int
0
(IOVector Int
-> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int))
-> SafeT IO (IOVector Int)
-> (IOVector Int -> SafeT IO (IOVector Int))
-> Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
-> SafeT IO (IOVector Int)
forall (m :: * -> *) x a b.
Monad m =>
(x -> a -> m x) -> m x -> (x -> m b) -> Producer a m () -> m b
P.foldM IOVector Int
-> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int)
sumNonMissingSNPs SafeT IO (IOVector Int)
startAcc IOVector Int -> SafeT IO (IOVector Int)
forall a. a -> SafeT IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Producer (EigenstratSnpEntry, GenoLine) (SafeT IO) ()
forgePipe
) (PoseidonException -> IO (IOVector Int)
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO (IOVector Int))
-> (SomeException -> PoseidonException)
-> SomeException
-> IO (IOVector Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorLength -> SomeException -> PoseidonException
PoseidonGenotypeExceptionForward ErrorLength
errLength)
[Char] -> PoseidonIO ()
logInfo [Char]
"Done"
IOVector Int -> PoseidonIO (IOVector Int)
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector Int
newNrSNPs
writingJannoFile :: FilePath -> String -> VUM.MVector VUM.RealWorld Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile :: [Char] -> [Char] -> IOVector Int -> [JannoRow] -> PoseidonIO ()
writingJannoFile [Char]
outPath [Char]
outName IOVector Int
newNrSNPs [JannoRow]
rows = do
[Char] -> PoseidonIO ()
logInfo [Char]
"Creating .janno file"
Vector Int
snpList <- IO (Vector Int) -> ReaderT Env IO (Vector Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Vector Int) -> ReaderT Env IO (Vector Int))
-> IO (Vector Int) -> ReaderT Env IO (Vector Int)
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.freeze IOVector Int
MVector (PrimState IO) Int
newNrSNPs
let jannoRowsWithNewSNPNumbers :: [JannoRow]
jannoRowsWithNewSNPNumbers =
(JannoRow -> Int -> JannoRow) -> [JannoRow] -> [Int] -> [JannoRow]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\JannoRow
x Int
y -> JannoRow
x {jNrSNPs :: Maybe JannoNrSNPs
jNrSNPs = JannoNrSNPs -> Maybe JannoNrSNPs
forall a. a -> Maybe a
Just (Int -> JannoNrSNPs
JannoNrSNPs Int
y)}) [JannoRow]
rows (Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Int
snpList)
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> JannoRows -> IO ()
writeJannoFile ([Char]
outPath [Char] -> [Char] -> [Char]
</> [Char]
outName [Char] -> [Char] -> [Char]
<.> [Char]
"janno") ([JannoRow] -> JannoRows
JannoRows [JannoRow]
jannoRowsWithNewSNPNumbers)
sumNonMissingSNPs :: VUM.IOVector Int -> (EigenstratSnpEntry, GenoLine) -> SafeT IO (VUM.IOVector Int)
sumNonMissingSNPs :: IOVector Int
-> (EigenstratSnpEntry, GenoLine) -> SafeT IO (IOVector Int)
sumNonMissingSNPs IOVector Int
accumulator (EigenstratSnpEntry
_, GenoLine
geno) = do
[(GenoEntry, Int)]
-> ((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([GenoEntry] -> [Int] -> [(GenoEntry, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
V.toList GenoLine
geno) [Int
0..]) (((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ())
-> ((GenoEntry, Int) -> SafeT IO ()) -> SafeT IO ()
forall a b. (a -> b) -> a -> b
$ \(GenoEntry
g, Int
i) -> do
let x :: Int
x = GenoEntry -> Int
nonMissingToInt GenoEntry
g
MVector (PrimState (SafeT IO)) Int
-> (Int -> Int) -> Int -> SafeT IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.modify IOVector Int
MVector (PrimState (SafeT IO)) Int
accumulator (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
x) Int
i
IOVector Int -> SafeT IO (IOVector Int)
forall a. a -> SafeT IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector Int
accumulator
where
nonMissingToInt :: GenoEntry -> Int
nonMissingToInt :: GenoEntry -> Int
nonMissingToInt GenoEntry
x
| GenoEntry
x GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
== GenoEntry
Missing = Int
0
| Bool
otherwise = Int
1
filterSeqSourceRows :: JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows :: JannoRows -> SeqSourceRows -> SeqSourceRows
filterSeqSourceRows (JannoRows [JannoRow]
jRows) (SeqSourceRows [SeqSourceRow]
sRows) =
let desiredPoseidonIDs :: [[Char]]
desiredPoseidonIDs = (JannoRow -> [Char]) -> [JannoRow] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map JannoRow -> [Char]
jPoseidonID [JannoRow]
jRows
in [SeqSourceRow] -> SeqSourceRows
SeqSourceRows ([SeqSourceRow] -> SeqSourceRows)
-> [SeqSourceRow] -> SeqSourceRows
forall a b. (a -> b) -> a -> b
$ (SeqSourceRow -> Bool) -> [SeqSourceRow] -> [SeqSourceRow]
forall a. (a -> Bool) -> [a] -> [a]
filter ([[Char]] -> SeqSourceRow -> Bool
hasAPoseidonID [[Char]]
desiredPoseidonIDs) [SeqSourceRow]
sRows
where
hasAPoseidonID :: [String] -> SeqSourceRow -> Bool
hasAPoseidonID :: [[Char]] -> SeqSourceRow -> Bool
hasAPoseidonID [[Char]]
jIDs SeqSourceRow
seqSourceRow =
let sIDs :: [[Char]]
sIDs = Maybe (ListColumn [Char]) -> [[Char]]
forall a. Maybe (ListColumn a) -> [a]
getMaybeListColumn (Maybe (ListColumn [Char]) -> [[Char]])
-> Maybe (ListColumn [Char]) -> [[Char]]
forall a b. (a -> b) -> a -> b
$ SeqSourceRow -> Maybe (ListColumn [Char])
sPoseidonID SeqSourceRow
seqSourceRow
in ([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ([Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
jIDs) [[Char]]
sIDs
filterBibEntries :: JannoRows -> BibTeX -> BibTeX
filterBibEntries :: JannoRows -> [BibEntry] -> [BibEntry]
filterBibEntries (JannoRows [JannoRow]
rows) [BibEntry]
references_ =
let relevantPublications :: [[Char]]
relevantPublications = (JannoPublication -> [Char]) -> [JannoPublication] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map JannoPublication -> [Char]
forall a. Show a => a -> [Char]
show ([JannoPublication] -> [[Char]])
-> ([JannoRow] -> [JannoPublication]) -> [JannoRow] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [JannoPublication] -> [JannoPublication]
forall a. Eq a => [a] -> [a]
nub ([JannoPublication] -> [JannoPublication])
-> ([JannoRow] -> [JannoPublication])
-> [JannoRow]
-> [JannoPublication]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ListColumn JannoPublication -> [JannoPublication])
-> [ListColumn JannoPublication] -> [JannoPublication]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ListColumn JannoPublication -> [JannoPublication]
forall a. ListColumn a -> [a]
getListColumn ([ListColumn JannoPublication] -> [JannoPublication])
-> ([JannoRow] -> [ListColumn JannoPublication])
-> [JannoRow]
-> [JannoPublication]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (JannoRow -> Maybe (ListColumn JannoPublication))
-> [JannoRow] -> [ListColumn JannoPublication]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe JannoRow -> Maybe (ListColumn JannoPublication)
jPublication ([JannoRow] -> [[Char]]) -> [JannoRow] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [JannoRow]
rows
in (BibEntry -> Bool) -> [BibEntry] -> [BibEntry]
forall a. (a -> Bool) -> [a] -> [a]
filter (\BibEntry
x-> BibEntry -> [Char]
bibEntryId BibEntry
x [Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
relevantPublications) [BibEntry]
references_
fillMissingSnpSets :: [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets :: [PoseidonPackage] -> PoseidonIO [SNPSetSpec]
fillMissingSnpSets [PoseidonPackage]
packages = [PoseidonPackage]
-> (PoseidonPackage -> ReaderT Env IO SNPSetSpec)
-> PoseidonIO [SNPSetSpec]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [PoseidonPackage]
packages ((PoseidonPackage -> ReaderT Env IO SNPSetSpec)
-> PoseidonIO [SNPSetSpec])
-> (PoseidonPackage -> ReaderT Env IO SNPSetSpec)
-> PoseidonIO [SNPSetSpec]
forall a b. (a -> b) -> a -> b
$ \PoseidonPackage
pac -> do
let pac_ :: PacNameAndVersion
pac_ = PoseidonPackage -> PacNameAndVersion
posPacNameAndVersion PoseidonPackage
pac
maybeSnpSet :: Maybe SNPSetSpec
maybeSnpSet = GenotypeDataSpec -> Maybe SNPSetSpec
genotypeSnpSet (GenotypeDataSpec -> Maybe SNPSetSpec)
-> (PoseidonPackage -> GenotypeDataSpec)
-> PoseidonPackage
-> Maybe SNPSetSpec
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonPackage -> GenotypeDataSpec
posPacGenotypeData (PoseidonPackage -> Maybe SNPSetSpec)
-> PoseidonPackage -> Maybe SNPSetSpec
forall a b. (a -> b) -> a -> b
$ PoseidonPackage
pac
case Maybe SNPSetSpec
maybeSnpSet of
Just SNPSetSpec
s -> SNPSetSpec -> ReaderT Env IO SNPSetSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SNPSetSpec
s
Maybe SNPSetSpec
Nothing -> do
[Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Warning for package " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ PacNameAndVersion -> [Char]
forall a. Show a => a -> [Char]
show PacNameAndVersion
pac_ [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": field \"snpSet\" \
\is not set. I will interpret this as \"snpSet: Other\""
SNPSetSpec -> ReaderT Env IO SNPSetSpec
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SNPSetSpec
SNPSetOther