{-# LANGUAGE OverloadedStrings #-}
module Poseidon.GenotypeData where
import Poseidon.Utils (LogA, PoseidonException (..),
PoseidonIO, checkFile,
envInputPlinkMode, logDebug,
logInfo, logWithEnv, padLeft)
import Control.Exception (throwIO)
import Control.Monad (forM, unless)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Aeson (FromJSON, ToJSON, object,
parseJSON, toJSON, withObject,
withText, (.:), (.:?), (.=))
import Data.ByteString (isPrefixOf)
import Data.IORef (modifyIORef, newIORef, readIORef)
import Data.List (nub, sort)
import Data.Maybe (catMaybes)
import qualified Data.Text as T
import Data.Time (NominalDiffTime, UTCTime,
diffUTCTime, getCurrentTime)
import qualified Data.Vector as V
import Pipes (Pipe, Producer, cat, for, yield,
(>->))
import Pipes.Safe (MonadSafe, runSafeT)
import SequenceFormats.Eigenstrat (EigenstratIndEntry (..),
EigenstratSnpEntry (..),
GenoEntry (..), GenoLine, Sex (..),
readEigenstrat, readEigenstratInd)
import SequenceFormats.FreqSum (FreqSumEntry (..))
import SequenceFormats.Plink (plinkFam2EigenstratInd,
readFamFile, readPlink)
import SequenceFormats.VCF (VCFentry (..), VCFheader (..),
readVCFfromFile, vcfToFreqSumEntry)
import System.FilePath (takeDirectory, takeFileName, (</>))
data GenoDataSource = PacBaseDir
{ GenoDataSource -> [Char]
getPacBaseDir :: FilePath
}
| GenoDirect
{ GenoDataSource -> GenotypeDataSpec
getGenoDirect :: GenotypeDataSpec
}
deriving Int -> GenoDataSource -> ShowS
[GenoDataSource] -> ShowS
GenoDataSource -> [Char]
(Int -> GenoDataSource -> ShowS)
-> (GenoDataSource -> [Char])
-> ([GenoDataSource] -> ShowS)
-> Show GenoDataSource
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenoDataSource -> ShowS
showsPrec :: Int -> GenoDataSource -> ShowS
$cshow :: GenoDataSource -> [Char]
show :: GenoDataSource -> [Char]
$cshowList :: [GenoDataSource] -> ShowS
showList :: [GenoDataSource] -> ShowS
Show
data GenotypeDataSpec = GenotypeDataSpec {
GenotypeDataSpec -> GenotypeFileSpec
genotypeFileSpec :: GenotypeFileSpec,
GenotypeDataSpec -> Maybe SNPSetSpec
genotypeSnpSet :: Maybe SNPSetSpec
} deriving (Int -> GenotypeDataSpec -> ShowS
[GenotypeDataSpec] -> ShowS
GenotypeDataSpec -> [Char]
(Int -> GenotypeDataSpec -> ShowS)
-> (GenotypeDataSpec -> [Char])
-> ([GenotypeDataSpec] -> ShowS)
-> Show GenotypeDataSpec
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenotypeDataSpec -> ShowS
showsPrec :: Int -> GenotypeDataSpec -> ShowS
$cshow :: GenotypeDataSpec -> [Char]
show :: GenotypeDataSpec -> [Char]
$cshowList :: [GenotypeDataSpec] -> ShowS
showList :: [GenotypeDataSpec] -> ShowS
Show, GenotypeDataSpec -> GenotypeDataSpec -> Bool
(GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> (GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> Eq GenotypeDataSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
$c/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
Eq)
data GenotypeFileSpec = GenotypeEigenstrat {
GenotypeFileSpec -> [Char]
_esGenoFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_esGenoFileChkSum :: Maybe String,
GenotypeFileSpec -> [Char]
_esSnpFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_esSnpFileChkSum :: Maybe String,
GenotypeFileSpec -> [Char]
_esIndFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_esIndFileChkSum :: Maybe String
} | GenotypePlink {
GenotypeFileSpec -> [Char]
_plGenoFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_plGenoFileChkSum :: Maybe String,
GenotypeFileSpec -> [Char]
_plSnpFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_plSnpFileChkSum :: Maybe String,
GenotypeFileSpec -> [Char]
_plIndFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_plIndFileChkSum :: Maybe String
} | GenotypeVCF {
GenotypeFileSpec -> [Char]
_vcfGenoFile :: FilePath,
GenotypeFileSpec -> Maybe [Char]
_vcfGenoFileChkSum :: Maybe String
} deriving (Int -> GenotypeFileSpec -> ShowS
[GenotypeFileSpec] -> ShowS
GenotypeFileSpec -> [Char]
(Int -> GenotypeFileSpec -> ShowS)
-> (GenotypeFileSpec -> [Char])
-> ([GenotypeFileSpec] -> ShowS)
-> Show GenotypeFileSpec
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenotypeFileSpec -> ShowS
showsPrec :: Int -> GenotypeFileSpec -> ShowS
$cshow :: GenotypeFileSpec -> [Char]
show :: GenotypeFileSpec -> [Char]
$cshowList :: [GenotypeFileSpec] -> ShowS
showList :: [GenotypeFileSpec] -> ShowS
Show, GenotypeFileSpec -> GenotypeFileSpec -> Bool
(GenotypeFileSpec -> GenotypeFileSpec -> Bool)
-> (GenotypeFileSpec -> GenotypeFileSpec -> Bool)
-> Eq GenotypeFileSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
== :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
$c/= :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
/= :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
Eq)
instance FromJSON GenotypeDataSpec where
parseJSON :: Value -> Parser GenotypeDataSpec
parseJSON = [Char]
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a. [Char] -> (Object -> Parser a) -> Value -> Parser a
withObject [Char]
"GenotypeData" ((Object -> Parser GenotypeDataSpec)
-> Value -> Parser GenotypeDataSpec)
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a b. (a -> b) -> a -> b
$ \Object
v -> do
Text
gformat <- Object
v Object -> Key -> Parser Text
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"format"
GenotypeFileSpec
gfileSpec <- case Text
gformat of
Text
"EIGENSTRAT" -> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypeEigenstrat
([Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
-> Parser [Char]
-> Parser
(Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"genoFile"
Parser
(Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser
([Char]
-> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
Parser
([Char]
-> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
(Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"snpFile"
Parser (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpFileChkSum"
Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"indFile"
Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"indFileChkSum"
Text
"PLINK" -> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypePlink
([Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
-> Parser [Char]
-> Parser
(Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"genoFile"
Parser
(Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser
([Char]
-> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
Parser
([Char]
-> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
(Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"snpFile"
Parser (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpFileChkSum"
Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"indFile"
Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"indFileChkSum"
Text
"VCF" -> [Char] -> Maybe [Char] -> GenotypeFileSpec
GenotypeVCF
([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"genoFile"
Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
Text
_ -> [Char] -> Parser GenotypeFileSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown format " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
gformat)
Maybe SNPSetSpec
snpSet <- Object
v Object -> Key -> Parser (Maybe SNPSetSpec)
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpSet"
GenotypeDataSpec -> Parser GenotypeDataSpec
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeDataSpec -> Parser GenotypeDataSpec)
-> GenotypeDataSpec -> Parser GenotypeDataSpec
forall a b. (a -> b) -> a -> b
$ GenotypeFileSpec -> Maybe SNPSetSpec -> GenotypeDataSpec
GenotypeDataSpec GenotypeFileSpec
gfileSpec Maybe SNPSetSpec
snpSet
instance ToJSON GenotypeDataSpec where
toJSON :: GenotypeDataSpec -> Value
toJSON (GenotypeDataSpec GenotypeFileSpec
gfileSpec Maybe SNPSetSpec
snpSet) = case GenotypeFileSpec
gfileSpec of
GenotypeEigenstrat [Char]
genoF Maybe [Char]
genoFchk [Char]
snpF Maybe [Char]
snpFchk [Char]
indF Maybe [Char]
indFchk ->
[Pair] -> Value
object [
Key
"format" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"EIGENSTRAT" :: String),
Key
"genoFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk,
Key
"snpFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
snpF,
Key
"snpFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
snpFchk,
Key
"indFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
indF,
Key
"indFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
indFchk,
Key
"snpSet" Key -> Maybe SNPSetSpec -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe SNPSetSpec
snpSet
]
GenotypePlink [Char]
genoF Maybe [Char]
genoFchk [Char]
snpF Maybe [Char]
snpFchk [Char]
indF Maybe [Char]
indFchk ->
[Pair] -> Value
object [
Key
"format" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"PLINK" :: String),
Key
"genoFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk,
Key
"snpFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
snpF,
Key
"snpFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
snpFchk,
Key
"indFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
indF,
Key
"indFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
indFchk,
Key
"snpSet" Key -> Maybe SNPSetSpec -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe SNPSetSpec
snpSet
]
GenotypeVCF [Char]
genoF Maybe [Char]
genoFchk ->
[Pair] -> Value
object [
Key
"format" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"VCF" :: String),
Key
"genoFile" Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk
]
data SNPSetSpec = SNPSet1240K
| SNPSetHumanOrigins
| SNPSetOther
deriving (SNPSetSpec -> SNPSetSpec -> Bool
(SNPSetSpec -> SNPSetSpec -> Bool)
-> (SNPSetSpec -> SNPSetSpec -> Bool) -> Eq SNPSetSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SNPSetSpec -> SNPSetSpec -> Bool
== :: SNPSetSpec -> SNPSetSpec -> Bool
$c/= :: SNPSetSpec -> SNPSetSpec -> Bool
/= :: SNPSetSpec -> SNPSetSpec -> Bool
Eq)
instance Show SNPSetSpec where
show :: SNPSetSpec -> [Char]
show SNPSetSpec
SNPSet1240K = [Char]
"1240K"
show SNPSetSpec
SNPSetHumanOrigins = [Char]
"HumanOrigins"
show SNPSetSpec
SNPSetOther = [Char]
"Other"
instance FromJSON SNPSetSpec where
parseJSON :: Value -> Parser SNPSetSpec
parseJSON = [Char] -> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a. [Char] -> (Text -> Parser a) -> Value -> Parser a
withText [Char]
"snpSet" ((Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec)
-> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a b. (a -> b) -> a -> b
$ \Text
v -> case Text
v of
Text
"1240K" -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSet1240K
Text
"HumanOrigins" -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetHumanOrigins
Text
"Other" -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetOther
Text
_ -> [Char] -> Parser SNPSetSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown snpSet " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
v)
instance ToJSON SNPSetSpec where
toJSON :: SNPSetSpec -> Value
toJSON SNPSetSpec
a = case SNPSetSpec
a of
SNPSetSpec
SNPSet1240K -> Value
"1240K"
SNPSetSpec
SNPSetHumanOrigins -> Value
"HumanOrigins"
SNPSetSpec
SNPSetOther -> Value
"Other"
snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList (SNPSetSpec
x:[SNPSetSpec]
xs) Bool
intersect = (SNPSetSpec -> SNPSetSpec -> SNPSetSpec)
-> SNPSetSpec -> [SNPSetSpec] -> SNPSetSpec
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\SNPSetSpec
a SNPSetSpec
b -> SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
a SNPSetSpec
b Bool
intersect) SNPSetSpec
x [SNPSetSpec]
xs
snpSetMergeList [SNPSetSpec]
_ Bool
_ = [Char] -> SNPSetSpec
forall a. HasCallStack => [Char] -> a
error [Char]
"snpSetMergeList: This should never happen"
snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
SNPSet1240K SNPSetSpec
SNPSet1240K Bool
_ = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins SNPSetSpec
SNPSetHumanOrigins Bool
_ = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetOther SNPSetSpec
_ Bool
_ = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
_ SNPSetSpec
SNPSetOther Bool
_ = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
SNPSet1240K SNPSetSpec
SNPSetHumanOrigins Bool
True = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetHumanOrigins SNPSetSpec
SNPSet1240K Bool
True = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSet1240K SNPSetSpec
SNPSetHumanOrigins Bool
False = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins SNPSetSpec
SNPSet1240K Bool
False = SNPSetSpec
SNPSet1240K
reduceGenotypeFilepaths :: (MonadThrow m) => GenotypeDataSpec -> m (FilePath, GenotypeDataSpec)
reduceGenotypeFilepaths :: forall (m :: * -> *).
MonadThrow m =>
GenotypeDataSpec -> m ([Char], GenotypeDataSpec)
reduceGenotypeFilepaths gd :: GenotypeDataSpec
gd@(GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) = do
([Char]
baseDir, GenotypeFileSpec
newGfileSpec) <- case GenotypeFileSpec
gFileSpec of
GenotypeEigenstrat [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> do
let baseDirs :: [[Char]]
baseDirs = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeDirectory [[Char]
genoF, [Char]
snpF, [Char]
indF]
fileNames :: [[Char]]
fileNames = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeFileName [[Char]
genoF, [Char]
snpF, [Char]
indF]
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ([Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
==([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs)) [[Char]]
baseDirs) (m () -> m ())
-> (PoseidonException -> m ()) -> PoseidonException -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> m ()) -> PoseidonException -> m ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> PoseidonException
PoseidonUnequalBaseDirException [Char]
genoF [Char]
snpF [Char]
indF
([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs, GenotypeFileSpec
gFileSpec {_esGenoFile :: [Char]
_esGenoFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
0, _esSnpFile :: [Char]
_esSnpFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
1, _esIndFile :: [Char]
_esIndFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
2})
GenotypePlink [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> do
let baseDirs :: [[Char]]
baseDirs = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeDirectory [[Char]
genoF, [Char]
snpF, [Char]
indF]
fileNames :: [[Char]]
fileNames = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeFileName [[Char]
genoF, [Char]
snpF, [Char]
indF]
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ([Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
==([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs)) [[Char]]
baseDirs) (m () -> m ())
-> (PoseidonException -> m ()) -> PoseidonException -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> m ()) -> PoseidonException -> m ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> PoseidonException
PoseidonUnequalBaseDirException [Char]
genoF [Char]
snpF [Char]
indF
([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs, GenotypeFileSpec
gFileSpec {_plGenoFile :: [Char]
_plGenoFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
0, _plSnpFile :: [Char]
_plSnpFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
1, _plIndFile :: [Char]
_plIndFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
2})
GenotypeVCF [Char]
genoF Maybe [Char]
_ -> do
let baseDir :: [Char]
baseDir = ShowS
takeDirectory [Char]
genoF
fileName :: [Char]
fileName = ShowS
takeFileName [Char]
genoF
([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char]
baseDir, GenotypeFileSpec
gFileSpec {_vcfGenoFile :: [Char]
_vcfGenoFile = [Char]
fileName})
([Char], GenotypeDataSpec) -> m ([Char], GenotypeDataSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char]
baseDir, GenotypeDataSpec
gd {genotypeFileSpec :: GenotypeFileSpec
genotypeFileSpec = GenotypeFileSpec
newGfileSpec})
loadIndividuals :: FilePath
-> GenotypeDataSpec
-> PoseidonIO [EigenstratIndEntry]
loadIndividuals :: [Char] -> GenotypeDataSpec -> PoseidonIO [EigenstratIndEntry]
loadIndividuals [Char]
d (GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) = do
PlinkPopNameMode
popMode <- PoseidonIO PlinkPopNameMode
envInputPlinkMode
case GenotypeFileSpec
gFileSpec of
GenotypeEigenstrat [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
fn Maybe [Char]
fnChk -> do
IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
[Char] -> PoseidonIO [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [EigenstratIndEntry]
readEigenstratInd ([Char]
d [Char] -> ShowS
</> [Char]
fn)
GenotypePlink [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
fn Maybe [Char]
fnChk -> do
IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
(PlinkFamEntry -> EigenstratIndEntry)
-> [PlinkFamEntry] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd PlinkPopNameMode
popMode) ([PlinkFamEntry] -> [EigenstratIndEntry])
-> ReaderT Env IO [PlinkFamEntry]
-> PoseidonIO [EigenstratIndEntry]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> ReaderT Env IO [PlinkFamEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [PlinkFamEntry]
readFamFile ([Char]
d [Char] -> ShowS
</> [Char]
fn)
GenotypeVCF [Char]
fn Maybe [Char]
fnChk -> do
IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
(VCFheader [[Char]]
_ [[Char]]
sampleNames , Producer VCFentry (SafeT IO) ()
_) <- IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> ([Char] -> IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall (m :: * -> *) r.
(MonadMask m, MonadIO m) =>
SafeT m r -> m r
runSafeT (SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> ([Char]
-> SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall (m :: * -> *).
MonadSafe m =>
[Char] -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile ([Char]
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall a b. (a -> b) -> a -> b
$ ([Char]
d [Char] -> ShowS
</> [Char]
fn)
[EigenstratIndEntry] -> PoseidonIO [EigenstratIndEntry]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [[Char] -> Sex -> [Char] -> EigenstratIndEntry
EigenstratIndEntry [Char]
s Sex
Unknown [Char]
"unknown" | [Char]
s <- [[Char]]
sampleNames]
loadGenotypeData :: (MonadSafe m) =>
FilePath
-> GenotypeDataSpec
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
loadGenotypeData :: forall (m :: * -> *).
MonadSafe m =>
[Char]
-> GenotypeDataSpec
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
loadGenotypeData [Char]
baseDir (GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) =
case GenotypeFileSpec
gFileSpec of
GenotypeEigenstrat [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a, b) -> b
snd (([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
GenotypePlink [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> ([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a, b) -> b
snd (([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([PlinkFamEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
GenotypeVCF [Char]
fn Maybe [Char]
_ -> do
Producer VCFentry m ()
vcfProd <- (VCFheader, Producer VCFentry m ()) -> Producer VCFentry m ()
forall a b. (a, b) -> b
snd ((VCFheader, Producer VCFentry m ()) -> Producer VCFentry m ())
-> m (VCFheader, Producer VCFentry m ())
-> m (Producer VCFentry m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> m (VCFheader, Producer VCFentry m ())
forall (m :: * -> *).
MonadSafe m =>
[Char] -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile ([Char]
baseDir [Char] -> ShowS
</> [Char]
fn)
Producer (EigenstratSnpEntry, GenoLine) m ()
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer (EigenstratSnpEntry, GenoLine) m ()
-> m (Producer (EigenstratSnpEntry, GenoLine) m ()))
-> Producer (EigenstratSnpEntry, GenoLine) m ()
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall a b. (a -> b) -> a -> b
$ Producer VCFentry m ()
vcfProd Producer VCFentry m ()
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) r.
MonadIO m =>
Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe
vcf2eigenstratPipe :: (MonadIO m) => Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe :: forall (m :: * -> *) r.
MonadIO m =>
Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe = Proxy () VCFentry () VCFentry m r
-> (VCFentry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Proxy () VCFentry () VCFentry m r
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((VCFentry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r)
-> (VCFentry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r
forall a b. (a -> b) -> a -> b
$ \VCFentry
vcfEntry -> do
case VCFentry -> Either [Char] FreqSumEntry
vcfToFreqSumEntry VCFentry
vcfEntry of
Right (FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
snpId_ Maybe Double
geneticPos Char
ref Char
alt [Maybe Int]
alleleCounts) -> do
let eigenstratSnpEntry :: EigenstratSnpEntry
eigenstratSnpEntry = Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos (Double -> (Double -> Double) -> Maybe Double -> Double
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Double
0.0 Double -> Double
forall a. a -> a
id Maybe Double
geneticPos) (ByteString
-> (ByteString -> ByteString) -> Maybe ByteString -> ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteString
"" ByteString -> ByteString
forall a. a -> a
id Maybe ByteString
snpId_) Char
ref Char
alt
GenoLine
genoLine <- [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
V.fromList ([GenoEntry] -> GenoLine)
-> Proxy
() VCFentry () (EigenstratSnpEntry, GenoLine) m [GenoEntry]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoLine
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Maybe Int]
-> (Maybe Int
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> Proxy
() VCFentry () (EigenstratSnpEntry, GenoLine) m [GenoEntry]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Maybe Int]
alleleCounts (\Maybe Int
alleleCount -> do
case Maybe Int
alleleCount of
Maybe Int
Nothing -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Missing
Just Int
0 -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomRef
Just Int
1 -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Het
Just Int
2 -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomAlt
Maybe Int
_ -> IO GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
IO a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> ([Char] -> IO GenoEntry)
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenoEntry
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO GenoEntry)
-> ([Char] -> PoseidonException) -> [Char] -> IO GenoEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a b. (a -> b) -> a -> b
$
[Char]
"illegal dosage (" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Maybe Int -> [Char]
forall a. Show a => a -> [Char]
show Maybe Int
alleleCount [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
") in VCF file at chrom " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Chrom -> [Char]
forall a. Show a => a -> [Char]
show Chrom
chrom [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
", position " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
pos)
(EigenstratSnpEntry, GenoLine)
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (EigenstratSnpEntry
eigenstratSnpEntry, GenoLine
genoLine)
Left [Char]
err -> IO () -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall a.
IO a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> ([Char] -> IO ())
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO ())
-> ([Char] -> PoseidonException) -> [Char] -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ [Char]
err
joinEntries :: (MonadIO m) => LogA -> [Int] -> [String] -> [Maybe (EigenstratSnpEntry, GenoLine)] -> m (EigenstratSnpEntry, GenoLine)
joinEntries :: forall (m :: * -> *).
MonadIO m =>
LogA
-> [Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> m (EigenstratSnpEntry, GenoLine)
joinEntries LogA
logA [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList = do
let allSnpEntries :: [EigenstratSnpEntry]
allSnpEntries = ((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> [(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> [a] -> [b]
map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst ([(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> ([Maybe (EigenstratSnpEntry, GenoLine)]
-> [(EigenstratSnpEntry, GenoLine)])
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [EigenstratSnpEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(EigenstratSnpEntry, GenoLine)]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> [Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> a -> b
$ [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList
EigenstratSnpEntry
consensusSnpEntry <- LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
allSnpEntries
[GenoLine]
recodedGenotypes <- [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
-> m GenoLine)
-> m [GenoLine]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList) (((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
-> m GenoLine)
-> m [GenoLine])
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
-> m GenoLine)
-> m [GenoLine]
forall a b. (a -> b) -> a -> b
$ \(Int
n, [Char]
name, Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple) ->
case Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple of
Maybe (EigenstratSnpEntry, GenoLine)
Nothing -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> GenoEntry -> GenoLine
forall a. Int -> a -> Vector a
V.replicate Int
n GenoEntry
Missing)
Just (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) -> case EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine of
Left [Char]
err -> do
let msg :: [Char]
msg = [Char]
"Error in genotype data of package " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
name [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err
IO GenoLine -> m GenoLine
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenoLine -> m GenoLine)
-> (PoseidonException -> IO GenoLine)
-> PoseidonException
-> m GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenoLine
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m GenoLine)
-> PoseidonException -> m GenoLine
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException [Char]
msg
Right GenoLine
x -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
x
(EigenstratSnpEntry, GenoLine) -> m (EigenstratSnpEntry, GenoLine)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry
consensusSnpEntry, [GenoLine] -> GenoLine
forall a. [Vector a] -> Vector a
V.concat [GenoLine]
recodedGenotypes)
getConsensusSnpEntry :: (MonadIO m) => LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry :: forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
snpEntries = do
let chrom :: Chrom
chrom = EigenstratSnpEntry -> Chrom
snpChrom (EigenstratSnpEntry -> Chrom)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Chrom) -> [EigenstratSnpEntry] -> Chrom
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
pos :: Int
pos = EigenstratSnpEntry -> Int
snpPos (EigenstratSnpEntry -> Int)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Int) -> [EigenstratSnpEntry] -> Int
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
uniqueIds :: [ByteString]
uniqueIds = [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a]
nub ([ByteString] -> [ByteString])
-> ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> ByteString)
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> ByteString
snpId ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
uniqueGenPos :: [Double]
uniqueGenPos = [Double] -> [Double]
forall a. Ord a => [a] -> [a]
sort ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [Double]
forall a. Eq a => [a] -> [a]
nub ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> Double) -> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> Double
snpGeneticPos ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
allAlleles :: [Char]
allAlleles = [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[Char
r, Char
a] | EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
r Char
a <- [EigenstratSnpEntry]
snpEntries]
uniqueAlleles :: [Char]
uniqueAlleles = ShowS
forall a. Eq a => [a] -> [a]
nub ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
a -> Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'N' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'0' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'X') ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ [Char]
allAlleles
ByteString
id_ <- case [ByteString]
uniqueIds of
[ByteString
i] -> ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
i
[ByteString]
_ -> do
let rsIds :: [ByteString]
rsIds = (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
isPrefixOf ByteString
"rs") [ByteString]
uniqueIds
selectedId :: ByteString
selectedId = case [ByteString]
rsIds of
(ByteString
i:[ByteString]
_) -> ByteString
i
[ByteString]
_ -> [ByteString] -> ByteString
forall a. HasCallStack => [a] -> a
head [ByteString]
uniqueIds
LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Found inconsistent SNP IDs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [ByteString] -> [Char]
forall a. Show a => a -> [Char]
show [ByteString]
uniqueIds [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
selectedId
ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
selectedId
Double
genPos <- case [Double]
uniqueGenPos of
[Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p
[Double
0.0, Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p
[Double]
_ -> do
let selectedGenPos :: Double
selectedGenPos = [Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
uniqueGenPos
LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Found inconsistent genetic positions in SNP " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
id_ [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Double] -> [Char]
forall a. Show a => a -> [Char]
show [Double]
uniqueGenPos [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Double -> [Char]
forall a. Show a => a -> [Char]
show Double
selectedGenPos
Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
selectedGenPos
case [Char]
uniqueAlleles of
[] -> do
EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
'N')
[Char
r] -> do
EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
r)
[Char
ref, Char
alt] ->
EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
ref Char
alt)
[Char]
_ -> IO EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO EigenstratSnpEntry -> m EigenstratSnpEntry)
-> (PoseidonException -> IO EigenstratSnpEntry)
-> PoseidonException
-> m EigenstratSnpEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO EigenstratSnpEntry
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m EigenstratSnpEntry)
-> PoseidonException -> m EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
"Incongruent alleles: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [EigenstratSnpEntry] -> [Char]
forall a. Show a => a -> [Char]
show [EigenstratSnpEntry]
snpEntries)
recodeAlleles :: EigenstratSnpEntry -> EigenstratSnpEntry -> GenoLine -> Either String GenoLine
recodeAlleles :: EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine = do
let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
consRefA Char
consAltA) = EigenstratSnpEntry
consensusSnpEntry
let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
refA Char
altA) = EigenstratSnpEntry
snpEntry
let maybeRecodedGenoline :: Either [Char] GenoLine
maybeRecodedGenoline = case (Char -> Bool
isMissing Char
consRefA, Char -> Bool
isMissing Char
consAltA) of
(Bool
False, Bool
False) -> Char -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 Char
consRefA Char
consAltA Char
refA Char
altA
(Bool
False, Bool
True) -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 Char
consRefA Char
refA Char
altA
(Bool
True, Bool
False) -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3 Char
consAltA Char
refA Char
altA
(Bool
True, Bool
True) -> Either [Char] GenoLine
maybeFlipGenoLine4
case Either [Char] GenoLine
maybeRecodedGenoline of
Left [Char]
err -> [Char] -> Either [Char] GenoLine
forall a b. a -> Either a b
Left ([Char]
"At snp " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ EigenstratSnpEntry -> [Char]
forall a. Show a => a -> [Char]
show EigenstratSnpEntry
snpEntry [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": allele coding error due to inconsistent \
\alleles with consensus alleles ref = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consRefA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
", alt = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consAltA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
". Error: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err)
Right GenoLine
recodedGenoLine -> GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
recodedGenoLine
where
isMissing :: Char -> Bool
isMissing Char
'0' = Bool
True
isMissing Char
'N' = Bool
True
isMissing Char
_ = Bool
False
maybeFlipGenoLine1 :: a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 a
consRefA a
consAltA a
refA a
altA
| (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consRefA, a
consAltA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
| (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consAltA, a
consRefA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine)
| a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
| a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
| a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine)
| a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine)
| Bool
otherwise = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
maybeFlipGenoLine2 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 a
consRefA a
refA a
altA
| a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
| a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine)
| Bool
otherwise = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
maybeFlipGenoLine3 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3 a
consAltA a
refA a
altA
| a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine)
| a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
| Bool
otherwise = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
maybeFlipGenoLine4 :: Either [Char] GenoLine
maybeFlipGenoLine4 = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
checked :: GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left [Char]
"Requiring all genotype missing" else Either [Char] b
action
checked GenoEntry
t Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (\GenoEntry
g -> GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing Bool -> Bool -> Bool
&& GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
t) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left ([Char]
"requiring all genotypes missing or " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ GenoEntry -> [Char]
forall a. Show a => a -> [Char]
show GenoEntry
t) else Either [Char] b
action
flipGeno :: GenoEntry -> GenoEntry
flipGeno GenoEntry
HomRef = GenoEntry
HomAlt
flipGeno GenoEntry
HomAlt = GenoEntry
HomRef
flipGeno GenoEntry
g = GenoEntry
g
printSNPCopyProgress :: (MonadIO m) => LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress :: forall (m :: * -> *) a.
MonadIO m =>
LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress LogA
logA UTCTime
startTime = do
IORef Int
counterRef <- IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> Proxy () a () a m (IORef Int))
-> IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef (Int
0 :: Int)
Pipe a a m () -> (a -> Pipe a a m ()) -> Pipe a a m ()
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Pipe a a m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((a -> Pipe a a m ()) -> Pipe a a m ())
-> (a -> Pipe a a m ()) -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ \a
val -> do
Int
n <- IO Int -> Proxy () a () a m Int
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> Proxy () a () a m Int)
-> IO Int -> Proxy () a () a m Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
counterRef
UTCTime
currentTime <- IO UTCTime -> Proxy () a () a m UTCTime
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO UTCTime
getCurrentTime
LogA -> ReaderT Env IO () -> Pipe a a m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> Pipe a a m ())
-> ReaderT Env IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
n (UTCTime -> UTCTime -> NominalDiffTime
diffUTCTime UTCTime
currentTime UTCTime
startTime)
IO () -> Pipe a a m ()
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Pipe a a m ()) -> IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
counterRef (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
a -> Pipe a a m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
val
where
logProgress :: Int -> NominalDiffTime -> PoseidonIO ()
logProgress :: Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
c NominalDiffTime
t
| Int
c Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10000 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"SNPs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
padLeft Int
9 (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c) [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime (NominalDiffTime -> Int
forall b. Integral b => NominalDiffTime -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor NominalDiffTime
t)
| Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1000 = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Probing of the first 1000 SNPs successful. Continuing now..."
| Bool
otherwise = () -> ReaderT Env IO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
prettyTime :: Int -> String
prettyTime :: Int -> [Char]
prettyTime Int
t
| Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
60 = Int -> [Char]
forall a. Show a => a -> [Char]
show Int
t [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"s"
| Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
60 Bool -> Bool -> Bool
&& Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
3600 = do
let (Int
minutes, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
60
Int -> [Char]
forall a. Show a => a -> [Char]
show Int
minutes [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"m " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest
| Bool
otherwise = do
let (Int
hours, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
3600
Int -> [Char]
forall a. Show a => a -> [Char]
show Int
hours [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"h " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest
selectIndices :: [Int] -> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices :: [Int]
-> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices [Int]
indices (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) = (EigenstratSnpEntry
snpEntry, [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
V.fromList [GenoLine
genoLine GenoLine -> Int -> GenoEntry
forall a. Vector a -> Int -> a
V.! Int
i | Int
i <- [Int]
indices])