{-# LANGUAGE OverloadedStrings #-}
module Poseidon.GenotypeData where

import           Poseidon.Utils             (LogA, PoseidonException (..),
                                             PoseidonIO, checkFile,
                                             envInputPlinkMode, logDebug,
                                             logInfo, logWithEnv, padLeft)

import           Control.Exception          (throwIO)
import           Control.Monad              (forM, unless)
import           Control.Monad.Catch        (MonadThrow, throwM)
import           Control.Monad.IO.Class     (MonadIO, liftIO)
import           Data.Aeson                 (FromJSON, ToJSON, object,
                                             parseJSON, toJSON, withObject,
                                             withText, (.:), (.:?), (.=))
import           Data.ByteString            (isPrefixOf)
import           Data.IORef                 (modifyIORef, newIORef, readIORef)
import           Data.List                  (nub, sort)
import           Data.Maybe                 (catMaybes)
import qualified Data.Text                  as T
import           Data.Time                  (NominalDiffTime, UTCTime,
                                             diffUTCTime, getCurrentTime)
import qualified Data.Vector                as V
import           Pipes                      (Pipe, Producer, cat, for, yield,
                                             (>->))
import           Pipes.Safe                 (MonadSafe, runSafeT)
import           SequenceFormats.Eigenstrat (EigenstratIndEntry (..),
                                             EigenstratSnpEntry (..),
                                             GenoEntry (..), GenoLine, Sex (..),
                                             readEigenstrat, readEigenstratInd)
import           SequenceFormats.FreqSum    (FreqSumEntry (..))
import           SequenceFormats.Plink      (plinkFam2EigenstratInd,
                                             readFamFile, readPlink)
import           SequenceFormats.VCF        (VCFentry (..), VCFheader (..),
                                             readVCFfromFile, vcfToFreqSumEntry)
import           System.FilePath            (takeDirectory, takeFileName, (</>))

data GenoDataSource = PacBaseDir
    { GenoDataSource -> [Char]
getPacBaseDir :: FilePath
    }
    | GenoDirect
    { GenoDataSource -> GenotypeDataSpec
getGenoDirect :: GenotypeDataSpec
    }
    deriving Int -> GenoDataSource -> ShowS
[GenoDataSource] -> ShowS
GenoDataSource -> [Char]
(Int -> GenoDataSource -> ShowS)
-> (GenoDataSource -> [Char])
-> ([GenoDataSource] -> ShowS)
-> Show GenoDataSource
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenoDataSource -> ShowS
showsPrec :: Int -> GenoDataSource -> ShowS
$cshow :: GenoDataSource -> [Char]
show :: GenoDataSource -> [Char]
$cshowList :: [GenoDataSource] -> ShowS
showList :: [GenoDataSource] -> ShowS
Show

data GenotypeDataSpec = GenotypeDataSpec {
    GenotypeDataSpec -> GenotypeFileSpec
genotypeFileSpec :: GenotypeFileSpec,
    GenotypeDataSpec -> Maybe SNPSetSpec
genotypeSnpSet   :: Maybe SNPSetSpec
} deriving (Int -> GenotypeDataSpec -> ShowS
[GenotypeDataSpec] -> ShowS
GenotypeDataSpec -> [Char]
(Int -> GenotypeDataSpec -> ShowS)
-> (GenotypeDataSpec -> [Char])
-> ([GenotypeDataSpec] -> ShowS)
-> Show GenotypeDataSpec
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenotypeDataSpec -> ShowS
showsPrec :: Int -> GenotypeDataSpec -> ShowS
$cshow :: GenotypeDataSpec -> [Char]
show :: GenotypeDataSpec -> [Char]
$cshowList :: [GenotypeDataSpec] -> ShowS
showList :: [GenotypeDataSpec] -> ShowS
Show, GenotypeDataSpec -> GenotypeDataSpec -> Bool
(GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> (GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> Eq GenotypeDataSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
$c/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
Eq)

data GenotypeFileSpec = GenotypeEigenstrat {
    GenotypeFileSpec -> [Char]
_esGenoFile       :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_esGenoFileChkSum :: Maybe String,
    GenotypeFileSpec -> [Char]
_esSnpFile        :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_esSnpFileChkSum  :: Maybe String,
    GenotypeFileSpec -> [Char]
_esIndFile        :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_esIndFileChkSum  :: Maybe String
} | GenotypePlink {
    GenotypeFileSpec -> [Char]
_plGenoFile       :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_plGenoFileChkSum :: Maybe String,
    GenotypeFileSpec -> [Char]
_plSnpFile        :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_plSnpFileChkSum  :: Maybe String,
    GenotypeFileSpec -> [Char]
_plIndFile        :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_plIndFileChkSum  :: Maybe String
} | GenotypeVCF {
    GenotypeFileSpec -> [Char]
_vcfGenoFile       :: FilePath,
    GenotypeFileSpec -> Maybe [Char]
_vcfGenoFileChkSum :: Maybe String
} deriving (Int -> GenotypeFileSpec -> ShowS
[GenotypeFileSpec] -> ShowS
GenotypeFileSpec -> [Char]
(Int -> GenotypeFileSpec -> ShowS)
-> (GenotypeFileSpec -> [Char])
-> ([GenotypeFileSpec] -> ShowS)
-> Show GenotypeFileSpec
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenotypeFileSpec -> ShowS
showsPrec :: Int -> GenotypeFileSpec -> ShowS
$cshow :: GenotypeFileSpec -> [Char]
show :: GenotypeFileSpec -> [Char]
$cshowList :: [GenotypeFileSpec] -> ShowS
showList :: [GenotypeFileSpec] -> ShowS
Show, GenotypeFileSpec -> GenotypeFileSpec -> Bool
(GenotypeFileSpec -> GenotypeFileSpec -> Bool)
-> (GenotypeFileSpec -> GenotypeFileSpec -> Bool)
-> Eq GenotypeFileSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
== :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
$c/= :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
/= :: GenotypeFileSpec -> GenotypeFileSpec -> Bool
Eq)

-- | To facilitate automatic parsing of GenotypeDataSpec from JSON files
instance FromJSON GenotypeDataSpec where
    parseJSON :: Value -> Parser GenotypeDataSpec
parseJSON = [Char]
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a. [Char] -> (Object -> Parser a) -> Value -> Parser a
withObject [Char]
"GenotypeData" ((Object -> Parser GenotypeDataSpec)
 -> Value -> Parser GenotypeDataSpec)
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a b. (a -> b) -> a -> b
$ \Object
v -> do
        Text
gformat <- Object
v Object -> Key -> Parser Text
forall a. FromJSON a => Object -> Key -> Parser a
.: Key
"format"
        GenotypeFileSpec
gfileSpec <- case Text
gformat of
            Text
"EIGENSTRAT" -> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypeEigenstrat
                ([Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"genoFile"
                Parser
  (Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser
     ([Char]
      -> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
                Parser
  ([Char]
   -> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"snpFile"
                Parser (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpFileChkSum"
                Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"indFile"
                Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"indFileChkSum"
            Text
"PLINK" -> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> GenotypeFileSpec
GenotypePlink
                ([Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"genoFile"
                Parser
  (Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser
     ([Char]
      -> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
                Parser
  ([Char]
   -> Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"snpFile"
                Parser (Maybe [Char] -> [Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char])
-> Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpFileChkSum"
                Parser ([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"indFile"
                Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"indFileChkSum"
            Text
"VCF" -> [Char] -> Maybe [Char] -> GenotypeFileSpec
GenotypeVCF
                ([Char] -> Maybe [Char] -> GenotypeFileSpec)
-> Parser [Char] -> Parser (Maybe [Char] -> GenotypeFileSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"genoFile"
                Parser (Maybe [Char] -> GenotypeFileSpec)
-> Parser (Maybe [Char]) -> Parser GenotypeFileSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
            Text
_ -> [Char] -> Parser GenotypeFileSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown format " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
gformat)
        Maybe SNPSetSpec
snpSet <- Object
v Object -> Key -> Parser (Maybe SNPSetSpec)
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpSet"
        GenotypeDataSpec -> Parser GenotypeDataSpec
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenotypeDataSpec -> Parser GenotypeDataSpec)
-> GenotypeDataSpec -> Parser GenotypeDataSpec
forall a b. (a -> b) -> a -> b
$ GenotypeFileSpec -> Maybe SNPSetSpec -> GenotypeDataSpec
GenotypeDataSpec GenotypeFileSpec
gfileSpec Maybe SNPSetSpec
snpSet

instance ToJSON GenotypeDataSpec where
    -- this encodes directly to a bytestring Builder
    toJSON :: GenotypeDataSpec -> Value
toJSON (GenotypeDataSpec GenotypeFileSpec
gfileSpec Maybe SNPSetSpec
snpSet) = case GenotypeFileSpec
gfileSpec of
        GenotypeEigenstrat [Char]
genoF Maybe [Char]
genoFchk [Char]
snpF Maybe [Char]
snpFchk [Char]
indF Maybe [Char]
indFchk ->
            [Pair] -> Value
object [
                Key
"format"        Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"EIGENSTRAT" :: String),
                Key
"genoFile"      Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
                Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk,
                Key
"snpFile"       Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
snpF,
                Key
"snpFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
snpFchk,
                Key
"indFile"       Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
indF,
                Key
"indFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
indFchk,
                Key
"snpSet"        Key -> Maybe SNPSetSpec -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe SNPSetSpec
snpSet
            ]
        GenotypePlink [Char]
genoF Maybe [Char]
genoFchk [Char]
snpF Maybe [Char]
snpFchk [Char]
indF Maybe [Char]
indFchk ->
            [Pair] -> Value
object [
                Key
"format"        Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"PLINK" :: String),
                Key
"genoFile"      Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
                Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk,
                Key
"snpFile"       Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
snpF,
                Key
"snpFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
snpFchk,
                Key
"indFile"       Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
indF,
                Key
"indFileChkSum" Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
indFchk,
                Key
"snpSet"        Key -> Maybe SNPSetSpec -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe SNPSetSpec
snpSet
            ]
        GenotypeVCF [Char]
genoF Maybe [Char]
genoFchk ->
            [Pair] -> Value
object [
                Key
"format"        Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= ([Char]
"VCF" :: String),
                Key
"genoFile"      Key -> [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= [Char]
genoF,
                Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
.= Maybe [Char]
genoFchk
            ]

data SNPSetSpec = SNPSet1240K
    | SNPSetHumanOrigins
    | SNPSetOther
    deriving (SNPSetSpec -> SNPSetSpec -> Bool
(SNPSetSpec -> SNPSetSpec -> Bool)
-> (SNPSetSpec -> SNPSetSpec -> Bool) -> Eq SNPSetSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SNPSetSpec -> SNPSetSpec -> Bool
== :: SNPSetSpec -> SNPSetSpec -> Bool
$c/= :: SNPSetSpec -> SNPSetSpec -> Bool
/= :: SNPSetSpec -> SNPSetSpec -> Bool
Eq)

instance Show SNPSetSpec where
    show :: SNPSetSpec -> [Char]
show SNPSetSpec
SNPSet1240K        = [Char]
"1240K"
    show SNPSetSpec
SNPSetHumanOrigins = [Char]
"HumanOrigins"
    show SNPSetSpec
SNPSetOther        = [Char]
"Other"

instance FromJSON SNPSetSpec where
    parseJSON :: Value -> Parser SNPSetSpec
parseJSON = [Char] -> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a. [Char] -> (Text -> Parser a) -> Value -> Parser a
withText [Char]
"snpSet" ((Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec)
-> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a b. (a -> b) -> a -> b
$ \Text
v -> case Text
v of
        Text
"1240K"        -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSet1240K
        Text
"HumanOrigins" -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetHumanOrigins
        Text
"Other"        -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetOther
        Text
_              -> [Char] -> Parser SNPSetSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown snpSet " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
v)

instance ToJSON SNPSetSpec where
    toJSON :: SNPSetSpec -> Value
toJSON SNPSetSpec
a = case SNPSetSpec
a of
        SNPSetSpec
SNPSet1240K        -> Value
"1240K"
        SNPSetSpec
SNPSetHumanOrigins -> Value
"HumanOrigins"
        SNPSetSpec
SNPSetOther        -> Value
"Other"

snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList (SNPSetSpec
x:[SNPSetSpec]
xs) Bool
intersect = (SNPSetSpec -> SNPSetSpec -> SNPSetSpec)
-> SNPSetSpec -> [SNPSetSpec] -> SNPSetSpec
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\SNPSetSpec
a SNPSetSpec
b -> SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
a SNPSetSpec
b Bool
intersect) SNPSetSpec
x [SNPSetSpec]
xs
snpSetMergeList [SNPSetSpec]
_ Bool
_ = [Char] -> SNPSetSpec
forall a. HasCallStack => [Char] -> a
error [Char]
"snpSetMergeList: This should never happen"

snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSet1240K         Bool
_     = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSetHumanOrigins  Bool
_     = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetOther         SNPSetSpec
_                   Bool
_     = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
_                   SNPSetSpec
SNPSetOther         Bool
_     = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSetHumanOrigins  Bool
True  = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSet1240K         Bool
True  = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSetHumanOrigins  Bool
False = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSet1240K         Bool
False = SNPSetSpec
SNPSet1240K

-- | removes directories of all filenames and returns a tuple of the basename and a modified GenotypeDataSpec with pure filenames
-- In case basedirectories do not match, this function will throw an exception
reduceGenotypeFilepaths :: (MonadThrow m) => GenotypeDataSpec -> m (FilePath, GenotypeDataSpec)
reduceGenotypeFilepaths :: forall (m :: * -> *).
MonadThrow m =>
GenotypeDataSpec -> m ([Char], GenotypeDataSpec)
reduceGenotypeFilepaths gd :: GenotypeDataSpec
gd@(GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) = do
    ([Char]
baseDir, GenotypeFileSpec
newGfileSpec) <- case GenotypeFileSpec
gFileSpec of
        GenotypeEigenstrat [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> do
            let baseDirs :: [[Char]]
baseDirs  = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeDirectory   [[Char]
genoF, [Char]
snpF, [Char]
indF]
                fileNames :: [[Char]]
fileNames = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeFileName [[Char]
genoF, [Char]
snpF, [Char]
indF]
            Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ([Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
==([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs)) [[Char]]
baseDirs) (m () -> m ())
-> (PoseidonException -> m ()) -> PoseidonException -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> m ()) -> PoseidonException -> m ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> PoseidonException
PoseidonUnequalBaseDirException [Char]
genoF [Char]
snpF [Char]
indF
            ([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs, GenotypeFileSpec
gFileSpec {_esGenoFile :: [Char]
_esGenoFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
0, _esSnpFile :: [Char]
_esSnpFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
1, _esIndFile :: [Char]
_esIndFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
2})
        GenotypePlink [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> do
            let baseDirs :: [[Char]]
baseDirs  = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeDirectory   [[Char]
genoF, [Char]
snpF, [Char]
indF]
                fileNames :: [[Char]]
fileNames = ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
takeFileName [[Char]
genoF, [Char]
snpF, [Char]
indF]
            Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (([Char] -> Bool) -> [[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ([Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
==([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs)) [[Char]]
baseDirs) (m () -> m ())
-> (PoseidonException -> m ()) -> PoseidonException -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (PoseidonException -> m ()) -> PoseidonException -> m ()
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> PoseidonException
PoseidonUnequalBaseDirException [Char]
genoF [Char]
snpF [Char]
indF
            ([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([[Char]] -> [Char]
forall a. HasCallStack => [a] -> a
head [[Char]]
baseDirs, GenotypeFileSpec
gFileSpec {_plGenoFile :: [Char]
_plGenoFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
0, _plSnpFile :: [Char]
_plSnpFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
1, _plIndFile :: [Char]
_plIndFile = [[Char]]
fileNames [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
2})
        GenotypeVCF [Char]
genoF Maybe [Char]
_ -> do
            let baseDir :: [Char]
baseDir  = ShowS
takeDirectory   [Char]
genoF
                fileName :: [Char]
fileName = ShowS
takeFileName [Char]
genoF
            ([Char], GenotypeFileSpec) -> m ([Char], GenotypeFileSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char]
baseDir, GenotypeFileSpec
gFileSpec {_vcfGenoFile :: [Char]
_vcfGenoFile = [Char]
fileName})
    ([Char], GenotypeDataSpec) -> m ([Char], GenotypeDataSpec)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char]
baseDir, GenotypeDataSpec
gd {genotypeFileSpec :: GenotypeFileSpec
genotypeFileSpec = GenotypeFileSpec
newGfileSpec})

-- | A function to return a list of all individuals in the genotype files of a package.
loadIndividuals :: FilePath -- ^ the base directory
               -> GenotypeDataSpec -- ^ the Genotype spec
               -> PoseidonIO [EigenstratIndEntry] -- ^ the returned list of EigenstratIndEntries.
loadIndividuals :: [Char] -> GenotypeDataSpec -> PoseidonIO [EigenstratIndEntry]
loadIndividuals [Char]
d (GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) = do
    PlinkPopNameMode
popMode <- PoseidonIO PlinkPopNameMode
envInputPlinkMode
    case GenotypeFileSpec
gFileSpec of
        GenotypeEigenstrat [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
fn Maybe [Char]
fnChk -> do
            IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
            [Char] -> PoseidonIO [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [EigenstratIndEntry]
readEigenstratInd ([Char]
d [Char] -> ShowS
</> [Char]
fn)
        GenotypePlink [Char]
_ Maybe [Char]
_ [Char]
_ Maybe [Char]
_ [Char]
fn Maybe [Char]
fnChk -> do
            IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
            (PlinkFamEntry -> EigenstratIndEntry)
-> [PlinkFamEntry] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd PlinkPopNameMode
popMode) ([PlinkFamEntry] -> [EigenstratIndEntry])
-> ReaderT Env IO [PlinkFamEntry]
-> PoseidonIO [EigenstratIndEntry]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> ReaderT Env IO [PlinkFamEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [PlinkFamEntry]
readFamFile ([Char]
d [Char] -> ShowS
</> [Char]
fn)
        GenotypeVCF [Char]
fn Maybe [Char]
fnChk -> do
            IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> [Char]
fn) Maybe [Char]
fnChk
            (VCFheader [[Char]]
_ [[Char]]
sampleNames , Producer VCFentry (SafeT IO) ()
_) <- IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (VCFheader, Producer VCFentry (SafeT IO) ())
 -> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> ([Char] -> IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
-> IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall (m :: * -> *) r.
(MonadMask m, MonadIO m) =>
SafeT m r -> m r
runSafeT (SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
 -> IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> ([Char]
    -> SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> SafeT IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall (m :: * -> *).
MonadSafe m =>
[Char] -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile ([Char]
 -> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ()))
-> [Char]
-> ReaderT Env IO (VCFheader, Producer VCFentry (SafeT IO) ())
forall a b. (a -> b) -> a -> b
$ ([Char]
d [Char] -> ShowS
</> [Char]
fn)
            --neither Sex nor population name is part of a VCF file, so we fill dummy values:
            [EigenstratIndEntry] -> PoseidonIO [EigenstratIndEntry]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [[Char] -> Sex -> [Char] -> EigenstratIndEntry
EigenstratIndEntry [Char]
s Sex
Unknown [Char]
"unknown" | [Char]
s <- [[Char]]
sampleNames]

-- | A function to read the genotype data of a package
loadGenotypeData :: (MonadSafe m) =>
                   FilePath -- ^ the base path
                -> GenotypeDataSpec -- ^ the genotype spec
                -> m (Producer (EigenstratSnpEntry, GenoLine) m ())
                -- ^ a Producer over the Snp position values and the genotype line.
loadGenotypeData :: forall (m :: * -> *).
MonadSafe m =>
[Char]
-> GenotypeDataSpec
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
loadGenotypeData [Char]
baseDir (GenotypeDataSpec GenotypeFileSpec
gFileSpec Maybe SNPSetSpec
_) =
    case GenotypeFileSpec
gFileSpec of
        GenotypeEigenstrat [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> ([EigenstratIndEntry],
 Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a, b) -> b
snd (([EigenstratIndEntry],
  Producer (EigenstratSnpEntry, GenoLine) m ())
 -> Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
        GenotypePlink      [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ -> ([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a, b) -> b
snd (([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
 -> Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
        GenotypeVCF [Char]
fn Maybe [Char]
_ -> do
            Producer VCFentry m ()
vcfProd <- (VCFheader, Producer VCFentry m ()) -> Producer VCFentry m ()
forall a b. (a, b) -> b
snd ((VCFheader, Producer VCFentry m ()) -> Producer VCFentry m ())
-> m (VCFheader, Producer VCFentry m ())
-> m (Producer VCFentry m ())
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> m (VCFheader, Producer VCFentry m ())
forall (m :: * -> *).
MonadSafe m =>
[Char] -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile ([Char]
baseDir [Char] -> ShowS
</> [Char]
fn)
            Producer (EigenstratSnpEntry, GenoLine) m ()
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer (EigenstratSnpEntry, GenoLine) m ()
 -> m (Producer (EigenstratSnpEntry, GenoLine) m ()))
-> Producer (EigenstratSnpEntry, GenoLine) m ()
-> m (Producer (EigenstratSnpEntry, GenoLine) m ())
forall a b. (a -> b) -> a -> b
$ Producer VCFentry m ()
vcfProd Producer VCFentry m ()
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) r.
MonadIO m =>
Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe

vcf2eigenstratPipe :: (MonadIO m) => Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe :: forall (m :: * -> *) r.
MonadIO m =>
Pipe VCFentry (EigenstratSnpEntry, GenoLine) m r
vcf2eigenstratPipe = Proxy () VCFentry () VCFentry m r
-> (VCFentry
    -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Proxy () VCFentry () VCFentry m r
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((VCFentry
  -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
 -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r)
-> (VCFentry
    -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m r
forall a b. (a -> b) -> a -> b
$ \VCFentry
vcfEntry -> do
    case VCFentry -> Either [Char] FreqSumEntry
vcfToFreqSumEntry VCFentry
vcfEntry of --freqSum is a useful intermediate format. This function already does a bunch of checks of the VCF data.
        Right (FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
snpId_ Maybe Double
geneticPos Char
ref Char
alt [Maybe Int]
alleleCounts) -> do
            let eigenstratSnpEntry :: EigenstratSnpEntry
eigenstratSnpEntry = Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos (Double -> (Double -> Double) -> Maybe Double -> Double
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Double
0.0 Double -> Double
forall a. a -> a
id Maybe Double
geneticPos) (ByteString
-> (ByteString -> ByteString) -> Maybe ByteString -> ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteString
"" ByteString -> ByteString
forall a. a -> a
id Maybe ByteString
snpId_) Char
ref Char
alt
            GenoLine
genoLine <- [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
V.fromList ([GenoEntry] -> GenoLine)
-> Proxy
     () VCFentry () (EigenstratSnpEntry, GenoLine) m [GenoEntry]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoLine
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Maybe Int]
-> (Maybe Int
    -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> Proxy
     () VCFentry () (EigenstratSnpEntry, GenoLine) m [GenoEntry]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Maybe Int]
alleleCounts (\Maybe Int
alleleCount -> do
                    case Maybe Int
alleleCount of
                        Maybe Int
Nothing -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Missing
                        Just Int
0  -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomRef
                        Just Int
1  -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Het
                        Just Int
2  -> GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomAlt
                        Maybe Int
_       -> IO GenoEntry
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a.
IO a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenoEntry
 -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> ([Char] -> IO GenoEntry)
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenoEntry
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO GenoEntry)
-> ([Char] -> PoseidonException) -> [Char] -> IO GenoEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
 -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry)
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m GenoEntry
forall a b. (a -> b) -> a -> b
$
                            [Char]
"illegal dosage (" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Maybe Int -> [Char]
forall a. Show a => a -> [Char]
show Maybe Int
alleleCount [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
") in VCF file at chrom " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Chrom -> [Char]
forall a. Show a => a -> [Char]
show Chrom
chrom [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
", position " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
pos)
            (EigenstratSnpEntry, GenoLine)
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (EigenstratSnpEntry
eigenstratSnpEntry, GenoLine
genoLine)
        Left [Char]
err -> IO () -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall a.
IO a -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> ([Char] -> IO ())
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> IO ())
-> ([Char] -> PoseidonException) -> [Char] -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
 -> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ())
-> [Char]
-> Proxy () VCFentry () (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ [Char]
err

joinEntries :: (MonadIO m) => LogA -> [Int] -> [String] -> [Maybe (EigenstratSnpEntry, GenoLine)] -> m (EigenstratSnpEntry, GenoLine)
joinEntries :: forall (m :: * -> *).
MonadIO m =>
LogA
-> [Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> m (EigenstratSnpEntry, GenoLine)
joinEntries LogA
logA [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList = do
    let allSnpEntries :: [EigenstratSnpEntry]
allSnpEntries = ((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> [(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> [a] -> [b]
map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst ([(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> ([Maybe (EigenstratSnpEntry, GenoLine)]
    -> [(EigenstratSnpEntry, GenoLine)])
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [EigenstratSnpEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(EigenstratSnpEntry, GenoLine)]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> [Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> a -> b
$ [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList
    EigenstratSnpEntry
consensusSnpEntry <- LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
allSnpEntries
    [GenoLine]
recodedGenotypes <- [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
    -> m GenoLine)
-> m [GenoLine]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList) (((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
  -> m GenoLine)
 -> m [GenoLine])
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
    -> m GenoLine)
-> m [GenoLine]
forall a b. (a -> b) -> a -> b
$ \(Int
n, [Char]
name, Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple) ->
        case Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple of
            Maybe (EigenstratSnpEntry, GenoLine)
Nothing -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> GenoEntry -> GenoLine
forall a. Int -> a -> Vector a
V.replicate Int
n GenoEntry
Missing)
            Just (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) -> case EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine of
                Left [Char]
err -> do
                    let msg :: [Char]
msg = [Char]
"Error in genotype data of package " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
name [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err
                    IO GenoLine -> m GenoLine
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenoLine -> m GenoLine)
-> (PoseidonException -> IO GenoLine)
-> PoseidonException
-> m GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenoLine
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m GenoLine)
-> PoseidonException -> m GenoLine
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException [Char]
msg
                Right GenoLine
x -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
x
    (EigenstratSnpEntry, GenoLine) -> m (EigenstratSnpEntry, GenoLine)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry
consensusSnpEntry, [GenoLine] -> GenoLine
forall a. [Vector a] -> Vector a
V.concat [GenoLine]
recodedGenotypes)

getConsensusSnpEntry :: (MonadIO m) => LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry :: forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
snpEntries = do
    let chrom :: Chrom
chrom = EigenstratSnpEntry -> Chrom
snpChrom (EigenstratSnpEntry -> Chrom)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Chrom) -> [EigenstratSnpEntry] -> Chrom
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        pos :: Int
pos = EigenstratSnpEntry -> Int
snpPos (EigenstratSnpEntry -> Int)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Int) -> [EigenstratSnpEntry] -> Int
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        uniqueIds :: [ByteString]
uniqueIds = [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a]
nub ([ByteString] -> [ByteString])
-> ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> ByteString)
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> ByteString
snpId ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        uniqueGenPos :: [Double]
uniqueGenPos = [Double] -> [Double]
forall a. Ord a => [a] -> [a]
sort ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [Double]
forall a. Eq a => [a] -> [a]
nub ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> Double) -> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> Double
snpGeneticPos ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        allAlleles :: [Char]
allAlleles    = [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[Char
r, Char
a] | EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
r Char
a <- [EigenstratSnpEntry]
snpEntries]
        uniqueAlleles :: [Char]
uniqueAlleles = ShowS
forall a. Eq a => [a] -> [a]
nub ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
a -> Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'N' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'0' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'X') ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ [Char]
allAlleles
    ByteString
id_ <- case [ByteString]
uniqueIds of
        [ByteString
i] -> ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
i
        [ByteString]
_ -> do -- multiple Ids: Picking the first rs-number if possible, otherwise the first one.
            let rsIds :: [ByteString]
rsIds = (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
isPrefixOf ByteString
"rs") [ByteString]
uniqueIds
                selectedId :: ByteString
selectedId = case [ByteString]
rsIds of
                    (ByteString
i:[ByteString]
_) -> ByteString
i
                    [ByteString]
_     -> [ByteString] -> ByteString
forall a. HasCallStack => [a] -> a
head [ByteString]
uniqueIds
            LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
                [Char]
"Found inconsistent SNP IDs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [ByteString] -> [Char]
forall a. Show a => a -> [Char]
show [ByteString]
uniqueIds [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
selectedId
            ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
selectedId
    Double
genPos <- case [Double]
uniqueGenPos of
        [Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p
        [Double
0.0, Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p -- 0.0 is considered "no data" in genetic position column
        [Double]
_ -> do -- multiple non-zero genetic positions. Choosing the largest one.
            let selectedGenPos :: Double
selectedGenPos = [Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
uniqueGenPos
            LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
                [Char]
"Found inconsistent genetic positions in SNP " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
id_ [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
                [Double] -> [Char]
forall a. Show a => a -> [Char]
show [Double]
uniqueGenPos [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Double -> [Char]
forall a. Show a => a -> [Char]
show Double
selectedGenPos
            Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
selectedGenPos
    case [Char]
uniqueAlleles of
        [] -> do -- no non-missing alleles found
            -- logWithEnv LogA . logDebug $
            --     "SNP " ++ show id_ ++ " appears to have no data (both ref and alt allele are blank"
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
'N')
        [Char
r] -> do -- only one non-missing allele found
            -- logWithEnv LogA . logDebug $
            --     "SNP " ++ show id_ ++ " appears to be monomorphic (only one of ref and alt alleles are non-blank)"
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
r)
        [Char
ref, Char
alt] ->
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
ref Char
alt)
        [Char]
_ -> IO EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO EigenstratSnpEntry -> m EigenstratSnpEntry)
-> (PoseidonException -> IO EigenstratSnpEntry)
-> PoseidonException
-> m EigenstratSnpEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO EigenstratSnpEntry
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m EigenstratSnpEntry)
-> PoseidonException -> m EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
"Incongruent alleles: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [EigenstratSnpEntry] -> [Char]
forall a. Show a => a -> [Char]
show [EigenstratSnpEntry]
snpEntries)

recodeAlleles :: EigenstratSnpEntry -> EigenstratSnpEntry -> GenoLine -> Either String GenoLine
recodeAlleles :: EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine = do
    let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
consRefA Char
consAltA) = EigenstratSnpEntry
consensusSnpEntry
    let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
refA Char
altA) = EigenstratSnpEntry
snpEntry
    let maybeRecodedGenoline :: Either [Char] GenoLine
maybeRecodedGenoline = case (Char -> Bool
isMissing Char
consRefA, Char -> Bool
isMissing Char
consAltA) of
            (Bool
False, Bool
False) -> Char -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 Char
consRefA Char
consAltA Char
refA Char
altA
            (Bool
False, Bool
True)  -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 Char
consRefA          Char
refA Char
altA
            (Bool
True, Bool
False)  -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3          Char
consAltA Char
refA Char
altA
            (Bool
True, Bool
True)   -> Either [Char] GenoLine
maybeFlipGenoLine4
    case Either [Char] GenoLine
maybeRecodedGenoline of
        Left [Char]
err -> [Char] -> Either [Char] GenoLine
forall a b. a -> Either a b
Left ([Char]
"At snp " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ EigenstratSnpEntry -> [Char]
forall a. Show a => a -> [Char]
show EigenstratSnpEntry
snpEntry [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": allele coding error due to inconsistent \
                           \alleles with consensus alleles ref = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consRefA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
", alt = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consAltA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
                           [Char]
". Error: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err)
        Right GenoLine
recodedGenoLine -> GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
recodedGenoLine
  where
    isMissing :: Char -> Bool
isMissing Char
'0' = Bool
True
    isMissing Char
'N' = Bool
True
    isMissing Char
_   = Bool
False
    maybeFlipGenoLine1 :: a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 a
consRefA a
consAltA a
refA a
altA
        | (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consRefA, a
consAltA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- simple concordance
        | (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consAltA, a
consRefA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- alleles flipped
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- refs equal, alts different, need everything HomRef or Missing
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- alts equal, refs different, need everything HomAlt
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- need everything HomRef, then flip
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- need everything HomAlt, then flip
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine2 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 a
consRefA a
refA a
altA
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- refs equal, need everything HomRef or Missing
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- ref flipped, need everything HomAlt or Missing, then flip
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine3 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3 a
consAltA a
refA a
altA
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- alt flipped, need everything HomAlt or Missing, then flip
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- alts equal, need everything HomAlt or Missing
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine4 :: Either [Char] GenoLine
maybeFlipGenoLine4 = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    checked :: GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left [Char]
"Requiring all genotype missing" else Either [Char] b
action
    checked GenoEntry
t       Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (\GenoEntry
g -> GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing Bool -> Bool -> Bool
&& GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
t) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left ([Char]
"requiring all genotypes missing or " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ GenoEntry -> [Char]
forall a. Show a => a -> [Char]
show GenoEntry
t) else Either [Char] b
action
    flipGeno :: GenoEntry -> GenoEntry
flipGeno GenoEntry
HomRef = GenoEntry
HomAlt
    flipGeno GenoEntry
HomAlt = GenoEntry
HomRef
    flipGeno GenoEntry
g      = GenoEntry
g

printSNPCopyProgress :: (MonadIO m) => LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress :: forall (m :: * -> *) a.
MonadIO m =>
LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress LogA
logA UTCTime
startTime = do
    IORef Int
counterRef <- IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> Proxy () a () a m (IORef Int))
-> IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef (Int
0 :: Int)
    Pipe a a m () -> (a -> Pipe a a m ()) -> Pipe a a m ()
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Pipe a a m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((a -> Pipe a a m ()) -> Pipe a a m ())
-> (a -> Pipe a a m ()) -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ \a
val -> do
        Int
n <- IO Int -> Proxy () a () a m Int
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> Proxy () a () a m Int)
-> IO Int -> Proxy () a () a m Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
counterRef
        UTCTime
currentTime <- IO UTCTime -> Proxy () a () a m UTCTime
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO UTCTime
getCurrentTime
        LogA -> ReaderT Env IO () -> Pipe a a m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> Pipe a a m ())
-> ReaderT Env IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
n (UTCTime -> UTCTime -> NominalDiffTime
diffUTCTime UTCTime
currentTime UTCTime
startTime)
        IO () -> Pipe a a m ()
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Pipe a a m ()) -> IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
counterRef (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        a -> Pipe a a m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
val
    where
        logProgress :: Int -> NominalDiffTime -> PoseidonIO ()
        logProgress :: Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
c NominalDiffTime
t
            |  Int
c Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10000 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"SNPs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
padLeft Int
9 (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c) [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"    " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime (NominalDiffTime -> Int
forall b. Integral b => NominalDiffTime -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor NominalDiffTime
t)
            |  Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1000          = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Probing of the first 1000 SNPs successful. Continuing now..."
            | Bool
otherwise = () -> ReaderT Env IO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        prettyTime :: Int -> String
        prettyTime :: Int -> [Char]
prettyTime Int
t
            | Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
60 = Int -> [Char]
forall a. Show a => a -> [Char]
show Int
t [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"s"
            | Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
60 Bool -> Bool -> Bool
&& Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
3600 = do
                let (Int
minutes, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
60
                Int -> [Char]
forall a. Show a => a -> [Char]
show Int
minutes [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"m " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest
            | Bool
otherwise = do
                let (Int
hours, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
3600
                Int -> [Char]
forall a. Show a => a -> [Char]
show Int
hours   [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"h " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest


selectIndices :: [Int] -> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices :: [Int]
-> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices [Int]
indices (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) = (EigenstratSnpEntry
snpEntry, [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
V.fromList [GenoLine
genoLine GenoLine -> Int -> GenoEntry
forall a. Vector a -> Int -> a
V.! Int
i | Int
i <- [Int]
indices])