{-# LANGUAGE OverloadedStrings #-}
module Poseidon.GenotypeData where

import           Poseidon.Utils             (LogA, PoseidonException (..),
                                             PoseidonIO, checkFile,
                                             envInputPlinkMode, logDebug,
                                             logInfo, logWithEnv, padLeft)

import           Control.Exception          (throwIO)
import           Control.Monad              (forM)
import           Control.Monad.IO.Class     (MonadIO, liftIO)
import           Data.Aeson                 (FromJSON, ToJSON, object,
                                             parseJSON, toJSON, withObject,
                                             withText, (.:), (.:?), (.=))
import           Data.ByteString            (isPrefixOf)
import           Data.IORef                 (modifyIORef, newIORef, readIORef)
import           Data.List                  (nub, sort)
import           Data.Maybe                 (catMaybes)
import qualified Data.Text                  as T
import           Data.Time                  (NominalDiffTime, UTCTime,
                                             diffUTCTime, getCurrentTime)
import qualified Data.Vector                as V
import           Pipes                      (Pipe, Producer, cat, for, yield)
import           Pipes.Safe                 (MonadSafe)
import           SequenceFormats.Eigenstrat (EigenstratIndEntry (..),
                                             EigenstratSnpEntry (..),
                                             GenoEntry (..), GenoLine,
                                             readEigenstrat, readEigenstratInd)
import           SequenceFormats.Plink      (PlinkPopNameMode,
                                             plinkFam2EigenstratInd,
                                             readFamFile, readPlink)
import           System.FilePath            ((</>))

data GenoDataSource = PacBaseDir
    { GenoDataSource -> [Char]
getPacBaseDirs :: FilePath
    }
    | GenoDirect
    { GenoDataSource -> GenotypeDataSpec
getGenoDirect :: GenotypeDataSpec
    }
    deriving Int -> GenoDataSource -> ShowS
[GenoDataSource] -> ShowS
GenoDataSource -> [Char]
(Int -> GenoDataSource -> ShowS)
-> (GenoDataSource -> [Char])
-> ([GenoDataSource] -> ShowS)
-> Show GenoDataSource
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenoDataSource -> ShowS
showsPrec :: Int -> GenoDataSource -> ShowS
$cshow :: GenoDataSource -> [Char]
show :: GenoDataSource -> [Char]
$cshowList :: [GenoDataSource] -> ShowS
showList :: [GenoDataSource] -> ShowS
Show

-- | A datatype to specify genotype files
data GenotypeDataSpec = GenotypeDataSpec
    { GenotypeDataSpec -> GenotypeFormatSpec
format         :: GenotypeFormatSpec
    -- ^ the genotype format
    , GenotypeDataSpec -> [Char]
genoFile       :: FilePath
    -- ^ path to the geno file
    , GenotypeDataSpec -> Maybe [Char]
genoFileChkSum :: Maybe String
    -- ^ the optional checksum for the geno file
    , GenotypeDataSpec -> [Char]
snpFile        :: FilePath
    -- ^ path to the snp file
    , GenotypeDataSpec -> Maybe [Char]
snpFileChkSum  :: Maybe String
    -- ^ the optional checksum for the Snp file
    , GenotypeDataSpec -> [Char]
indFile        :: FilePath
    -- ^ path to the ind file
    , GenotypeDataSpec -> Maybe [Char]
indFileChkSum  :: Maybe String
    -- ^ the optional checksum for the indfile
    , GenotypeDataSpec -> Maybe SNPSetSpec
snpSet         :: Maybe SNPSetSpec
    -- ^ the SNP set de facto listed in the genotype data
    }
    deriving (Int -> GenotypeDataSpec -> ShowS
[GenotypeDataSpec] -> ShowS
GenotypeDataSpec -> [Char]
(Int -> GenotypeDataSpec -> ShowS)
-> (GenotypeDataSpec -> [Char])
-> ([GenotypeDataSpec] -> ShowS)
-> Show GenotypeDataSpec
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GenotypeDataSpec -> ShowS
showsPrec :: Int -> GenotypeDataSpec -> ShowS
$cshow :: GenotypeDataSpec -> [Char]
show :: GenotypeDataSpec -> [Char]
$cshowList :: [GenotypeDataSpec] -> ShowS
showList :: [GenotypeDataSpec] -> ShowS
Show, GenotypeDataSpec -> GenotypeDataSpec -> Bool
(GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> (GenotypeDataSpec -> GenotypeDataSpec -> Bool)
-> Eq GenotypeDataSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
== :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
$c/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
/= :: GenotypeDataSpec -> GenotypeDataSpec -> Bool
Eq)

-- | To facilitate automatic parsing of GenotypeDataSpec from JSON files
instance FromJSON GenotypeDataSpec where
    parseJSON :: Value -> Parser GenotypeDataSpec
parseJSON = [Char]
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a. [Char] -> (Object -> Parser a) -> Value -> Parser a
withObject [Char]
"GenotypeData" ((Object -> Parser GenotypeDataSpec)
 -> Value -> Parser GenotypeDataSpec)
-> (Object -> Parser GenotypeDataSpec)
-> Value
-> Parser GenotypeDataSpec
forall a b. (a -> b) -> a -> b
$ \Object
v -> GenotypeFormatSpec
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> [Char]
-> Maybe [Char]
-> Maybe SNPSetSpec
-> GenotypeDataSpec
GenotypeDataSpec
        (GenotypeFormatSpec
 -> [Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> [Char]
 -> Maybe [Char]
 -> Maybe SNPSetSpec
 -> GenotypeDataSpec)
-> Parser GenotypeFormatSpec
-> Parser
     ([Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> Maybe SNPSetSpec
      -> GenotypeDataSpec)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Key -> Parser GenotypeFormatSpec
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"format"
        Parser
  ([Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> Maybe SNPSetSpec
   -> GenotypeDataSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> Maybe SNPSetSpec
      -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"genoFile"
        Parser
  (Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> Maybe SNPSetSpec
   -> GenotypeDataSpec)
-> Parser (Maybe [Char])
-> Parser
     ([Char]
      -> Maybe [Char]
      -> [Char]
      -> Maybe [Char]
      -> Maybe SNPSetSpec
      -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"genoFileChkSum"
        Parser
  ([Char]
   -> Maybe [Char]
   -> [Char]
   -> Maybe [Char]
   -> Maybe SNPSetSpec
   -> GenotypeDataSpec)
-> Parser [Char]
-> Parser
     (Maybe [Char]
      -> [Char] -> Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"snpFile"
        Parser
  (Maybe [Char]
   -> [Char] -> Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
-> Parser (Maybe [Char])
-> Parser
     ([Char] -> Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpFileChkSum"
        Parser
  ([Char] -> Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
-> Parser [Char]
-> Parser (Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser [Char]
forall a. FromJSON a => Object -> Key -> Parser a
.:  Key
"indFile"
        Parser (Maybe [Char] -> Maybe SNPSetSpec -> GenotypeDataSpec)
-> Parser (Maybe [Char])
-> Parser (Maybe SNPSetSpec -> GenotypeDataSpec)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe [Char])
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"indFileChkSum"
        Parser (Maybe SNPSetSpec -> GenotypeDataSpec)
-> Parser (Maybe SNPSetSpec) -> Parser GenotypeDataSpec
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Key -> Parser (Maybe SNPSetSpec)
forall a. FromJSON a => Object -> Key -> Parser (Maybe a)
.:? Key
"snpSet"

instance ToJSON GenotypeDataSpec where
    -- this encodes directly to a bytestring Builder
    toJSON :: GenotypeDataSpec -> Value
toJSON GenotypeDataSpec
x = [Pair] -> Value
object [
        Key
"format"        Key -> GenotypeFormatSpec -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> GenotypeFormatSpec
format GenotypeDataSpec
x,
        Key
"genoFile"      Key -> [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> [Char]
genoFile GenotypeDataSpec
x,
        Key
"genoFileChkSum"Key -> Maybe [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> Maybe [Char]
genoFileChkSum GenotypeDataSpec
x,
        Key
"snpFile"       Key -> [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> [Char]
snpFile GenotypeDataSpec
x,
        Key
"snpFileChkSum" Key -> Maybe [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> Maybe [Char]
snpFileChkSum GenotypeDataSpec
x,
        Key
"indFile"       Key -> [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> [Char]
indFile GenotypeDataSpec
x,
        Key
"indFileChkSum" Key -> Maybe [Char] -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> Maybe [Char]
indFileChkSum GenotypeDataSpec
x,
        Key
"snpSet"        Key -> Maybe SNPSetSpec -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> Pair
.= GenotypeDataSpec -> Maybe SNPSetSpec
snpSet GenotypeDataSpec
x
        ]

-- | A data type representing the options fo the genotype format
data GenotypeFormatSpec = GenotypeFormatEigenstrat
    | GenotypeFormatPlink
    deriving (GenotypeFormatSpec -> GenotypeFormatSpec -> Bool
(GenotypeFormatSpec -> GenotypeFormatSpec -> Bool)
-> (GenotypeFormatSpec -> GenotypeFormatSpec -> Bool)
-> Eq GenotypeFormatSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GenotypeFormatSpec -> GenotypeFormatSpec -> Bool
== :: GenotypeFormatSpec -> GenotypeFormatSpec -> Bool
$c/= :: GenotypeFormatSpec -> GenotypeFormatSpec -> Bool
/= :: GenotypeFormatSpec -> GenotypeFormatSpec -> Bool
Eq)

instance Show GenotypeFormatSpec where
    show :: GenotypeFormatSpec -> [Char]
show GenotypeFormatSpec
GenotypeFormatPlink      = [Char]
"PLINK"
    show GenotypeFormatSpec
GenotypeFormatEigenstrat = [Char]
"EIGENSTRAT"

-- | To facilitate automatic parsing of GenotypeFormatSpec from JSON files
instance FromJSON GenotypeFormatSpec where
    parseJSON :: Value -> Parser GenotypeFormatSpec
parseJSON = [Char]
-> (Text -> Parser GenotypeFormatSpec)
-> Value
-> Parser GenotypeFormatSpec
forall a. [Char] -> (Text -> Parser a) -> Value -> Parser a
withText [Char]
"format" ((Text -> Parser GenotypeFormatSpec)
 -> Value -> Parser GenotypeFormatSpec)
-> (Text -> Parser GenotypeFormatSpec)
-> Value
-> Parser GenotypeFormatSpec
forall a b. (a -> b) -> a -> b
$ \Text
v -> case Text
v of
        Text
"EIGENSTRAT" -> GenotypeFormatSpec -> Parser GenotypeFormatSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure GenotypeFormatSpec
GenotypeFormatEigenstrat
        Text
"PLINK"      -> GenotypeFormatSpec -> Parser GenotypeFormatSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure GenotypeFormatSpec
GenotypeFormatPlink
        Text
_            -> [Char] -> Parser GenotypeFormatSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown format " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
v)

instance ToJSON GenotypeFormatSpec where
    toJSON :: GenotypeFormatSpec -> Value
toJSON GenotypeFormatSpec
a = case GenotypeFormatSpec
a of
        GenotypeFormatSpec
GenotypeFormatPlink      -> Value
"PLINK"
        GenotypeFormatSpec
GenotypeFormatEigenstrat -> Value
"EIGENSTRAT"

data SNPSetSpec = SNPSet1240K
    | SNPSetHumanOrigins
    | SNPSetOther
    deriving (SNPSetSpec -> SNPSetSpec -> Bool
(SNPSetSpec -> SNPSetSpec -> Bool)
-> (SNPSetSpec -> SNPSetSpec -> Bool) -> Eq SNPSetSpec
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SNPSetSpec -> SNPSetSpec -> Bool
== :: SNPSetSpec -> SNPSetSpec -> Bool
$c/= :: SNPSetSpec -> SNPSetSpec -> Bool
/= :: SNPSetSpec -> SNPSetSpec -> Bool
Eq)

instance Show SNPSetSpec where
    show :: SNPSetSpec -> [Char]
show SNPSetSpec
SNPSet1240K        = [Char]
"1240K"
    show SNPSetSpec
SNPSetHumanOrigins = [Char]
"HumanOrigins"
    show SNPSetSpec
SNPSetOther        = [Char]
"Other"

instance FromJSON SNPSetSpec where
    parseJSON :: Value -> Parser SNPSetSpec
parseJSON = [Char] -> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a. [Char] -> (Text -> Parser a) -> Value -> Parser a
withText [Char]
"snpSet" ((Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec)
-> (Text -> Parser SNPSetSpec) -> Value -> Parser SNPSetSpec
forall a b. (a -> b) -> a -> b
$ \Text
v -> case Text
v of
        Text
"1240K"        -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSet1240K
        Text
"HumanOrigins" -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetHumanOrigins
        Text
"Other"        -> SNPSetSpec -> Parser SNPSetSpec
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure SNPSetSpec
SNPSetOther
        Text
_              -> [Char] -> Parser SNPSetSpec
forall a. [Char] -> Parser a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"unknown snpSet " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
v)

instance ToJSON SNPSetSpec where
    toJSON :: SNPSetSpec -> Value
toJSON SNPSetSpec
a = case SNPSetSpec
a of
        SNPSetSpec
SNPSet1240K        -> Value
"1240K"
        SNPSetSpec
SNPSetHumanOrigins -> Value
"HumanOrigins"
        SNPSetSpec
SNPSetOther        -> Value
"Other"

snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList :: [SNPSetSpec] -> Bool -> SNPSetSpec
snpSetMergeList (SNPSetSpec
x:[SNPSetSpec]
xs) Bool
intersect = (SNPSetSpec -> SNPSetSpec -> SNPSetSpec)
-> SNPSetSpec -> [SNPSetSpec] -> SNPSetSpec
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\SNPSetSpec
a SNPSetSpec
b -> SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
a SNPSetSpec
b Bool
intersect) SNPSetSpec
x [SNPSetSpec]
xs
snpSetMergeList [SNPSetSpec]
_ Bool
_ = [Char] -> SNPSetSpec
forall a. HasCallStack => [Char] -> a
error [Char]
"snpSetMergeList: This should never happen"

snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge :: SNPSetSpec -> SNPSetSpec -> Bool -> SNPSetSpec
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSet1240K         Bool
_     = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSetHumanOrigins  Bool
_     = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetOther         SNPSetSpec
_                   Bool
_     = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
_                   SNPSetSpec
SNPSetOther         Bool
_     = SNPSetSpec
SNPSetOther
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSetHumanOrigins  Bool
True  = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSet1240K         Bool
True  = SNPSetSpec
SNPSetHumanOrigins
snpSetMerge SNPSetSpec
SNPSet1240K         SNPSetSpec
SNPSetHumanOrigins  Bool
False = SNPSetSpec
SNPSet1240K
snpSetMerge SNPSetSpec
SNPSetHumanOrigins  SNPSetSpec
SNPSet1240K         Bool
False = SNPSetSpec
SNPSet1240K

-- | A function to return a list of all individuals in the genotype files of a package.
loadIndividuals :: FilePath -- ^ the base directory
               -> GenotypeDataSpec -- ^ the Genotype spec
               -> PoseidonIO [EigenstratIndEntry] -- ^ the returned list of EigenstratIndEntries.
loadIndividuals :: [Char] -> GenotypeDataSpec -> PoseidonIO [EigenstratIndEntry]
loadIndividuals [Char]
d GenotypeDataSpec
gd = do
    PlinkPopNameMode
popMode <- PoseidonIO PlinkPopNameMode
envInputPlinkMode
    IO () -> ReaderT Env IO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ReaderT Env IO ()) -> IO () -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe [Char] -> IO ()
checkFile ([Char]
d [Char] -> ShowS
</> GenotypeDataSpec -> [Char]
indFile GenotypeDataSpec
gd) Maybe [Char]
forall a. Maybe a
Nothing
    case GenotypeDataSpec -> GenotypeFormatSpec
format GenotypeDataSpec
gd of
        GenotypeFormatSpec
GenotypeFormatEigenstrat -> [Char] -> PoseidonIO [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [EigenstratIndEntry]
readEigenstratInd ([Char]
d [Char] -> ShowS
</> GenotypeDataSpec -> [Char]
indFile GenotypeDataSpec
gd)
        GenotypeFormatSpec
GenotypeFormatPlink      -> (PlinkFamEntry -> EigenstratIndEntry)
-> [PlinkFamEntry] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd PlinkPopNameMode
popMode) ([PlinkFamEntry] -> [EigenstratIndEntry])
-> ReaderT Env IO [PlinkFamEntry]
-> PoseidonIO [EigenstratIndEntry]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> ReaderT Env IO [PlinkFamEntry]
forall (m :: * -> *). MonadIO m => [Char] -> m [PlinkFamEntry]
readFamFile ([Char]
d [Char] -> ShowS
</> GenotypeDataSpec -> [Char]
indFile GenotypeDataSpec
gd)

-- | A function to read the genotype data of a package
loadGenotypeData :: (MonadSafe m) =>
                   FilePath -- ^ the base path
                -> GenotypeDataSpec -- ^ the genotype spec
                -> PlinkPopNameMode -- ^ The Plink PopName Mode
                -> m ([EigenstratIndEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
                -- ^ a pair of the EigenstratIndEntries and a Producer over the Snp position values and the genotype line.
loadGenotypeData :: forall (m :: * -> *).
MonadSafe m =>
[Char]
-> GenotypeDataSpec
-> PlinkPopNameMode
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
loadGenotypeData [Char]
baseDir (GenotypeDataSpec GenotypeFormatSpec
format_ [Char]
genoF Maybe [Char]
_ [Char]
snpF Maybe [Char]
_ [Char]
indF Maybe [Char]
_ Maybe SNPSetSpec
_) PlinkPopNameMode
popMode =
    case GenotypeFormatSpec
format_ of
        GenotypeFormatSpec
GenotypeFormatEigenstrat -> [Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
        GenotypeFormatSpec
GenotypeFormatPlink      -> do
            ([PlinkFamEntry]
famEntries, Producer (EigenstratSnpEntry, GenoLine) m ()
prod) <- [Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *).
MonadSafe m =>
[Char]
-> [Char]
-> [Char]
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink ([Char]
baseDir [Char] -> ShowS
</> [Char]
genoF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
snpF) ([Char]
baseDir [Char] -> ShowS
</> [Char]
indF)
            ([EigenstratIndEntry],
 Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ((PlinkFamEntry -> EigenstratIndEntry)
-> [PlinkFamEntry] -> [EigenstratIndEntry]
forall a b. (a -> b) -> [a] -> [b]
map (PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd PlinkPopNameMode
popMode) [PlinkFamEntry]
famEntries, Producer (EigenstratSnpEntry, GenoLine) m ()
prod)

joinEntries :: (MonadIO m) => LogA -> [Int] -> [String] -> [Maybe (EigenstratSnpEntry, GenoLine)] -> m (EigenstratSnpEntry, GenoLine)
joinEntries :: forall (m :: * -> *).
MonadIO m =>
LogA
-> [Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> m (EigenstratSnpEntry, GenoLine)
joinEntries LogA
logA [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList = do
    let allSnpEntries :: [EigenstratSnpEntry]
allSnpEntries = ((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> [(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> [a] -> [b]
map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst ([(EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> ([Maybe (EigenstratSnpEntry, GenoLine)]
    -> [(EigenstratSnpEntry, GenoLine)])
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [EigenstratSnpEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(EigenstratSnpEntry, GenoLine)]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry])
-> [Maybe (EigenstratSnpEntry, GenoLine)] -> [EigenstratSnpEntry]
forall a b. (a -> b) -> a -> b
$ [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList
    EigenstratSnpEntry
consensusSnpEntry <- LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
allSnpEntries
    [GenoLine]
recodedGenotypes <- [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
    -> m GenoLine)
-> m [GenoLine]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Int]
-> [[Char]]
-> [Maybe (EigenstratSnpEntry, GenoLine)]
-> [(Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 [Int]
nrInds [[Char]]
pacNames [Maybe (EigenstratSnpEntry, GenoLine)]
maybeTupleList) (((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
  -> m GenoLine)
 -> m [GenoLine])
-> ((Int, [Char], Maybe (EigenstratSnpEntry, GenoLine))
    -> m GenoLine)
-> m [GenoLine]
forall a b. (a -> b) -> a -> b
$ \(Int
n, [Char]
name, Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple) ->
        case Maybe (EigenstratSnpEntry, GenoLine)
maybeTuple of
            Maybe (EigenstratSnpEntry, GenoLine)
Nothing -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> GenoEntry -> GenoLine
forall a. Int -> a -> Vector a
V.replicate Int
n GenoEntry
Missing)
            Just (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) -> case EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine of
                Left [Char]
err -> do
                    let msg :: [Char]
msg = [Char]
"Error in genotype data of package " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
name [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err
                    IO GenoLine -> m GenoLine
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO GenoLine -> m GenoLine)
-> (PoseidonException -> IO GenoLine)
-> PoseidonException
-> m GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO GenoLine
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m GenoLine)
-> PoseidonException -> m GenoLine
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException [Char]
msg
                Right GenoLine
x -> GenoLine -> m GenoLine
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
x
    (EigenstratSnpEntry, GenoLine) -> m (EigenstratSnpEntry, GenoLine)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry
consensusSnpEntry, [GenoLine] -> GenoLine
forall a. [Vector a] -> Vector a
V.concat [GenoLine]
recodedGenotypes)

getConsensusSnpEntry :: (MonadIO m) => LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry :: forall (m :: * -> *).
MonadIO m =>
LogA -> [EigenstratSnpEntry] -> m EigenstratSnpEntry
getConsensusSnpEntry LogA
logA [EigenstratSnpEntry]
snpEntries = do
    let chrom :: Chrom
chrom = EigenstratSnpEntry -> Chrom
snpChrom (EigenstratSnpEntry -> Chrom)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Chrom) -> [EigenstratSnpEntry] -> Chrom
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        pos :: Int
pos = EigenstratSnpEntry -> Int
snpPos (EigenstratSnpEntry -> Int)
-> ([EigenstratSnpEntry] -> EigenstratSnpEntry)
-> [EigenstratSnpEntry]
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EigenstratSnpEntry] -> EigenstratSnpEntry
forall a. HasCallStack => [a] -> a
head ([EigenstratSnpEntry] -> Int) -> [EigenstratSnpEntry] -> Int
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        uniqueIds :: [ByteString]
uniqueIds = [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a]
nub ([ByteString] -> [ByteString])
-> ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> ByteString)
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> ByteString
snpId ([EigenstratSnpEntry] -> [ByteString])
-> [EigenstratSnpEntry] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        uniqueGenPos :: [Double]
uniqueGenPos = [Double] -> [Double]
forall a. Ord a => [a] -> [a]
sort ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [Double]
forall a. Eq a => [a] -> [a]
nub ([Double] -> [Double])
-> ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EigenstratSnpEntry -> Double) -> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map EigenstratSnpEntry -> Double
snpGeneticPos ([EigenstratSnpEntry] -> [Double])
-> [EigenstratSnpEntry] -> [Double]
forall a b. (a -> b) -> a -> b
$ [EigenstratSnpEntry]
snpEntries
        allAlleles :: [Char]
allAlleles    = [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[Char
r, Char
a] | EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
r Char
a <- [EigenstratSnpEntry]
snpEntries]
        uniqueAlleles :: [Char]
uniqueAlleles = ShowS
forall a. Eq a => [a] -> [a]
nub ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
a -> Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'N' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'0' Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'X') ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ [Char]
allAlleles
    ByteString
id_ <- case [ByteString]
uniqueIds of
        [ByteString
i] -> ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
i
        [ByteString]
_ -> do -- multiple Ids: Picking the first rs-number if possible, otherwise the first one.
            let rsIds :: [ByteString]
rsIds = (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
isPrefixOf ByteString
"rs") [ByteString]
uniqueIds
                selectedId :: ByteString
selectedId = case [ByteString]
rsIds of
                    (ByteString
i:[ByteString]
_) -> ByteString
i
                    [ByteString]
_     -> [ByteString] -> ByteString
forall a. HasCallStack => [a] -> a
head [ByteString]
uniqueIds
            LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
                [Char]
"Found inconsistent SNP IDs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [ByteString] -> [Char]
forall a. Show a => a -> [Char]
show [ByteString]
uniqueIds [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
selectedId
            ByteString -> m ByteString
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
selectedId
    Double
genPos <- case [Double]
uniqueGenPos of
        [Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p
        [Double
0.0, Double
p] -> Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
p -- 0.0 is considered "no data" in genetic position column
        [Double]
_ -> do -- multiple non-zero genetic positions. Choosing the largest one.
            let selectedGenPos :: Double
selectedGenPos = [Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
uniqueGenPos
            LogA -> ReaderT Env IO () -> m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> m ())
-> ([Char] -> ReaderT Env IO ()) -> [Char] -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> ReaderT Env IO ()
logDebug ([Char] -> m ()) -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$
                [Char]
"Found inconsistent genetic positions in SNP " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
id_ [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
                [Double] -> [Char]
forall a. Show a => a -> [Char]
show [Double]
uniqueGenPos [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
". Choosing " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Double -> [Char]
forall a. Show a => a -> [Char]
show Double
selectedGenPos
            Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Double
selectedGenPos
    case [Char]
uniqueAlleles of
        [] -> do -- no non-missing alleles found
            -- logWithEnv LogA . logDebug $
            --     "SNP " ++ show id_ ++ " appears to have no data (both ref and alt allele are blank"
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
'N')
        [Char
r] -> do -- only one non-missing allele found
            -- logWithEnv LogA . logDebug $
            --     "SNP " ++ show id_ ++ " appears to be monomorphic (only one of ref and alt alleles are non-blank)"
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
'N' Char
r)
        [Char
ref, Char
alt] ->
            EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
genPos ByteString
id_ Char
ref Char
alt)
        [Char]
_ -> IO EigenstratSnpEntry -> m EigenstratSnpEntry
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO EigenstratSnpEntry -> m EigenstratSnpEntry)
-> (PoseidonException -> IO EigenstratSnpEntry)
-> PoseidonException
-> m EigenstratSnpEntry
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PoseidonException -> IO EigenstratSnpEntry
forall e a. Exception e => e -> IO a
throwIO (PoseidonException -> m EigenstratSnpEntry)
-> PoseidonException -> m EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenotypeException ([Char]
"Incongruent alleles: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [EigenstratSnpEntry] -> [Char]
forall a. Show a => a -> [Char]
show [EigenstratSnpEntry]
snpEntries)

recodeAlleles :: EigenstratSnpEntry -> EigenstratSnpEntry -> GenoLine -> Either String GenoLine
recodeAlleles :: EigenstratSnpEntry
-> EigenstratSnpEntry -> GenoLine -> Either [Char] GenoLine
recodeAlleles EigenstratSnpEntry
consensusSnpEntry EigenstratSnpEntry
snpEntry GenoLine
genoLine = do
    let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
consRefA Char
consAltA) = EigenstratSnpEntry
consensusSnpEntry
    let (EigenstratSnpEntry Chrom
_ Int
_ Double
_ ByteString
_ Char
refA Char
altA) = EigenstratSnpEntry
snpEntry
    let maybeRecodedGenoline :: Either [Char] GenoLine
maybeRecodedGenoline = case (Char -> Bool
isMissing Char
consRefA, Char -> Bool
isMissing Char
consAltA) of
            (Bool
False, Bool
False) -> Char -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 Char
consRefA Char
consAltA Char
refA Char
altA
            (Bool
False, Bool
True)  -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 Char
consRefA          Char
refA Char
altA
            (Bool
True, Bool
False)  -> Char -> Char -> Char -> Either [Char] GenoLine
forall {a}. Eq a => a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3          Char
consAltA Char
refA Char
altA
            (Bool
True, Bool
True)   -> Either [Char] GenoLine
maybeFlipGenoLine4
    case Either [Char] GenoLine
maybeRecodedGenoline of
        Left [Char]
err -> [Char] -> Either [Char] GenoLine
forall a b. a -> Either a b
Left ([Char]
"At snp " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ EigenstratSnpEntry -> [Char]
forall a. Show a => a -> [Char]
show EigenstratSnpEntry
snpEntry [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": allele coding error due to inconsistent \
                           \alleles with consensus alleles ref = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consRefA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
", alt = " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
consAltA] [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
                           [Char]
". Error: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
err)
        Right GenoLine
recodedGenoLine -> GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
recodedGenoLine
  where
    isMissing :: Char -> Bool
isMissing Char
'0' = Bool
True
    isMissing Char
'N' = Bool
True
    isMissing Char
_   = Bool
False
    maybeFlipGenoLine1 :: a -> a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine1 a
consRefA a
consAltA a
refA a
altA
        | (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consRefA, a
consAltA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- simple concordance
        | (a
refA, a
altA) (a, a) -> (a, a) -> Bool
forall a. Eq a => a -> a -> Bool
== (a
consAltA, a
consRefA) = GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- alleles flipped
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- refs equal, alts different, need everything HomRef or Missing
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- alts equal, refs different, need everything HomAlt
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- need everything HomRef, then flip
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- need everything HomAlt, then flip
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine2 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine2 a
consRefA a
refA a
altA
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- refs equal, need everything HomRef or Missing
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consRefA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- ref flipped, need everything HomAlt or Missing, then flip
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine3 :: a -> a -> a -> Either [Char] GenoLine
maybeFlipGenoLine3 a
consAltA a
refA a
altA
        | a
refA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomRef (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenoEntry -> GenoEntry) -> GenoLine -> GenoLine
forall a b. (a -> b) -> Vector a -> Vector b
V.map GenoEntry -> GenoEntry
flipGeno GenoLine
genoLine) -- alt flipped, need everything HomAlt or Missing, then flip
        | a
altA a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
consAltA                     = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
HomAlt (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine -- alts equal, need everything HomAlt or Missing
        | Bool
otherwise                            = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    maybeFlipGenoLine4 :: Either [Char] GenoLine
maybeFlipGenoLine4 = GenoEntry -> Either [Char] GenoLine -> Either [Char] GenoLine
forall {b}. GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing (Either [Char] GenoLine -> Either [Char] GenoLine)
-> Either [Char] GenoLine -> Either [Char] GenoLine
forall a b. (a -> b) -> a -> b
$ GenoLine -> Either [Char] GenoLine
forall a. a -> Either [Char] a
forall (m :: * -> *) a. Monad m => a -> m a
return GenoLine
genoLine
    checked :: GenoEntry -> Either [Char] b -> Either [Char] b
checked GenoEntry
Missing Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left [Char]
"Requiring all genotype missing" else Either [Char] b
action
    checked GenoEntry
t       Either [Char] b
action = if (GenoEntry -> Bool) -> GenoLine -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.any (\GenoEntry
g -> GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
Missing Bool -> Bool -> Bool
&& GenoEntry
g GenoEntry -> GenoEntry -> Bool
forall a. Eq a => a -> a -> Bool
/= GenoEntry
t) GenoLine
genoLine then [Char] -> Either [Char] b
forall a b. a -> Either a b
Left ([Char]
"requiring all genotypes missing or " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ GenoEntry -> [Char]
forall a. Show a => a -> [Char]
show GenoEntry
t) else Either [Char] b
action
    flipGeno :: GenoEntry -> GenoEntry
flipGeno GenoEntry
HomRef = GenoEntry
HomAlt
    flipGeno GenoEntry
HomAlt = GenoEntry
HomRef
    flipGeno GenoEntry
g      = GenoEntry
g

printSNPCopyProgress :: (MonadIO m) => LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress :: forall (m :: * -> *) a.
MonadIO m =>
LogA -> UTCTime -> Pipe a a m ()
printSNPCopyProgress LogA
logA UTCTime
startTime = do
    IORef Int
counterRef <- IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> Proxy () a () a m (IORef Int))
-> IO (IORef Int) -> Proxy () a () a m (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef (Int
0 :: Int)
    Pipe a a m () -> (a -> Pipe a a m ()) -> Pipe a a m ()
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Pipe a a m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((a -> Pipe a a m ()) -> Pipe a a m ())
-> (a -> Pipe a a m ()) -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ \a
val -> do
        Int
n <- IO Int -> Proxy () a () a m Int
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> Proxy () a () a m Int)
-> IO Int -> Proxy () a () a m Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
counterRef
        UTCTime
currentTime <- IO UTCTime -> Proxy () a () a m UTCTime
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO UTCTime
getCurrentTime
        LogA -> ReaderT Env IO () -> Pipe a a m ()
forall (m :: * -> *).
MonadIO m =>
LogA -> ReaderT Env IO () -> m ()
logWithEnv LogA
logA (ReaderT Env IO () -> Pipe a a m ())
-> ReaderT Env IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
n (UTCTime -> UTCTime -> NominalDiffTime
diffUTCTime UTCTime
currentTime UTCTime
startTime)
        IO () -> Pipe a a m ()
forall a. IO a -> Proxy () a () a m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Pipe a a m ()) -> IO () -> Pipe a a m ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
counterRef (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        a -> Pipe a a m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
val
    where
        logProgress :: Int -> NominalDiffTime -> PoseidonIO ()
        logProgress :: Int -> NominalDiffTime -> ReaderT Env IO ()
logProgress Int
c NominalDiffTime
t
            |  Int
c Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10000 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"SNPs: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
padLeft Int
9 (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c) [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"    " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime (NominalDiffTime -> Int
forall b. Integral b => NominalDiffTime -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor NominalDiffTime
t)
            |  Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1000          = [Char] -> ReaderT Env IO ()
logInfo ([Char] -> ReaderT Env IO ()) -> [Char] -> ReaderT Env IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Probing of the first 1000 SNPs successful. Continuing now..."
            | Bool
otherwise = () -> ReaderT Env IO ()
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        prettyTime :: Int -> String
        prettyTime :: Int -> [Char]
prettyTime Int
t
            | Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
60 = Int -> [Char]
forall a. Show a => a -> [Char]
show Int
t [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"s"
            | Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
60 Bool -> Bool -> Bool
&& Int
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
3600 = do
                let (Int
minutes, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
60
                Int -> [Char]
forall a. Show a => a -> [Char]
show Int
minutes [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"m " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest
            | Bool
otherwise = do
                let (Int
hours, Int
rest) = Int
t Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
3600
                Int -> [Char]
forall a. Show a => a -> [Char]
show Int
hours   [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"h " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
prettyTime Int
rest


selectIndices :: [Int] -> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices :: [Int]
-> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices [Int]
indices (EigenstratSnpEntry
snpEntry, GenoLine
genoLine) = (EigenstratSnpEntry
snpEntry, [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
V.fromList [GenoLine
genoLine GenoLine -> Int -> GenoEntry
forall a. Vector a -> Int -> a
V.! Int
i | Int
i <- [Int]
indices])