Remove potential bias caused by cloned genotypes in genind or genclone object.
Source:R/data_subset.r
clonecorrect.Rd
This function removes any duplicated multilocus genotypes from any specified population strata.
Arguments
- pop
a genind or genclone object.
- strata
a hierarchical formula or numeric vector. This will define the columns of the data frame in the strata slot to use.
- combine
logical. When set to TRUE, the strata will be combined to create a new population for the clone-corrected genind or genclone object.
- keep
integer. When combine is set to FALSE, you can use this flag to choose the levels of your population strata. For example: if your clone correction strata is set to "Pop", "Subpop", and "Year", and you want to analyze your populations with respect to year, you can set keep = c(1,3).
Details
This function will clone correct based on the stratification
provided. To clone correct indiscriminately of population structure, set
strata = NA.
Examples
# LOAD A. euteiches data set
data(Aeut)
# Redefine it as a genclone object
Aeut <- as.genclone(Aeut)
strata(Aeut) <- other(Aeut)$population_hierarchy[-1]
# Check the number of multilocus genotypes
mlg(Aeut)
#> #############################
#> # Number of Individuals: 187
#> # Number of MLG: 119
#> #############################
#> [1] 119
popNames(Aeut)
#> [1] "Athena" "Mt. Vernon"
# Clone correct at the population level.
Aeut.pop <- clonecorrect(Aeut, strata = ~Pop)
mlg(Aeut.pop)
#> #############################
#> # Number of Individuals: 120
#> # Number of MLG: 119
#> #############################
#> [1] 119
popNames(Aeut.pop)
#> [1] "Athena" "Mt. Vernon"
# \dontrun{
# Clone correct at the subpopulation level with respect to population and
# combine.
Aeut.subpop <- clonecorrect(Aeut, strata = ~Pop/Subpop, combine=TRUE)
mlg(Aeut.subpop)
#> #############################
#> # Number of Individuals: 141
#> # Number of MLG: 119
#> #############################
#> [1] 119
popNames(Aeut.subpop)
#> [1] "Athena_1" "Athena_2" "Athena_3" "Athena_4" "Athena_5"
#> [6] "Athena_6" "Athena_7" "Athena_8" "Athena_9" "Athena_10"
#> [11] "Mt. Vernon_1" "Mt. Vernon_2" "Mt. Vernon_3" "Mt. Vernon_4" "Mt. Vernon_5"
#> [16] "Mt. Vernon_6" "Mt. Vernon_7" "Mt. Vernon_8"
# Do the same, but set to the population level.
Aeut.subpop2 <- clonecorrect(Aeut, strata = ~Pop/Subpop, keep=1)
mlg(Aeut.subpop2)
#> #############################
#> # Number of Individuals: 141
#> # Number of MLG: 119
#> #############################
#> [1] 119
popNames(Aeut.subpop2)
#> [1] "Athena" "Mt. Vernon"
# LOAD H3N2 dataset
data(H3N2)
strata(H3N2) <- other(H3N2)$x
# Clone correct with respect to country (we will then look at the isolates from China)
country <- clonecorrect(H3N2, strata = ~country)
# How many isolates did we have from China before clone correction?
sum(strata(H3N2, ~country) == "China") # 155
#> [1] 155
# How many unique isolates from China after clone correction?
sum(strata(country, ~country) == "China") # 79
#> [1] 79
# Something a little more complicated. (This could take a few minutes on
# slower computers)
# setting the hierarchy to be Year > Month > Country
c.y.m <- clonecorrect(H3N2, strata = ~year/month/country)
# How many isolates in the original data set?
nInd(H3N2) # 1903
#> [1] 1903
# How many after we clone corrected for country, year, and month?
nInd(c.y.m) # 1190
#> [1] 1190
# }