Load packages
library (infer)
library (tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
theme_set (theme_bw (base_size = 16 ) + theme (aspect.ratio = 1 )) # sets a global ggplot2 theme
Generate fake data
set.seed (11 )
species_levels <- paste0 ("sp_" , 1 : 7 )
fake_data <- tibble (
environment = rep (c ("env_a" , "env_b" ), each = 7 ),
species = rep (species_levels, times = 2 ),
n = c (
18 , 14 , 12 , 10 , 9 , 8 , 0 , # env_a
45 , 9 , 11 , 13 , 15 , 17 , 19 # env_b
)
) |>
tidyr:: uncount (n)
fake_data
# A tibble: 200 × 2
environment species
<chr> <chr>
1 env_a sp_1
2 env_a sp_1
3 env_a sp_1
4 env_a sp_1
5 env_a sp_1
6 env_a sp_1
7 env_a sp_1
8 env_a sp_1
9 env_a sp_1
10 env_a sp_1
# ℹ 190 more rows
Shannon diversity index
Shannon diversity index function for use with infer::calculate(). Copy this function into your document and run it so that you can use with later.
stat_shannon <- function (x, order, ...) {
species <- x$ species
props <- table (species) / length (species)
if (length (props) == 1 ) {
return (0 )
}
# Only sum over non-zero proportions to avoid log(0)
h <- - sum (props[props > 0 ] * log (props[props > 0 ]))
h
}
Calculate observed
obs_a <-
fake_data |>
filter (environment == "env_a" ) |>
specify (response = species) |>
calculate (stat = stat_shannon)
obs_b <-
fake_data |>
filter (environment == "env_b" ) |>
specify (response = species) |>
calculate (stat = stat_shannon)
Boostrap sampling distribution
boot_a <-
fake_data |>
filter (environment == "env_a" ) |>
specify (response = species) |>
generate (reps = 10000 , type = "bootstrap" ) |>
calculate (stat = stat_shannon)
boot_b <-
fake_data |>
filter (environment == "env_b" ) |>
specify (response = species) |>
generate (reps = 10000 , type = "bootstrap" ) |>
calculate (stat = stat_shannon)
boot_a |>
visualise ()
Confidence interval
ci_a <-
boot_a |>
get_ci (level = 0.95 , type = "percentile" )
ci_b <-
boot_b |>
get_ci (level = 0.95 , type = "percentile" )
ci_a
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 1.62 1.78
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 1.66 1.86
boot_a |>
visualise () +
shade_ci (endpoints = ci_a)
boot_b |>
visualise () +
shade_ci (endpoints = ci_b)
Plots
obs_shannon <-
bind_rows (
env_a = obs_a,
env_b = obs_b,
.id = "environment"
)
ci_shannon <-
bind_rows (
env_a = ci_a,
env_b = ci_b,
.id = "environment"
)
obs_shannon |>
full_join (ci_shannon, by = join_by (environment)) |>
ggplot (aes (x = environment, y = stat)) +
geom_point (size = 4 ) +
geom_errorbar (aes (ymin = lower_ci, ymax = upper_ci), width = 0.2 )
Simpson diversity index
Simpson diversity index function for use with infer::calculate(). Copy this function into your document and run it so that you can use with later.
stat_simpson <- function (x, order, ...) {
species <- x$ species
props <- table (species) / length (species)
# Simpson index: 1 - sum(p_i^2)
d <- 1 - sum (props^ 2 )
d
}
Calculate observed
obs_simpson_a <-
fake_data |>
filter (environment == "env_a" ) |>
specify (response = species) |>
calculate (stat = stat_simpson)
obs_simpson_b <-
fake_data |>
filter (environment == "env_b" ) |>
specify (response = species) |>
calculate (stat = stat_simpson)
Boostrap sampling distribution
boot_simpson_a <-
fake_data |>
filter (environment == "env_a" ) |>
specify (response = species) |>
generate (reps = 10000 , type = "bootstrap" ) |>
calculate (stat = stat_simpson)
boot_simpson_b <-
fake_data |>
filter (environment == "env_b" ) |>
specify (response = species) |>
generate (reps = 10000 , type = "bootstrap" ) |>
calculate (stat = stat_simpson)
boot_simpson_a |>
visualise ()
boot_simpson_b |>
visualise ()
Confidence interval
ci_simpson_a <-
boot_simpson_a |>
get_ci (level = 0.95 , type = "percentile" )
ci_simpson_b <-
boot_simpson_b |>
get_ci (level = 0.95 , type = "percentile" )
ci_simpson_a
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 0.774 0.828
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 0.753 0.830
boot_simpson_a |>
visualise () +
shade_ci (endpoints = ci_simpson_a)
boot_simpson_b |>
visualise () +
shade_ci (endpoints = ci_simpson_b)
Plots
obs_simpson <-
bind_rows (
env_a = obs_simpson_a,
env_b = obs_simpson_b,
.id = "environment"
)
ci_simpson <-
bind_rows (
env_a = ci_simpson_a,
env_b = ci_simpson_b,
.id = "environment"
)
obs_simpson |>
full_join (ci_simpson, by = join_by (environment)) |>
ggplot (aes (x = environment, y = stat)) +
geom_point (size = 4 ) +
geom_errorbar (aes (ymin = lower_ci, ymax = upper_ci), width = 0.2 )