- R básico tiene funciones para las pruebas más comunes de estadística:
- correlación
- regresión
- chi-cuadrado …
2017-05-18
Juego de datos:
# Download the example dataset (Phaethornis song coordination and
# Diastema acoustic measurements) and split it into two data frames.
url.base <- "http://marceloarayasalas.weebly.com/uploads"
url.file <- "/2/5/5/2/25524573/datos_phaethornis___diastema.rds"
download.file(paste0(url.base, url.file),
              destfile = "datos_Phaethornis_&_diastema.RDS")
datos <- readRDS("datos_Phaethornis_&_diastema.RDS")
phaethornis <- datos[[1]]
diastema <- datos[[2]]
Cantos coordinados de Phaethornis longirostris: Araya-Salas et al. 2017. To overlap or not to overlap: context-dependent coordinated singing in long-billed hermits. Animal Behaviour.
Juego de datos Phaethornis longirostris
str(phaethornis)
## 'data.frame': 121 obs. of 6 variables: ## $ coord.sing.event: int 1 2 3 4 5 6 7 8 9 10 ... ## $ Lek : Factor w/ 4 levels "CCE","CCL","LOC",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ pattern : Factor w/ 3 levels "No.ovlp","No.pattern",..: 3 3 3 2 2 1 2 3 3 3 ... ## $ duetdur : num 11.9 33.8 48.7 31.1 45.8 ... ## $ sing.perf : num 0.405 0.454 0.23 0.287 0.217 ... ## $ distcat : Factor w/ 2 levels "Close","Far": NA NA NA NA NA 1 NA NA NA NA ...
Juego de datos Diastema spp
str(diastema)
## 'data.frame': 901 obs. of 25 variables: ## $ species : Factor w/ 3 levels "diastema","vocator",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ sound.files: Factor w/ 216 levels "Dd_AguaBuena_01.wav",..: 1 1 1 1 1 1 1 1 2 2 ... ## $ selec : chr "2" "3" "4" "6" ... ## $ duration : num 0.119 0.118 0.12 0.124 0.121 ... ## $ meanfreq : num 3.5 3.51 3.48 3.48 3.41 ... ## $ sd : num 0.45 0.468 0.461 0.5 0.429 ... ## $ median : num 3.37 3.38 3.31 3.27 3.22 ... ## $ Q25 : num 3.27 3.25 3.24 3.22 3.21 ... ## $ Q75 : num 3.57 3.6 3.56 3.54 3.49 ... ## $ IQR : num 0.302 0.349 0.317 0.323 0.273 ... ## $ skew : num 5.93 6.83 7.3 8.6 10.49 ... ## $ kurt : num 44.4 61.9 66.8 91.6 122.2 ... ## $ sp.ent : num 0.775 0.798 0.775 0.76 0.694 ... ## $ sfm : num 0.252 0.297 0.279 0.256 0.23 ... ## $ mode : num 3.26 3.25 3.25 3.23 3.21 ... ## $ centroid : num 3.5 3.51 3.48 3.48 3.41 ... ## $ peakf : num 3.2 3.21 3.17 3.16 3.17 ... ## $ meandom : num 3.32 3.28 3.23 3.25 3.25 ... ## $ mindom : num 2.15 3.19 3.19 3.19 3.14 ... ## $ maxdom : num 4.74 3.49 3.27 3.4 3.49 ... ## $ dfrange : num 2.584 0.3015 0.0861 0.2153 0.3445 ... ## $ modindx : num 0.258 0.25 0.25 0.25 0.25 ... ## $ startdom : num 2.15 3.49 3.27 3.4 3.49 ... ## $ enddom : num 4.74 3.19 3.19 3.19 3.14 ... ## $ dfslope : num 21.648 -2.563 -0.716 -1.732 -2.843 ...
Mínimo
# Install (when missing) and attach every package used in this tutorial.
# Fixes: TRUE instead of the reassignable shorthand T, and the package
# list is read from rownames(installed.packages()), which is where the
# package names live.
pqts <- c("corrplot", "MASS", "multcomp", "ggplot2", "vegan", "MuMIn")
A <- lapply(pqts, function(pkg) {
  if (!pkg %in% rownames(installed.packages())) {
    install.packages(pkg)
  }
  # require() (not library()) is kept so A collects the logical
  # attach-success flags, as in the original code
  require(pkg, character.only = TRUE)
})
## Loading required package: corrplot
## Loading required package: MASS
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## ## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS': ## ## geyser
## Loading required package: ggplot2
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-1
## Loading required package: MuMIn
Mínimo
x <- phaethornis$duetdur min(x)
## [1] 5.103
Máximo
max(x)
## [1] 438.772
Rango
range(x)
## [1] 5.103 438.772
Varianza
var(x)
## [1] 4449.565
Desviación estándar
sd(x)
## [1] 66.70506
Error estándar
sd(x)/sqrt(length(x))
## [1] 6.064097
Cuartiles
quantile(x)
## 0% 25% 50% 75% 100% ## 5.103 23.133 45.970 90.619 438.772
quantile(x, probs = c(0.025, 0.975))
## 2.5% 97.5% ## 5.801 236.978
Resumen
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 5.103 23.130 45.970 68.700 90.620 438.800
Resumen
summary(phaethornis)
## coord.sing.event Lek pattern duetdur ## Min. : 1 CCE:28 No.ovlp :29 Min. : 5.103 ## 1st Qu.: 31 CCL:22 No.pattern:65 1st Qu.: 23.133 ## Median : 61 LOC:41 Ovlp :27 Median : 45.970 ## Mean : 61 SUR:30 Mean : 68.699 ## 3rd Qu.: 91 3rd Qu.: 90.619 ## Max. :121 Max. :438.772 ## sing.perf distcat ## Min. :-1.00000 Close:18 ## 1st Qu.:-0.30568 Far :26 ## Median : 0.06376 NA's :77 ## Mean :-0.04186 ## 3rd Qu.: 0.25334 ## Max. : 1.06368
Frecuencias (cuentas)
table(phaethornis$Lek)
## ## CCE CCL LOC SUR ## 28 22 41 30
Normalidad
shapiro.test(x)
## ## Shapiro-Wilk normality test ## ## data: x ## W = 0.77516, p-value = 2.482e-12
Normalidad
# Build a two-color gradient palette, make its 10 colors
# semi-transparent, and draw a histogram of duet duration using one
# randomly chosen fill color.
mi.paleta <- colorRampPalette(c("#E37222", "#66B9BF"))
cols <- adjustcolor(mi.paleta(10), alpha.f = 0.4)
hist(x, breaks = 8, col = sample(cols, 1))
Normalidad
Normalidad (ggplot)
library(ggplot2) ggplot(phaethornis, aes(x = duetdur)) + geom_histogram(color = sample(cols, 1), fill = sample(cols, 1)) + theme_classic()
Normalidad
qqnorm(x, col = sample(cols, 1), pch = 20, cex = 2) qqline(x, col = 2)
Normalidad (ggplot)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Normalidad y transformación de variables
shapiro.test(log(x))
## ## Shapiro-Wilk normality test ## ## data: log(x) ## W = 0.98882, p-value = 0.4284
Normalidad y transformación de variables
hist(log(x), breaks = 15, col = sample(cols, 1))
Normalidad y transformación de variables
qqnorm(log(x), col = sample(cols, 1), pch = 20, cex = 2) qqline(log(x), col = 2)
Igualdad de las varianzas
phaet2 <- phaethornis[phaethornis$Lek %in% c("SUR", "CCL"), ] var.test(phaet2$sing.perf ~ phaet2$Lek)
## ## F test to compare two variances ## ## data: phaet2$sing.perf by phaet2$Lek ## F = 1.3138, num df = 21, denom df = 29, p-value = 0.4887 ## alternative hypothesis: true ratio of variances is not equal to 1 ## 95 percent confidence interval: ## 0.5981496 3.0434723 ## sample estimates: ## ratio of variances ## 1.313809
Prueba T de Student (de 2 muestras):
data(sleep) t.test(extra ~ group, data = sleep)
## ## Welch Two Sample t-test ## ## data: extra by group ## t = -1.8608, df = 17.776, p-value = 0.07939 ## alternative hypothesis: true difference in means is not equal to 0 ## 95 percent confidence interval: ## -3.3654832 0.2054832 ## sample estimates: ## mean in group 1 mean in group 2 ## 0.75 2.33
Prueba T de Student (de 2 muestras):
# Pull the extra-sleep values of each drug group into two vectors and
# run Welch's two-sample t test on them (equivalent to the formula
# interface used above).
x <- sleep$extra[sleep$group == 1]
y <- sleep$extra[sleep$group == 2]
t.test(x, y)
## ## Welch Two Sample t-test ## ## data: x and y ## t = -1.8608, df = 17.776, p-value = 0.07939 ## alternative hypothesis: true difference in means is not equal to 0 ## 95 percent confidence interval: ## -3.3654832 0.2054832 ## sample estimates: ## mean of x mean of y ## 0.75 2.33
T pareada
t.test(extra ~ group, data = sleep, paired = T)
## ## Paired t-test ## ## data: extra by group ## t = -4.0621, df = 9, p-value = 0.002833 ## alternative hypothesis: true difference in means is not equal to 0 ## 95 percent confidence interval: ## -2.4598858 -0.7001142 ## sample estimates: ## mean of the differences ## -1.58
wilcox.test(x = x, y = y)
## Warning in wilcox.test.default(x = x, y = y): cannot compute exact p-value ## with ties
## ## Wilcoxon rank sum test with continuity correction ## ## data: x and y ## W = 25.5, p-value = 0.06933 ## alternative hypothesis: true location shift is not equal to 0
Prueba de Wilcoxon pareada
wilcox.test(x = x, y = y, paired = TRUE)
## Warning in wilcox.test.default(x = x, y = y, paired = TRUE): cannot compute ## exact p-value with ties
## Warning in wilcox.test.default(x = x, y = y, paired = TRUE): cannot compute ## exact p-value with zeroes
## ## Wilcoxon signed rank test with continuity correction ## ## data: x and y ## V = 0, p-value = 0.009091 ## alternative hypothesis: true location shift is not equal to 0
Correlación de Pearson
x2 <- diastema$meanfreq y2 <- diastema$duration cor(x = x2, y = y2)
## [1] -0.5760356
cor.test(x = x2, y = y2)
## ## Pearson's product-moment correlation ## ## data: x2 and y2 ## t = -21.129, df = 899, p-value < 2.2e-16 ## alternative hypothesis: true correlation is not equal to 0 ## 95 percent confidence interval: ## -0.6180935 -0.5306894 ## sample estimates: ## cor ## -0.5760356
cor.test(x = x2, y = y2, method = "spearman")
## Warning in cor.test.default(x = x2, y = y2, method = "spearman"): Cannot ## compute exact p-value with ties
## ## Spearman's rank correlation rho ## ## data: x2 and y2 ## S = 185300000, p-value < 2.2e-16 ## alternative hypothesis: true rho is not equal to 0 ## sample estimates: ## rho ## -0.5200349
Evaluar colinealidad de variables
# Correlation matrix of all numeric acoustic measurements.
# vapply() replaces sapply() so the column filter is guaranteed to be
# a logical vector even on degenerate input.
num.cols <- vapply(diastema, is.numeric, logical(1))
mat.cor <- cor(diastema[, num.cols])
head(mat.cor)
## duration meanfreq sd median Q25 ## duration 1.0000000 -0.5760356 0.56732529 -0.5592035 -0.5771093 ## meanfreq -0.5760356 1.0000000 -0.18767105 0.9936121 0.9921899 ## sd 0.5673253 -0.1876710 1.00000000 -0.1491603 -0.2092945 ## median -0.5592035 0.9936121 -0.14916026 1.0000000 0.9909145 ## Q25 -0.5771093 0.9921899 -0.20929449 0.9909145 1.0000000 ## Q75 -0.4985858 0.9880212 -0.07466188 0.9905400 0.9775198 ## Q75 IQR skew kurt sp.ent sfm ## duration -0.49858583 0.08962441 0.5730427 0.4960304 0.08517835 0.3707118 ## meanfreq 0.98802120 0.42731043 -0.8678404 -0.8075071 0.60924625 0.1867889 ## sd -0.07466188 0.50424029 0.1482723 0.1041929 0.47988756 0.8222156 ## median 0.99054000 0.44359825 -0.8836339 -0.8265777 0.62590284 0.2226624 ## Q25 0.97751975 0.34944645 -0.8602681 -0.8032396 0.57542156 0.1594589 ## Q75 1.00000000 0.53914231 -0.8769634 -0.8272779 0.69983409 0.3041923 ## mode centroid peakf meandom mindom maxdom ## duration -0.5801976 -0.5760356 -0.3442749 -0.5520184 -0.4395534 -0.4990648 ## meanfreq 0.9876984 1.0000000 0.3746895 0.9716538 0.7623498 0.9346644 ## sd -0.1694371 -0.1876710 -0.3876864 -0.1818978 -0.2652301 -0.1128540 ## median 0.9932076 0.9936121 0.3483108 0.9723942 0.7490638 0.9371465 ## Q25 0.9874968 0.9921899 0.3781663 0.9700893 0.7595913 0.9296068 ## Q75 0.9788687 0.9880212 0.3167142 0.9603656 0.7343151 0.9314592 ## dfrange modindx startdom enddom dfslope ## duration -0.1826537 0.1169091 -0.2745944 -0.5801700 -0.6637003 ## meanfreq 0.4188517 0.1699135 0.6900515 0.9092519 0.5756101 ## sd 0.1678878 0.1198796 -0.1650753 -0.1855753 -0.2832770 ## median 0.4392773 0.1901571 0.6769922 0.9110402 0.5704379 ## Q25 0.4148611 0.1716737 0.6830710 0.9107326 0.5754538 ## Q75 0.4494820 0.2101372 0.6766135 0.8957976 0.5375374
Evaluar colinealidad de variables (paquete "corrplot")
corrplot(corr = mat.cor)
Evaluar colinealidad de variables (paquete "corrplot")
corrplot(corr = mat.cor, method = "ellipse", col = mi.paleta(200), tl.col = "black")
Evaluar colinealidad de variables (paquete "corrplot")
corrplot(corr = mat.cor[1:6, 1:6], method = "number")
Evaluar colinealidad de variables (paquete "corrplot")
corrplot.mixed(corr = mat.cor[1:6, 1:6], upper = "number", lower = "ellipse")
Evaluar colinealidad de variables
Regresión lineal
reg <- lm(phaethornis$sing.perf ~ phaethornis$duetdur) summary(reg)
## ## Call: ## lm(formula = phaethornis$sing.perf ~ phaethornis$duetdur) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.97212 -0.25711 0.09902 0.29230 1.09037 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.0208287 0.0558398 -0.373 0.710 ## phaethornis$duetdur -0.0003061 0.0005843 -0.524 0.601 ## ## Residual standard error: 0.427 on 119 degrees of freedom ## Multiple R-squared: 0.002301, Adjusted R-squared: -0.006083 ## F-statistic: 0.2745 on 1 and 119 DF, p-value: 0.6013
Regresión lineal
coef(reg)
## (Intercept) phaethornis$duetdur ## -0.0208287041 -0.0003061277
confint(reg)
## 2.5 % 97.5 % ## (Intercept) -0.131397001 0.0897395931 ## phaethornis$duetdur -0.001463145 0.0008508897
Regresión lineal
reg2 <- lm(duration * 1000 ~ meanfreq, data = diastema) summary(reg2)
## ## Call: ## lm(formula = duration * 1000 ~ meanfreq, data = diastema) ## ## Residuals: ## Min 1Q Median 3Q Max ## -87.584 -20.954 3.414 17.941 104.311 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 209.813 5.612 37.39 <2e-16 *** ## meanfreq -30.271 1.433 -21.13 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 28.75 on 899 degrees of freedom ## Multiple R-squared: 0.3318, Adjusted R-squared: 0.3311 ## F-statistic: 446.4 on 1 and 899 DF, p-value: < 2.2e-16
Regresión lineal
coef(reg2)
## (Intercept) meanfreq ## 209.81317 -30.27141
confint(reg2)
## 2.5 % 97.5 % ## (Intercept) 198.79927 220.82706 ## meanfreq -33.08321 -27.45961
Regresión lineal
plot(diastema$meanfreq, diastema$duration * 1000, pch = 20, col = adjustcolor(sample(cols, 1), alpha.f = 0.3), cex = 2, ylab = "Duración (ms)", xlab = "Frecuencia promedio (kHz)") abline(reg2)
Regresión lineal
ggplot(diastema, aes(x = meanfreq, y = duration)) + geom_point(fill = sample(cols, 1), size = 2, shape = 21) + ylab("Duración (ms)") + xlab("Frecuencia promedio (kHz)") + geom_smooth(method = "lm", col = "red2")
Compara promedios entre grupos
phaet3 <- phaethornis[!is.na(phaethornis$distcat), ] reg3 <- lm(phaet3$sing.perf ~ phaet3$distcat)
Regresión lineal (ANOVA)
anova(reg3)
## Analysis of Variance Table ## ## Response: phaet3$sing.perf ## Df Sum Sq Mean Sq F value Pr(>F) ## phaet3$distcat 1 5.1714 5.1714 80.928 2.394e-11 *** ## Residuals 42 2.6839 0.0639 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ANOVA
reg3 <- lm(phaet3$duetdur ~ phaet3$distcat) anova(reg3)
## Analysis of Variance Table ## ## Response: phaet3$duetdur ## Df Sum Sq Mean Sq F value Pr(>F) ## phaet3$distcat 1 87 86.51 0.0274 0.8694 ## Residuals 42 132777 3161.35
ANOVA: normalidad de residuales
qqnorm(residuals(reg3), col = sample(cols, 1), pch = 20, cex = 2) qqline(residuals(reg3), col = 2)
## Prueba de hipótesis
Pruebas a-posteriori
reg3 <- lm(phaethornis$sing.perf ~ phaethornis$Lek) anova(reg3)
## Analysis of Variance Table ## ## Response: phaethornis$sing.perf ## Df Sum Sq Mean Sq F value Pr(>F) ## phaethornis$Lek 3 3.8367 1.27890 8.3556 4.429e-05 *** ## Residuals 117 17.9079 0.15306 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pruebas a-posteriori
pairwise.t.test(phaethornis$sing.perf, phaethornis$Lek, p.adj = "bonferroni")
## ## Pairwise comparisons using t tests with pooled SD ## ## data: phaethornis$sing.perf and phaethornis$Lek ## ## CCE CCL LOC ## CCL 0.0029 - - ## LOC 6e-05 1.0000 - ## SUR 0.4059 0.3419 0.0485 ## ## P value adjustment method: bonferroni
Pruebas a-posteriori
reg4 <- aov(sing.perf ~ Lek, data = phaethornis) TukeyHSD(reg4)
Pruebas a-posteriori
## Tukey multiple comparisons of means ## 95% family-wise confidence level ## ## Fit: aov(formula = sing.perf ~ Lek, data = phaethornis) ## ## $Lek ## diff lwr upr p adj ## CCL-CCE -0.40071963 -0.691225805 -0.11021345 0.0026487 ## LOC-CCE -0.44290297 -0.692888188 -0.19291775 0.0000588 ## SUR-CCE -0.18961254 -0.457551011 0.07832594 0.2579974 ## LOC-CCL -0.04218334 -0.311664084 0.22729740 0.9769638 ## SUR-CCL 0.21110709 -0.075106618 0.49732080 0.2243343 ## SUR-LOC 0.25329043 0.008306647 0.49827422 0.0398127
Pruebas a-posteriori (paquete 'multcomp')
reg4T <- glht(reg4, linfct = mcp(Lek = "Tukey")) confint(reg4T)
Pruebas a-posteriori (paquete 'multcomp')
## ## Simultaneous Confidence Intervals ## ## Multiple Comparisons of Means: Tukey Contrasts ## ## ## Fit: aov(formula = sing.perf ~ Lek, data = phaethornis) ## ## Quantile = 2.604 ## 95% family-wise confidence level ## ## ## Linear Hypotheses: ## Estimate lwr upr ## CCL - CCE == 0 -0.400720 -0.690968 -0.110471 ## LOC - CCE == 0 -0.442903 -0.692666 -0.193140 ## SUR - CCE == 0 -0.189613 -0.457313 0.078088 ## LOC - CCL == 0 -0.042183 -0.311425 0.227058 ## SUR - CCL == 0 0.211107 -0.074853 0.497067 ## SUR - LOC == 0 0.253290 0.008524 0.498057
Pruebas a-posteriori (paquete 'multcomp')
## Estimate lwr upr ## CCL - CCE -0.40071963 -0.690968055 -0.11047120 ## LOC - CCE -0.44290297 -0.692666391 -0.19313955 ## SUR - CCE -0.18961254 -0.457313285 0.07808821 ## LOC - CCL -0.04218334 -0.311424990 0.22705830 ## SUR - CCL 0.21110709 -0.074852677 0.49706686 ## SUR - LOC 0.25329043 0.008524007 0.49805686 ## attr(,"conf.level") ## [1] 0.95 ## attr(,"calpha") ## [1] 2.604027
Pruebas a-posteriori (paquete 'multcomp')
plot(reg4T)
Pruebas a-posteriori (paquete 'multcomp')
dif.letras <- cld(reg4T) dif.letras
## CCE CCL LOC SUR ## "c" "ab" "a" "bc"
Pruebas a-posteriori
boxplot(phaethornis$sing.perf ~ phaethornis$Lek, col = sample(cols, 1)) text(x = 1:4, y = 0.95, labels = dif.letras$mcletters$Letters)
Pruebas a-posteriori
ggplot(phaethornis, aes(x = Lek, y = sing.perf)) + geom_boxplot(fill = sample(cols, 1)) + annotate(geom = "text", x = 1:4, y = 0.95, label = dif.letras$mcletters$Letters)
Pruebas a-posteriori
## Tukey multiple comparisons of means ## 95% family-wise confidence level ## ## Fit: aov(formula = phaethornis$sing.perf ~ phaethornis$Lek) ## ## $`phaethornis$Lek` ## diff lwr upr p adj ## CCL-CCE -0.40071963 -0.691225805 -0.11021345 0.0026487 ## LOC-CCE -0.44290297 -0.692888188 -0.19291775 0.0000588 ## SUR-CCE -0.18961254 -0.457551011 0.07832594 0.2579974 ## LOC-CCL -0.04218334 -0.311664084 0.22729740 0.9769638 ## SUR-CCL 0.21110709 -0.075106618 0.49732080 0.2243343 ## SUR-LOC 0.25329043 0.008306647 0.49827422 0.0398127
Kruskal-Wallis (ANOVA no paramétrico)
kw <- kruskal.test(phaethornis$sing.perf ~ phaethornis$Lek) kw
## ## Kruskal-Wallis rank sum test ## ## data: phaethornis$sing.perf by phaethornis$Lek ## Kruskal-Wallis chi-squared = 17.787, df = 3, p-value = 0.0004866
Chi-cuadrado
lek.coor <- table(phaet3$distcat, phaet3$Lek) lek.coor
## ## CCE CCL LOC SUR ## Close 1 0 16 1 ## Far 6 0 2 18
lek.coor <- lek.coor[, -2]
Chi-cuadrado
chisq.test(lek.coor)
## Warning in chisq.test(lek.coor): Chi-squared approximation may be incorrect
## ## Pearson's Chi-squared test ## ## data: lek.coor ## X-squared = 29.181, df = 2, p-value = 4.607e-07
Chi-cuadrado con simulación del valor de P con Monte Carlo
chisq.test(lek.coor, simulate.p.value = TRUE, B = 1000)
## ## Pearson's Chi-squared test with simulated p-value (based on 1000 ## replicates) ## ## data: lek.coor ## X-squared = 29.181, df = NA, p-value = 0.000999
Prueba "exacta" de Fisher (similar a chi-cuadrado para n pequeño)
fisher.test(lek.coor, simulate.p.value = TRUE, B = 1000)
## ## Fisher's Exact Test for Count Data with simulated p-value (based ## on 1000 replicates) ## ## data: lek.coor ## p-value = 0.000999 ## alternative hypothesis: two.sided
Prueba de Mantel
# Build two distance matrices from a reproducible random subsample of
# 30 calls: one from the dominant-frequency ("dom") variables, one
# from the remaining numeric variables. Both are standardized before
# computing Euclidean distances.
set.seed(10)  # reproducible subsample
indc <- sample(1:nrow(diastema), size = 30)
dom.var <- diastema[indc, grep("dom", names(diastema))]
non.dom.var <- diastema[indc, grep("dom", names(diastema), invert = TRUE)]
# vapply() guarantees a logical filter (sapply could simplify oddly)
non.dom.var <- non.dom.var[vapply(non.dom.var, is.numeric, logical(1))]
dom.dist <- dist(scale(dom.var))
non.dom.dist <- dist(scale(non.dom.var))
Prueba de Mantel (paquete 'vegan')
mantel(dom.dist, non.dom.dist, permutations = 1000)
## ## Mantel statistic based on Pearson's product-moment correlation ## ## Call: ## mantel(xdis = dom.dist, ydis = non.dom.dist, permutations = 1000) ## ## Mantel statistic r: 0.7093 ## Significance: 0.000999 ## ## Upper quantiles of permutations (null model): ## 90% 95% 97.5% 99% ## 0.0872 0.1177 0.1464 0.1674 ## Permutation: free ## Number of permutations: 1000
Métodos de aleatorización (Monte Carlo)
Métodos de aleatorización (Monte Carlo)
Prueba T simple:
t.test(extra ~ group, data = sleep)
## ## Welch Two Sample t-test ## ## data: extra by group ## t = -1.8608, df = 17.776, p-value = 0.07939 ## alternative hypothesis: true difference in means is not equal to 0 ## 95 percent confidence interval: ## -3.3654832 0.2054832 ## sample estimates: ## mean in group 1 mean in group 2 ## 0.75 2.33
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
# Randomization (Monte Carlo) version of the two-sample comparison:
# permute the group labels 10000 times and record the difference in
# group means expected under the null hypothesis of no group effect.
reps <- 10000
dif.prom <- numeric(reps)  # preallocate instead of growing in the loop
sleep1 <- sleep
for (i in seq_len(reps)) {
  sleep1$group <- sample(sleep1$group, nrow(sleep1))
  m1 <- mean(sleep1$extra[sleep1$group == "1"])
  m2 <- mean(sleep1$extra[sleep1$group == "2"])
  dif.prom[i] <- m1 - m2
}
# observed group means on the original (unpermuted) data
m1 <- mean(sleep$extra[sleep$group == "1"])
m2 <- mean(sleep$extra[sleep$group == "2"])
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
obs <- m1 - m2 obs
## [1] -1.58
# One-tailed p value: proportion of permuted mean differences at least
# as extreme (as negative) as the observed difference.
pval <- mean(dif.prom <= obs)
pval
## [1] 0.04
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
# Null distribution of the mean difference; the observed value is
# marked with a dashed red line.
hist(dif.prom, col = sample(cols, 1), main = NULL, breaks = 20,
     xlim = range(c(obs, dif.prom)))
abline(v = obs, col = "red", lty = 3, lwd = 4)
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
sleep2 <- sleep sleep2$extra[sleep2$group == 1] <- rnorm(n = 10, mean = 10) sleep2$extra[sleep2$group == 2] <- rnorm(n = 10, mean = 12)
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
# Same permutation test, now on the simulated data in which the two
# group means truly differ (10 vs 12).
reps <- 1000
dif.prom <- numeric(reps)  # preallocate instead of growing in the loop
sleep1 <- sleep2
for (i in seq_len(reps)) {
  sleep1$group <- sample(sleep1$group, nrow(sleep1))
  m1 <- mean(sleep1$extra[sleep1$group == "1"])
  m2 <- mean(sleep1$extra[sleep1$group == "2"])
  dif.prom[i] <- m1 - m2
}
# observed group means on the (unpermuted) simulated data
m1 <- mean(sleep2$extra[sleep2$group == "1"])
m2 <- mean(sleep2$extra[sleep2$group == "2"])
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
obs <- m1 - m2 pval <- length(dif.prom[dif.prom <= obs])/reps obs
## [1] -2.069347
pval
## [1] 0
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
hist(dif.prom, col = sample(cols, 1), main = NULL, breaks = 20, xlim = c(range(c(obs, dif.prom)))) abline(v = obs, col = "red", lty = 3, lwd = 4)
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
sleep2 <- sleep sleep2$extra[sleep2$group == 1] <- rnorm(n = 10, mean = 10) sleep2$extra[sleep2$group == 2] <- rnorm(n = 10, mean = 10)
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
# Same permutation test on simulated data in which both group means
# are equal (10 vs 10), so no effect should be detected.
reps <- 1000
dif.prom <- numeric(reps)  # preallocate instead of growing in the loop
sleep1 <- sleep2
for (i in seq_len(reps)) {
  sleep1$group <- sample(sleep1$group, nrow(sleep1))
  m1 <- mean(sleep1$extra[sleep1$group == "1"])
  m2 <- mean(sleep1$extra[sleep1$group == "2"])
  dif.prom[i] <- m1 - m2
}
# observed group means on the (unpermuted) simulated data
m1 <- mean(sleep2$extra[sleep2$group == "1"])
m2 <- mean(sleep2$extra[sleep2$group == "2"])
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
obs <- m1 - m2 pval <- length(dif.prom[dif.prom <= obs])/reps obs
## [1] 0.6440983
pval
## [1] 0.915
Métodos de aleatorización (Monte Carlo)
T pareada por aleatorización:
hist(dif.prom, col = sample(cols, 1), main = NULL, breaks = 20, xlim = c(range(c(obs, dif.prom)))) abline(v = obs, col = "red", lty = 3, lwd = 4)
Regresión logística
# Logistic regression: model the probability that singing happened at
# a "Far" distance as a function of song coordination (sing.perf).
# The binary response is built directly from the logical comparison
# (Far -> 1, Close -> 0), replacing the verbose character-recoding
# round trip; phaet3 has no NA distcat values by construction.
phaet3$distcat2 <- as.numeric(phaet3$distcat == "Far")
log.mod <- glm(phaet3$distcat2 ~ phaet3$sing.perf, family = "binomial")
summary(log.mod)
Regresión logística
## ## Call: ## glm(formula = phaet3$distcat2 ~ phaet3$sing.perf, family = "binomial") ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.04089 -0.09513 0.11338 0.30777 1.96271 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 2.638 0.884 2.985 0.00284 ** ## phaet3$sing.perf 9.821 3.180 3.088 0.00202 ** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 59.534 on 43 degrees of freedom ## Residual deviance: 19.439 on 42 degrees of freedom ## AIC: 23.439 ## ## Number of Fisher Scoring iterations: 7
Regresión logística
plot(phaet3$sing.perf, phaet3$distcat2, xlab = "Coordinación de canto", ylab = "Distancia (prob)", col = sample(cols, 1), pch = 20, cex = 2) curve(plogis(log.mod$coefficients[1] + (log.mod$coefficients[2] * x)), add = TRUE, col = "black", lwd = 3)
Regresión logística
Regresión logística
ggplot(phaet3, aes(x = sing.perf, y = distcat2)) + geom_point(size = 3, col = sample(cols, 1)) + stat_smooth(method = "glm", method.args = list(family = "binomial"), col = "red2") + theme_classic() + xlab("Coordinación de canto") + ylab("Distancia (probabilidad)")
Regresión logística
Regresión logística por aleatorización
# Null distribution for the logistic-regression slope: permute the
# binary response 1000 times, refit the model each time, and keep the
# sing.perf coefficient. (The unused dif.prom placeholder from the
# original was dropped.)
reps <- 1000
datos <- phaet3
# vapply() guarantees a plain numeric vector of length reps
coef.sim <- vapply(seq_len(reps), function(i) {
  datos$distcat2 <- sample(datos$distcat2, nrow(datos))
  log.mod <- glm(datos$distcat2 ~ datos$sing.perf, family = "binomial")
  as.numeric(coef(log.mod)[2])
}, numeric(1))
Regresión logística por aleatorización
# Observed slope on the real data, compared against the permutation
# null: one-tailed p = proportion of null slopes >= observed slope.
log.mod <- glm(phaet3$distcat2 ~ phaet3$sing.perf, family = "binomial")
obs <- coef(log.mod)[2]
pval <- mean(coef.sim >= obs)
pval
hist(coef.sim, col = sample(cols, 1), main = NULL, breaks = 20,
     xlim = range(c(obs, coef.sim)))
abline(v = obs, col = "red", lty = 3, lwd = 4)
Regresión logística por aleatorización
## [1] "p = 0"
Regresión logística por aleatorización
# Repeat with a random (uninformative) predictor: the observed slope
# should now fall well inside the permutation null distribution.
# NOTE(review): coef.sim is reused from the previous randomization;
# strictly the null distribution should be regenerated for the new
# predictor — confirm this shortcut is intentional for the demo.
phaet3$sing.perf <- rnorm(nrow(phaet3))
log.mod <- glm(phaet3$distcat2 ~ phaet3$sing.perf, family = "binomial")
obs <- coef(log.mod)[2]
pval <- mean(coef.sim >= obs)
pval
hist(coef.sim, col = sample(cols, 1), main = NULL, breaks = 20,
     xlim = range(c(obs, coef.sim)))
abline(v = obs, col = "red", lty = 3, lwd = 4)
Regresión logística por aleatorización
## [1] "p = 0.438"
Calcule un valor de P para la prueba de chi-cuadrado utilizando aleatorización
lek.coor <- table(phaet3$distcat, phaet3$Lek) lek.coor <- lek.coor[, -2] chisq.test(lek.coor)
Calcule un valor de P para la prueba de correlación
x2 <- diastema$meanfreq y2 <- diastema$duration cor(x = x2, y = y2) cor.test(x = x2, y = y2)
Análisis de componentes principales (PCA)
Análisis de componentes principales
# PCA on all numeric acoustic variables, centered and scaled.
# vapply() guarantees a logical column filter.
# NOTE(review): the non.dom.var reassignment is kept for fidelity,
# but its result is not used afterwards in this script.
vars <- diastema[, vapply(diastema, is.numeric, logical(1))]
non.dom.var <- diastema[indc, grep("dom", names(diastema), invert = TRUE)]
pca <- prcomp(vars, scale. = TRUE)
Análisis de componentes principales
summary(pca)
## Importance of components: ## PC1 PC2 PC3 PC4 PC5 PC6 ## Standard deviation 3.600 1.8754 1.36440 0.95328 0.84964 0.74867 ## Proportion of Variance 0.589 0.1599 0.08462 0.04131 0.03281 0.02548 ## Cumulative Proportion 0.589 0.7489 0.83354 0.87485 0.90766 0.93314 ## PC7 PC8 PC9 PC10 PC11 PC12 ## Standard deviation 0.67591 0.53852 0.51463 0.44915 0.34442 0.2298 ## Proportion of Variance 0.02077 0.01318 0.01204 0.00917 0.00539 0.0024 ## Cumulative Proportion 0.95390 0.96708 0.97912 0.98829 0.99368 0.9961 ## PC13 PC14 PC15 PC16 PC17 PC18 ## Standard deviation 0.18925 0.13113 0.1144 0.09241 0.07102 0.06457 ## Proportion of Variance 0.00163 0.00078 0.0006 0.00039 0.00023 0.00019 ## Cumulative Proportion 0.99771 0.99849 0.9991 0.99948 0.99971 0.99990 ## PC19 PC20 PC21 PC22 ## Standard deviation 0.04779 5.837e-16 4.93e-16 2.2e-16 ## Proportion of Variance 0.00010 0.000e+00 0.00e+00 0.0e+00 ## Cumulative Proportion 1.00000 1.000e+00 1.00e+00 1.0e+00
Análisis de componentes principales
imp <- summary(pca)$importance barplot(imp[2, 1:10])
Análisis de componentes principales
pcs <- as.data.frame(pca$x) pcs$species <- diastema$species head(pcs)
## PC1 PC2 PC3 PC4 PC5 PC6 ## 1 -0.7884148 -1.2561829 4.1099702 1.1321605 -1.2402470 -1.1352864 ## 2 -1.9068187 -1.2354301 -0.4122830 -0.2365529 -0.4641854 0.3735873 ## 3 -2.2905276 -0.9350050 -0.4343105 -0.2585757 -0.3929729 0.7162968 ## 4 -2.5182427 -1.0071133 -0.4669502 -0.5864267 -0.6066489 0.3853390 ## 5 -3.0048630 -0.3065238 -0.4472219 -0.7996769 -0.7101807 -0.1387036 ## 6 -2.8694257 -0.8068719 -0.3504365 -0.4367792 1.3926818 -0.3023769 ## PC7 PC8 PC9 PC10 PC11 PC12 ## 1 -0.6848606 0.21863776 0.24107975 -2.10746619 -0.16729587 -0.01101668 ## 2 0.7519019 -0.17769025 0.17379809 -0.02568366 -0.22727542 -0.30236271 ## 3 0.5374597 -0.22946264 0.38401607 0.10631092 -0.08202004 -0.25213400 ## 4 0.1881191 0.03137748 0.05269968 0.06956608 0.11697315 -0.40840227 ## 5 -0.2593489 0.46783540 -0.20898825 0.18066723 -0.05664595 -0.12008633 ## 6 -0.3369962 0.46618986 -0.05931028 0.05904335 0.03188471 -0.15333313 ## PC13 PC14 PC15 PC16 PC17 PC18 ## 1 0.78145239 0.098741838 0.22600905 0.038385402 -0.16386864 0.12558805 ## 2 -0.01205518 -0.005124535 0.05341263 0.002422048 -0.05950918 0.05503894 ## 3 -0.01445038 0.007916565 0.06692819 0.037739635 -0.07540013 0.07903032 ## 4 0.03073569 0.009462144 0.05727527 0.049229144 -0.08287980 0.09292486 ## 5 -0.02781749 0.053970247 0.03773885 0.035433841 -0.03055311 0.03323801 ## 6 -0.07581194 0.075035063 0.08989460 0.093057910 -0.03594109 0.07579495 ## PC19 PC20 PC21 PC22 species ## 1 0.046026003 -5.270996e-16 -3.241994e-17 -2.560152e-16 diastema ## 2 0.062900751 5.583089e-17 1.178914e-16 -2.962271e-16 diastema ## 3 0.010426499 6.567764e-17 1.381311e-16 -1.944968e-16 diastema ## 4 -0.006178869 -4.665470e-16 -7.066256e-18 -2.324066e-16 diastema ## 5 -0.032677220 -7.121393e-17 3.675287e-16 -3.998867e-16 diastema ## 6 -0.037143139 4.264621e-16 7.484578e-16 -8.440185e-17 diastema
Análisis de componentes principales
ggplot(pcs, aes(x = PC1, y = PC2, color = species)) + geom_point()
Análisis de función discriminante
Análisis de función discriminante
datos <- diastema[, -c(2, 3)] fd1 <- lda(species ~ ., datos)
## Warning in lda.default(x, grouping, ...): variables are collinear
fd2 <- lda(species ~ ., pcs[, c(1, 5, ncol(pcs))])
Análisis de función discriminante
pcs$pred <- predict(fd2, pcs[, c(1, 5)])$class ct <- table(pcs$pred, pcs$species) diag(prop.table(ct, 1))
## diastema vocator hylaeformis ## 0.9357639 0.8937198 0.9830508
sum(diag(prop.table(ct)))
## [1] 0.9322974
Criterio de Información de Akaike (AIC)
Criterio de Información de Akaike (AIC) - Estimación relativa (sin unidades) de la información que se pierde cuando se reproduce el proceso que generó los datos - Penaliza el poder predictivo del modelo por el número de parámetros
reg4 <- lm(sing.perf ~ distcat, data = phaet3) reg5 <- lm(sing.perf ~ distcat + duetdur, data = phaet3) reg6 <- lm(sing.perf ~ distcat + Lek, data = phaet3) reg7 <- lm(sing.perf ~ distcat + Lek + duetdur, data = phaet3)
Criterio de Información de Akaike (AIC)
aictab <- AIC(reg4, reg5, reg6, reg7) aictab[order(aictab$AIC), ]
## df AIC ## reg5 4 144.6836 ## reg4 3 147.5700 ## reg7 6 148.0271 ## reg6 5 150.8836
Criterio de Información Bayesiano (BIC)
bictab <- BIC(reg4, reg5, reg6, reg7) bictab[order(bictab$BIC), ]
## df BIC ## reg5 4 151.8203 ## reg4 3 152.9226 ## reg7 6 158.7322 ## reg6 5 159.8045
Criterio de Información de Akaike (AIC) (paquete "MuMIn")
reg7 <- lm(sing.perf ~ distcat + Lek + duetdur, data = phaet3) options(na.action = "na.fail") mglob <- dredge(reg7, evaluate = T, rank = "AICc")
## Fixed term is "(Intercept)"
Criterio de Información de Akaike (AIC) (paquete "MuMIn")
model.avg(mglob)
## ## Call: ## model.avg(object = mglob) ## ## Component models: ## '2' '12' '(Null)' '23' '1' '3' '123' '13' ## ## Coefficients: ## (Intercept) duetdur distcatFar LekLOC LekSUR ## full -0.5752548 0.005545117 0.02877135 -0.04890541 -0.03214358 ## subset -0.5752548 0.007136108 0.11875459 -0.45469026 -0.29884980
Criterio de Información de Akaike (AIC) (paquete "MuMIn")
# Keep the smallest set of top-ranked models whose Akaike weights
# accumulate to at least 95%. Using min(which(cumsum(...) >= 0.95))
# also handles the case where the best model alone carries > 95% of
# the weight; the original max(which(cumsum(...) <= 0.95)) + 1 form
# fails there because max(integer(0)) is -Inf.
m95 <- mglob[seq_len(min(which(cumsum(mglob$weight) >= 0.95))), ]
m95
## Global model call: lm(formula = sing.perf ~ distcat + Lek + duetdur, data = phaet3) ## --- ## Model selection table ## (Intrc) dstct dutdr Lek df logLik AICc delta weight ## 3 -0.7099 0.007102 3 -68.427 143.5 0.00 0.547 ## 4 -0.7979 + 0.007135 4 -68.342 145.7 2.26 0.177 ## 1 -0.1729 2 -70.841 146.0 2.52 0.155 ## 7 -0.3881 0.007354 + 5 -68.014 147.6 4.15 0.069 ## 2 -0.2463 + 3 -70.785 148.2 4.72 0.052 ## Models ranked by AICc(x)
# plot(dds1) importance(m95)
## duetdur distcat Lek ## Importance: 0.79 0.23 0.07 ## N containing models: 3 2 1
model.avg(m95)
## ## Call: ## model.avg(object = m95) ## ## Component models: ## '2' '12' '(Null)' '23' '1' ## ## Coefficients: ## (Intercept) duetdur distcatFar LekLOC LekSUR ## full -0.5961007 0.005656185 0.0320591 -0.03172369 -0.02409206 ## subset -0.5961007 0.007131638 0.1400367 -0.46251403 -0.35124899
Criterio de Información de Akaike (AIC) (paquete "MuMIn")
ma <- model.avg(m95) ma
## ## Call: ## model.avg(object = m95) ## ## Component models: ## '2' '12' '(Null)' '23' '1' ## ## Coefficients: ## (Intercept) duetdur distcatFar LekLOC LekSUR ## full -0.5961007 0.005656185 0.0320591 -0.03172369 -0.02409206 ## subset -0.5961007 0.007131638 0.1400367 -0.46251403 -0.35124899
Criterio de Información de Akaike (AIC) (paquete "MuMIn")
confint(ma)
## 2.5 % 97.5 % ## (Intercept) -1.3755276958 0.18332631 ## duetdur 0.0005981982 0.01366508 ## distcatFar -0.6013867666 0.88146009 ## LekLOC -1.5377578668 0.61272980 ## LekSUR -1.4342458690 0.73174789