Let’s predict length from width.
head(KidsFeet)
## name birthmonth birthyear length width sex biggerfoot domhand
## 1 David 5 88 24.4 8.4 B L R
## 2 Lars 10 87 25.4 8.8 B L L
## 3 Zach 12 87 24.5 9.7 B R R
## 4 Josh 1 88 25.2 9.8 B L R
## 5 Lang 2 88 25.1 8.9 B L R
## 6 Scotty 3 88 25.7 9.7 B R R
gf_point(length ~ width, data = KidsFeet) %>%
gf_lm(length ~ width)
Here is the summary several times over using different styes. msummary()
gives a more minimal summary.
Kids.model <- lm(length ~ width, data = KidsFeet)
msummary(Kids.model)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8172 2.9381 3.341 0.00192 **
## width 1.6576 0.3262 5.081 1.1e-05 ***
##
## Residual standard error: 1.025 on 37 degrees of freedom
## Multiple R-squared: 0.411, Adjusted R-squared: 0.3951
## F-statistic: 25.82 on 1 and 37 DF, p-value: 1.097e-05
Kids.model %>% msummary()
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8172 2.9381 3.341 0.00192 **
## width 1.6576 0.3262 5.081 1.1e-05 ***
##
## Residual standard error: 1.025 on 37 degrees of freedom
## Multiple R-squared: 0.411, Adjusted R-squared: 0.3951
## F-statistic: 25.82 on 1 and 37 DF, p-value: 1.097e-05
lm(length ~ width, data = KidsFeet) %>%
msummary()
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8172 2.9381 3.341 0.00192 **
## width 1.6576 0.3262 5.081 1.1e-05 ***
##
## Residual standard error: 1.025 on 37 degrees of freedom
## Multiple R-squared: 0.411, Adjusted R-squared: 0.3951
## F-statistic: 25.82 on 1 and 37 DF, p-value: 1.097e-05
anova(Kids.model)
## Analysis of Variance Table
##
## Response: length
## Df Sum Sq Mean Sq F value Pr(>F)
## width 1 27.114 27.1136 25.819 1.097e-05 ***
## Residuals 37 38.856 1.0502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# create a function
predicted.length <- makeFun(Kids.model)
# use it to make a prediction
predicted.length(width = 8.5)
## 1
## 23.90702
# use it to make multiple predictions
predicted.length(width = c(8, 8.5, 9))
## 1 2 3
## 23.07820 23.90702 24.73583
# or predict for all widths in the data set
fitted(Kids.model)
## 1 2 3 4 5 6 7 8
## 23.74125 24.40430 25.89616 26.06193 24.57007 25.89616 25.73040 24.40430
## 9 10 11 12 13 14 15 16
## 25.23312 24.40430 26.06193 24.57007 24.90159 26.06193 25.23312 22.91244
## 17 18 19 20 21 22 23 24
## 24.23854 24.40430 24.73583 25.56464 25.06735 24.07278 23.57549 24.73583
## 25 26 27 28 29 30 31 32
## 23.24397 25.39888 25.56464 25.56464 24.57007 25.23312 25.23312 24.07278
## 33 34 35 36 37 38 39
## 24.07278 24.73583 24.07278 23.90702 24.73583 22.91244 24.40430
predicted.length( width = 9, interval = "confidence")
## fit lwr upr
## 1 24.73583 24.4033 25.06835
predicted.length( width = 9, interval = "prediction")
## fit lwr upr
## 1 24.73583 22.63299 26.83866
gf_point(length ~ width, data = KidsFeet) %>%
gf_lm(length ~ width) %>%
gf_pointrange(24.73 + 24.4 + 25.06 ~ 9, color = "red")
gf_pointrange()
## gf_pointrange uses a formula with shape y + ymin + ymax ~ x.
## Additional attributes include:
## alpha, color, group, linetype, size, fatten = 2
## For more information, try ?gf_pointrange
apropos("gf_")
## [1] "gf_abline" "gf_area" "gf_bar" "gf_boxplot"
## [5] "gf_coefline" "gf_col" "gf_contour" "gf_count"
## [9] "gf_counts" "gf_crossbar" "gf_curve" "gf_dens"
## [13] "gf_density" "gf_density_2d" "gf_density2d" "gf_dotplot"
## [17] "gf_errorbar" "gf_errorbarh" "gf_facet_grid" "gf_facet_wrap"
## [21] "gf_frame" "gf_freqpoly" "gf_fun" "gf_function"
## [25] "gf_hex" "gf_histogram" "gf_hline" "gf_jitter"
## [29] "gf_label" "gf_labs" "gf_lims" "gf_line"
## [33] "gf_linerange" "gf_lm" "gf_path" "gf_point"
## [37] "gf_pointrange" "gf_qq" "gf_quantile" "gf_raster"
## [41] "gf_rect" "gf_refine" "gf_ribbon" "gf_rug"
## [45] "gf_segment" "gf_smooth" "gf_spline" "gf_spoke"
## [49] "gf_step" "gf_text" "gf_theme" "gf_tile"
## [53] "gf_violin" "gf_vline"
confint(Kids.model, level = 0.9 )
## 5 % 95 %
## (Intercept) 4.860349 14.774075
## width 1.107251 2.207997
Kids.model2 <- lm( length ~ width + domhand, data = KidsFeet)
msummary(Kids.model2)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.83401 2.98622 3.293 0.00223 **
## width 1.65288 0.33629 4.915 1.95e-05 ***
## domhandR 0.03255 0.41892 0.078 0.93849
##
## Residual standard error: 1.039 on 36 degrees of freedom
## Multiple R-squared: 0.4111, Adjusted R-squared: 0.3784
## F-statistic: 12.57 on 2 and 36 DF, p-value: 7.256e-05
mplot(Kids.model2, which = 1:7, system = "gg")
## [[1]]
## `geom_smooth()` using method = 'loess'
##
## [[2]]
##
## [[3]]
## `geom_smooth()` using method = 'loess'
##
## [[4]]
##
## [[5]]
## `geom_smooth()` using method = 'loess'
##
## [[6]]
## `geom_smooth()` using method = 'loess'
##
## [[7]]
gf_lm(length ~ width, color = ~domhand, data = KidsFeet)
plotModel(Kids.model2, system = "gg")
Kids.model3 <- lm( length ~ width * domhand, data = KidsFeet)
msummary(Kids.model3)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.1763 11.5455 1.834 0.0751 .
## width 0.3658 1.3095 0.279 0.7816
## domhandR -12.1304 11.9672 -1.014 0.3177
## width:domhandR 1.3779 1.3549 1.017 0.3161
##
## Residual standard error: 1.038 on 35 degrees of freedom
## Multiple R-squared: 0.428, Adjusted R-squared: 0.379
## F-statistic: 8.73 on 3 and 35 DF, p-value: 0.0001854
plotModel(Kids.model3, system = "gg")
Take a look in the effects
package for a general way to plot complicted models.
Kids.model4 <- lm( log(length) ~ poly(width,2) * domhand, data = KidsFeet)
msummary(Kids.model4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.18004 0.03694 86.082 <2e-16 ***
## poly(width, 2)1 -0.02044 0.21315 -0.096 0.924
## poly(width, 2)2 -0.14225 0.28655 -0.496 0.623
## domhandR 0.02617 0.03777 0.693 0.493
## poly(width, 2)1:domhandR 0.24313 0.21793 1.116 0.273
## poly(width, 2)2:domhandR 0.14893 0.29018 0.513 0.611
##
## Residual standard error: 0.04313 on 33 degrees of freedom
## Multiple R-squared: 0.4336, Adjusted R-squared: 0.3477
## F-statistic: 5.052 on 5 and 33 DF, p-value: 0.001515
glm(sex ~ length * domhand, data = KidsFeet, family = binomial(link = logit)) %>%
msummary()
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.2086 18.0157 -0.955 0.3395
## length 0.6826 0.7352 0.928 0.3532
## domhandR 39.2148 20.4105 1.921 0.0547 .
## length:domhandR -1.5661 0.8302 -1.886 0.0592 .
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.040 on 38 degrees of freedom
## Residual deviance: 45.155 on 35 degrees of freedom
## AIC: 53.155
##
## Number of Fisher Scoring iterations: 4
glm(sex ~ length * domhand, data = KidsFeet, family = binomial(link = logit)) %>%
plotModel(system = "gg")
lm( length ~ sex * domhand, data = KidsFeet) %>%
anova()
## Analysis of Variance Table
##
## Response: length
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 5.988 5.9882 4.0263 0.05257 .
## domhand 1 1.723 1.7227 1.1583 0.28918
## sex:domhand 1 6.205 6.2049 4.1721 0.04868 *
## Residuals 35 52.054 1.4872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov( length ~ sex * domhand, data = KidsFeet)
## Call:
## aov(formula = length ~ sex * domhand, data = KidsFeet)
##
## Terms:
## sex domhand sex:domhand Residuals
## Sum of Squares 5.98815 1.72266 6.20492 52.05350
## Deg. of Freedom 1 1 1 35
##
## Residual standard error: 1.219526
## Estimated effects may be unbalanced
# lattice version of interaction plot
xyplot(length ~ sex, groups = domhand, type = c("a", "p"), data = KidsFeet)
# ggformula version
gf_point(length ~ sex, color = ~domhand, data = KidsFeet) %>%
gf_line(length ~ sex, color = ~domhand, data = KidsFeet,
stat = "summary", group = ~domhand, fun.y = mean)
nls( length ~ a + width^p, data = KidsFeet, start = list(a = 0, p = 1)) %>%
summary()
##
## Formula: length ~ a + width^p
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 11.86976 1.82905 6.49 1.37e-07 ***
## p 1.16251 0.06447 18.03 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.025 on 37 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 4.768e-08