This post demonstrates how to fit non-linear least square (NLS) to predict lizard weight based on snout-length using lizards sampled by pitfall trap at Jornada basin LTER from 1989 - 2006.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(tidyverse)
library(here)
library(broom)
library(kableExtra)
# Establish the fromula that describe the relationship between weight and SVL
length_to_weight <- function(a,SV_length,b) {
out = a*(SV_length)^b
return(out)
}
# Log-transform weight and SVL
lizards <- lizards %>%
mutate(log_length = log(SV_length)) %>%
mutate(log_weight = log(weight))
# Build up OLS regression to get first guess of parameters a and b
my_guess_model <- lm(log_weight~log_length, data = lizards)
# according to coefficients above, b = 2.53712, a = e^-8.47545
# store the guess of a and b as guess_vec
guess_vec=c(exp(my_guess_model$coefficients[1]), my_guess_model$coefficients[2])
lizards_nls = nls(weight~length_to_weight(a,SV_length,b),
data = lizards,
start = list(a = guess_vec[1], b = guess_vec[2]))
# Put model output in `kable` format
lizards_nls_tidy <- broom::tidy(lizards_nls)
lizards_nls_tidy %>%
kbl(caption = "non-linear least square model for all lizard species and sex ",
col.names = c( 'Parameters',
'Coefficient_Estimates',
'Standard_Error',
'Statistics',
'P_Value'),
digits =c(0,5,5,5,5) ) %>%
kable_classic(full_width = F, html_font = "Cambria",position = "center")
Parameters | Coefficient_Estimates | Standard_Error | Statistics | P_Value |
---|---|---|---|---|
a | 0.00034 | 0.00004 | 8.53842 | 0 |
b | 2.45321 | 0.02698 | 90.93001 | 0 |
# Make prediction from NLS model
lizards <- lizards%>%
mutate(prediction = predict(lizards_nls,newdata = lizards))
# Make graph based on prediction
ggplot() +
geom_point(data = lizards,aes(x = SV_length, y = weight, color = sex)) +
geom_line (data = lizards,aes(x = SV_length, y = prediction)) +
labs(x = "Snout to Vent Length", y = "Weight")
Figure 1: This figure demonstrates the result of the fitted model lay above the points from original dataset, with color differentiate the sex of lizards. The black line represents the result of fitted model, with the weight predicted from the nls model above (with SV length from original dataset). The points represent the SV length and weight measured from the real world.
# Filter original dataset only to male Western whiptail lizard
lizards_CNTI <- lizards %>%
filter(sex == "M", spp == "CNTI") %>%
select(-prediction)
# Establish OLS regression model to get parameter a and b
my_guess_model_CNTI <- lm(log_weight~log_length, data = lizards_CNTI)
# store parameter a and b in the guess_vec_CNTI
guess_vec_CNTI <- c(exp(my_guess_model_CNTI$coefficients[1]), my_guess_model_CNTI$coefficients[2])
lizards_nls_CNTI = nls(weight~length_to_weight(a,SV_length,b),
data = lizards_CNTI,
start = list(a = guess_vec_CNTI[1], b = guess_vec_CNTI[2]))
# Make prediction on the weight of Male CNTI lizard by using both general species NLS and species specific NLS
lizards_CNTI <- lizards_CNTI %>%
mutate(prediction_spp_nls = predict(lizards_nls_CNTI,newdata = lizards_CNTI)) %>%
mutate(prediction_general_nls = predict(lizards_nls,newdata = lizards_CNTI))
# Establish the function of RMSE
calc_rmse <- function (x,y) {
rmse_result <- (x - y)^2 %>% mean() %>% sqrt()
return(rmse_result)
}
# calculate RMSE for general NLS and species specific NLS
rmse_spp_nls = calc_rmse(lizards_CNTI$weight, lizards_CNTI$prediction_spp_nls)
rmse_general_nls = calc_rmse(lizards_CNTI$weight, lizards_CNTI$prediction_general_nls)
rmse_result <- c(rmse_general_nls, rmse_spp_nls)
rmse_result
[1] 3.562172 3.349286
ggplot() +
geom_line (data = lizards_CNTI,aes(x = SV_length, y = prediction_general_nls),color = "blue") +
geom_line (data = lizards_CNTI, aes(x = SV_length, y = prediction_spp_nls), color = "red") +
geom_point(data = lizards_CNTI, aes(x = SV_length, y = weight), color = "purple") +
labs(x = "Snout to Vent Length", y = "Weight")
Figure 2: This figure demonstrates the result of the fitted model from both NLS general and NLS species specific models. The red line represents the fitted model from NLS species specific model, and the blue line represents the fitted model from NLS general model. The purple dots represent the original data for Western whiptail lizard. From the graph, it can be seen that the two fitted models are closed enough, but from RMSE result above, it shows NLS species specific model performs better than NLS general model on species specific data with lower RMSE (general RMSE 3.56 > species specific RMSE 3.35). therefore, we should establish NLS model for each subsets we want to look at rather than use the general model to predict all.