I hit a project where some statistical work needs to be done and found myself in the new country of "R". The r-project.org is a opensource Statistical tool that is widely used in the research arena. Installation is very simple, and you are then meet by a R Console.
Using khanacademy.org for refreshing probability and regression i found this nice simple example:
https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data
Simple enough to generate using Excel but I would like to recreate this in "R".
This was the end solution as a script.
# The short answer
# https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data
# Trying to make R do that same work as Excel did for this video
# Annual income from 1995 in the USA
# What is the projected income up to 2010 ?
annualIncome <- function(x) {
stopifnot(x > 1995)
test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))
lma=lm(test.frame$value~test.frame$year) # let's get a linear fit
# Model control
par(plotgrid=c(2,2))
plot(lma)
test.frame <- data.frame(year=x-1995, value=0) # Testing based on year
predict.lm(lma, test.frame)
}
annualIncome(2010)
> 78914.33
Simple, if you actually knew where to start, that would be stackoverflow :) i am referring to.
http://stackoverflow.com/questions/8352673/doing-linear-prediction-with-r-how-to-access-the-predicted-parameters
#Model Control
#Excel Agrees in the result being 78914.33
# Log from the code trip for learning purposes
# https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data
# Trying to make R do that same work as Excel did for this video
# Annual income from 1995 in the USA
# What is the projected income up to 2010 ?
test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))
lma=lm(test.frame$value~test.frame$year) # let's get a linear fit
summary(lma) # let's see some parameters
attributes(lma) # let's see what parameters we can call
lma$coefficients # I get the intercept and gradient
lma$coefficients[1] # I get the intercept
test.frame <- data.frame(year=8:20, value=0) # Testing for year 8
predict.lm(lma, test.frame)
# based on Intercept and yi we can create the formula below from the data in lm(n ~ yi)
8*1716+53181
9*1716+53181
10*1716+53181
11*1716+53181
# The different approaches for plotting
# Working and learning
yi = c(0:7)
y = c(1995:2002)
print(y)
n = c(53807,55217,55209,55415,63100,63206,63761,63766)
print(n)
mean(n)
sum(n)
summary(n)
# plot(n)
df = data.frame(y, n)
print(df)
summary(df)
## Plot data ############################################################
with(df, plot(y, n))
title(main="Average Income pr Year\nfor a US Household")
## Generate simple linear regression line ############################################################
lm.out = lm(n ~ yi)
lm.out
summary(lm.out)
options(show.signif.stars=F)
anova(lm.out) # shows an ANOVA table
plot(n ~ yi, main="Linera Regression Plot")
abline(lm(n ~ yi), col="blue") # same
# abline(lsfit(0:7, n), col="red") # Looking at the example from abline help we found lsfit also is an option
# handle prediction
predict(lm(n ~ yi))
# lma = lm(n ~ yi)
# attributes(lma) # let's see what parameters we can call
# lma$coefficients
##############################################################
# actual prediction / We could have just made it with this.
# http://stackoverflow.com/questions/8352673/doing-linear-prediction-with-r-how-to-access-the-predicted-parameters
test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))
lma=lm(test.frame$value~test.frame$year) # let's get a linear fit
summary(lma) # let's see some parameters
attributes(lma) # let's see what parameters we can call
lma$coefficients # I get the intercept and gradient
lma$coefficients[1] # I get the intercept
test.frame <- data.frame(year=8:20, value=0) # Testing for year 8
predict.lm(lma, test.frame)
# based on Intercept and yi we can create the formula below from the data in lm(n ~ yi)
8*1716+53181
9*1716+53181
10*1716+53181
11*1716+53181
##############################################################
# Stop can be used inside functions, if outside the rest is executed when run script is run
stop("The rest of the script file is sample work ")
# help(predict)
# help(lsfit)
#summary(lsfit(0:7, n))
#summary(lm(0:7 ~ n))
# help(abline)
# SAMPLE FROM abline a very simple plot approach to this
#require(stats)
#sale5 <- c(53807,55217,55209,55415,63100,63206,63761,63766)
#plot(sale5 ~ yi)
#abline(lsfit(0:7, sale5))
## abline(lsfit(0:7, sale5, intercept = FALSE), col = 4) # less fitting
# help(lm)
# EXAMPLE lm
# ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
# trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
# group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
# weight <- c(ctl, trt)
# lm.D9 <- lm(weight ~ group)
# lm.D90 <- lm(weight ~ group - 1) # omitting intercept
# anova(lm.D9)
# summary(lm.D90)
# opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
# plot(lm.D9, las = 1) # Residuals, Fitted, ...
# par(opar)
# EXAMPLE FUNCTION
# foo <- function(x) {
# stopifnot(x > 500)
# rest of program
# }
# foo(10)
No comments: