I hit a project where some statistical work needs to be done and found myself in the new country of "R". The r-project.org is a opensource Statistical tool that is widely used in the research arena. Installation is very simple, and you are then meet by a R Console.

Using khanacademy.org for refreshing probability and regression i found this nice simple example:

https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data

Simple enough to generate using Excel but I would like to recreate this in "R".

This was the end solution as a script.

# The short answer

# https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data

# Trying to make R do that same work as Excel did for this video

# Annual income from 1995 in the USA

# What is the projected income up to 2010 ?

annualIncome <- function(x) {

stopifnot(x > 1995)

test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))

lma=lm(test.frame$value~test.frame$year) # let's get a linear fit

# Model control

par(plotgrid=c(2,2))

plot(lma)

test.frame <- data.frame(year=x-1995, value=0) # Testing based on year

predict.lm(lma, test.frame)

}

annualIncome(2010)

> 78914.33

Simple, if you actually knew where to start, that would be stackoverflow :) i am referring to.

http://stackoverflow.com/questions/8352673/doing-linear-prediction-with-r-how-to-access-the-predicted-parameters

#Model Control

#Excel Agrees in the result being 78914.33

# Log from the code trip for learning purposes

# https://www.khanacademy.org/math/probability/regression/regression-correlation/v/fitting-a-line-to-data

# Trying to make R do that same work as Excel did for this video

# Annual income from 1995 in the USA

# What is the projected income up to 2010 ?

test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))

lma=lm(test.frame$value~test.frame$year) # let's get a linear fit

summary(lma) # let's see some parameters

attributes(lma) # let's see what parameters we can call

lma$coefficients # I get the intercept and gradient

lma$coefficients[1] # I get the intercept

test.frame <- data.frame(year=8:20, value=0) # Testing for year 8

predict.lm(lma, test.frame)

# based on Intercept and yi we can create the formula below from the data in lm(n ~ yi)

8*1716+53181

9*1716+53181

10*1716+53181

11*1716+53181

# The different approaches for plotting

# Working and learning

yi = c(0:7)

y = c(1995:2002)

print(y)

n = c(53807,55217,55209,55415,63100,63206,63761,63766)

print(n)

mean(n)

sum(n)

summary(n)

# plot(n)

df = data.frame(y, n)

print(df)

summary(df)

## Plot data ############################################################

with(df, plot(y, n))

title(main="Average Income pr Year\nfor a US Household")

## Generate simple linear regression line ############################################################

lm.out = lm(n ~ yi)

lm.out

summary(lm.out)

options(show.signif.stars=F)

anova(lm.out) # shows an ANOVA table

plot(n ~ yi, main="Linera Regression Plot")

abline(lm(n ~ yi), col="blue") # same

# abline(lsfit(0:7, n), col="red") # Looking at the example from abline help we found lsfit also is an option

# handle prediction

predict(lm(n ~ yi))

# lma = lm(n ~ yi)

# attributes(lma) # let's see what parameters we can call

# lma$coefficients

##############################################################

# actual prediction / We could have just made it with this.

# http://stackoverflow.com/questions/8352673/doing-linear-prediction-with-r-how-to-access-the-predicted-parameters

test.frame<-data.frame(year=0:7, value= c(53807,55217,55209,55415,63100,63206,63761,63766))

lma=lm(test.frame$value~test.frame$year) # let's get a linear fit

summary(lma) # let's see some parameters

attributes(lma) # let's see what parameters we can call

lma$coefficients # I get the intercept and gradient

lma$coefficients[1] # I get the intercept

test.frame <- data.frame(year=8:20, value=0) # Testing for year 8

predict.lm(lma, test.frame)

# based on Intercept and yi we can create the formula below from the data in lm(n ~ yi)

8*1716+53181

9*1716+53181

10*1716+53181

11*1716+53181

##############################################################

# Stop can be used inside functions, if outside the rest is executed when run script is run

stop("The rest of the script file is sample work ")

# help(predict)

# help(lsfit)

#summary(lsfit(0:7, n))

#summary(lm(0:7 ~ n))

# help(abline)

# SAMPLE FROM abline a very simple plot approach to this

#require(stats)

#sale5 <- c(53807,55217,55209,55415,63100,63206,63761,63766)

#plot(sale5 ~ yi)

#abline(lsfit(0:7, sale5))

## abline(lsfit(0:7, sale5, intercept = FALSE), col = 4) # less fitting

# help(lm)

# EXAMPLE lm

# ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)

# trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)

# group <- gl(2, 10, 20, labels = c("Ctl","Trt"))

# weight <- c(ctl, trt)

# lm.D9 <- lm(weight ~ group)

# lm.D90 <- lm(weight ~ group - 1) # omitting intercept

# anova(lm.D9)

# summary(lm.D90)

# opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))

# plot(lm.D9, las = 1) # Residuals, Fitted, ...

# par(opar)

# EXAMPLE FUNCTION

# foo <- function(x) {

# stopifnot(x > 500)

# rest of program

# }

# foo(10)

## No comments: