> pdf.options(pointsize = 16)

> b <- matrix(1:9, ncol = 3)

> q
function (save = "default", status = 0, runLast = TRUE)
.Internal(quit(save, status, runLast))



> a <- c(1, 2, 3, 4, 5)

> a
[1] 1 2 3 4 5

> b <- 1:5

> b
[1] 1 2 3 4 5

> getwd()
[1] "/home/alexey/wrk/web/herba/shipunov/school/biol_240/en"

> getwd()
[1] "/home/alexey/wrk/web/herba/shipunov/school/biol_240/en"

> getwd()
[1] "/home/alexey/wrk/web/herba/shipunov/school/biol_240/en"

> dir("data")
[1] "abio_c.txt" "abio.txt" "beer_c.txt" "beer.txt"
[5] "betula_c.txt" "betula.txt" "bugs_new.txt" "bugs.txt"
[9] "cashiers_c.txt" "cashiers.txt" "cplants_c.txt" "cplants.txt"
[13] "daisy_c.txt" "daisy.txt" "elections_c.txt" "elections.txt"
[17] "eq_c.txt" "eq_l.txt" "eq_s.txt" "eq.txt"
[21] "errors_c.txt" "errors.txt" "grades_c.txt" "grades.txt"
[25] "hwc_c.txt" "hwc.txt" "ipomopsis_c.txt" "ipomopsis.txt"
[29] "lakesea_abio.txt" "lakesea_bio.txt" "logit_c.txt" "logit.txt"
[33] "mydata.txt" "mydata1.txt" "mydata3.txt" "poisoning_c.txt"
[37] "poisoning.txt" "pokorm_c.txt" "pokorm.txt" "primula_c.txt"
[41] "primula.txt" "seedlings_c.txt" "seedlings.txt" "seeing_c.txt"
[45] "seeing.txt" "spur_c.txt" "spur.txt" "sundew_c.txt"
[49] "sundew.txt"

> read.table("data/mydata.txt", sep = ";", head = TRUE)
a b c
1 1 2 3
2 4 5 6
3 7 8 9

> read.table("data/mydata1.txt", sep = "\t", head = TRUE)
a b c
one 1 2 3
two 4 5 6
three 7 8 9

> read.table("data/mydata3.txt", dec = ",", sep = ";",
+ h = T)
a b c
1 1 2 3.5
2 4 5 6.0
3 7 8 9.0

> x <- "apple"

> save(x, file = "x.rd")

> exists("x")
[1] TRUE

> rm(x)

> exists("x")
[1] FALSE

> dir()
[1] "000todo" "1-20.png"
[3] "1.txt" "data"
[5] "graph.pdf" "Makefile"
[7] "old.zip" "open"
[9] "pics" "r"
[11] "Rplots.pdf" "rrefc_en.log"
[13] "rrefc_en.pdf" "rrefc_en.tex"
[15] "Sweave.sty" "test-Sweave-002.pdf"
[17] "test-Sweave.aux" "test-Sweave.log"
[19] "test-Sweave.Rnw" "test-Sweave.tex"
[21] "trees.csv" "visual_statistics.aux"
[23] "visual_statistics.log" "visual_statistics.out"
[25] "visual_statistics.pdf" "visual_statistics.r"
[27] "visual_statistics.synctex.gz" "visual_statistics.tex"
[29] "visual_statistics.toc" "visual_statistics.txt"
[31] "x.rd"

> load("x.rd")

> x
[1] "apple"

> write.table(file = "trees.csv", trees, row.names = FALSE,
+ sep = ";", quote = FALSE)

> sink("1.txt", split = TRUE)

> 2 + 2
[1] 4

> sink()

> sink("1.txt", split = TRUE, append = TRUE)

> print("2+2")
[1] "2+2"

> 2 + 2
[1] 4

> sink()
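
A related tool, if only the captured text is needed (no live on-screen copy), is capture.output(); a minimal sketch:

# write the printed result of an expression straight to a file
capture.output(2 + 2, file = "1.txt", append = TRUE)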

> pdf(file = "pics/05430.pdf")

> oldpar <- par(mar = c(2, 2, 2, 1))

> plot(1:20, main = "Title")

> legend("topleft", pch = 1, legend = "My wonderful points")

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/05610.pdf")

> oldpar <- par(mar = c(2, 2, 2, 1))

> plot(cars)

> title(main = "Cars from 1920s")

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/05960.pdf")

> oldpar <- par(mar = c(2, 2, 1, 1))

> library(ggplot2)

> qplot(1:20, 1:20, main = "title")

> par(oldpar)

> dev.off()
null device
1

> png(file = "1-20.png", bg = "transparent")

> plot(1:20)

> text(10, 20, "a")

> dev.off()
null device
1

> pdf(file = "pics/06060.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> old.par <- par(mfrow = c(2, 1))

> hist(cars$speed, main = "")

> hist(cars$dist, main = "")

> par(old.par)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/parametric_to_crop.pdf")

> oldpar <- par(mar = c(0, 0, 0, 0))

> library(plotrix)

> plot(c(-1, 1), c(-1, 1), type = "n", xlab = "", ylab = "",
+ axes = FALSE)

> draw.circle(-0.2, 0, 0.4)

> draw.circle(0.1, 0, 0.9)

> text(-0.2, 0, "parametric", cex = 1.5)

> text(0.1, 0.6, "non-parametric", cex = 1.5)

> par(oldpar)

> dev.off()
null device
1

> x <- c(174, 162, 188, 192, 165, 168, 172.5)

> str(x)
num [1:7] 174 162 188 192 165 ...

> is.vector(x)
[1] TRUE

> is.numeric(x)
[1] TRUE

> sex <- c("male", "female", "male", "male", "female",
+ "male", "male")

> is.character(sex)
[1] TRUE

> is.vector(sex)
[1] TRUE

> str(sex)
chr [1:7] "male" "female" "male" "male" "female" "male" ...

> sex
[1] "male" "female" "male" "male" "female" "male" "male"

> sex[1]
[1] "male"

> sex[2:3]
[1] "female" "male"

> table(sex)
sex
female male
2 5

> sex.f <- factor(sex)

> sex.f
[1] male female male male female male male
Levels: female male

> pdf(file = "pics/08830.pdf")

> oldpar <- par(mar = c(2, 2, 1, 1))

> plot(sex.f)

> par(oldpar)

> dev.off()
null device
1

> is.factor(sex.f)
[1] TRUE

> is.character(sex.f)
[1] FALSE

> str(sex.f)
Factor w/ 2 levels "female","male": 2 1 2 2 1 2 2

> sex.f[5:6]
[1] female male
Levels: female male

> sex.f[6:7]
[1] male male
Levels: female male

> sex.f[6:7, drop = TRUE]
[1] male male
Levels: male

> factor(as.character(sex.f[6:7]))
[1] male male
Levels: male

> as.numeric(sex.f)
[1] 2 1 2 2 1 2 2

> w <- c(69, 68, 93, 87, 59, 82, 72)

> pdf(file = "pics/09290.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(x, w, pch = as.numeric(sex.f), col = as.numeric(sex.f))

> legend("topleft", pch = 1:2, col = 1:2, legend = levels(sex.f))

> par(oldpar)

> dev.off()
null device
1

> m <- c("L", "S", "XL", "XXL", "S", "M", "L")

> m.f <- factor(m)

> m.f
[1] L S XL XXL S M L
Levels: L M S XL XXL

> m.o <- ordered(m.f, levels = c("S", "M", "L", "XL",
+ "XXL"))

> m.o
[1] L S XL XXL S M L
Levels: S < M < L < XL < XXL

> a <- factor(3:5)

> a
[1] 3 4 5
Levels: 3 4 5

> as.numeric(a)
[1] 1 2 3

> as.numeric(as.character(a))
[1] 3 4 5

> pdf(file = "pics/09860.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> daisy.t <- read.table("data/daisy.txt", sep = "\t")

> daisy <- daisy.t$V2

> names(daisy) <- daisy.t$V1

> oldpar <- par(mar = c(7, 4, 4, 2) + 0.1)

> daisy.plot <- barplot(daisy, names.arg = "")

> text(daisy.plot, par("usr")[3] - 0.5, srt = 45, adj = 1,
+ xpd = TRUE, labels = names(daisy))

> par(oldpar)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/10070.pdf")

> oldpar <- par(mar = c(2, 2, 3, 1))

> dotchart(daisy)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/06105.pdf")

> cplants <- read.table("data/cplants.txt", sep = "\t",
+ quote = "", as.is = T)

> library(wordcloud)

> cfams <- data.frame(table(cplants$V3))

> set.seed(5)

> wordcloud(cfams[, 1], cfams[, 2])

> dev.off()
null device
1

> a1 <- c(1, 2, 3, 4, 4, 5, 7, 7, 7, 9, 15, 17)

> a2 <- c(1, 2, 3, 4, 5, 7, 7, 7, 9, 15, 17)

> names(a1) <- rank(a1)

> a1
1 2 3 4.5 4.5 6 8 8 8 10 11 12
1 2 3 4 4 5 7 7 7 9 15 17

> names(a2) <- rank(a2)

> a2
1 2 3 4 5 7 7 7 9 10 11
1 2 3 4 5 7 7 7 9 15 17

> wilcox.test(a2)

Wilcoxon signed rank test with continuity correction

data: a2
V = 66, p-value = 0.003788
alternative hypothesis: true location is not equal to 0


> h <- c(8, 10, NA, NA, 8, NA, 8)

> h
[1] 8 10 NA NA 8 NA 8

> mean(h)
[1] NA

> mean(h, na.rm = TRUE)
[1] 8.5

> mean(na.omit(h))
[1] 8.5

> h.old <- h

> h.old
[1] 8 10 NA NA 8 NA 8

> h[is.na(h)] <- mean(h, na.rm = TRUE)

> h
[1] 8.0 10.0 8.5 8.5 8.0 8.5 8.0

> a <- 1:10

> b <- seq(100, 1000, 100)

> d <- data.frame(a, b)

> d
a b
1 1 100
2 2 200
3 3 300
4 4 400
5 5 500
6 6 600
7 7 700
8 8 800
9 9 900
10 10 1000

> scale(d)
a b
[1,] -1.4863011 -1.4863011
[2,] -1.1560120 -1.1560120
[3,] -0.8257228 -0.8257228
[4,] -0.4954337 -0.4954337
[5,] -0.1651446 -0.1651446
[6,] 0.1651446 0.1651446
[7,] 0.4954337 0.4954337
[8,] 0.8257228 0.8257228
[9,] 1.1560120 1.1560120
[10,] 1.4863011 1.4863011
attr(,"scaled:center")
a b
5.5 550.0
attr(,"scaled:scale")
a b
3.02765 302.76504
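
Here scale() simply centers each column and divides it by its standard deviation; a manual check of the first column (reusing the same d) would be:

# should reproduce the first column of scale(d)
(d$a - mean(d$a))/sd(d$a)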

> m <- 1:4

> m
[1] 1 2 3 4

> ma <- matrix(m, ncol = 2, byrow = TRUE)

> ma
[,1] [,2]
[1,] 1 2
[2,] 3 4

> str(ma)
int [1:2, 1:2] 1 3 2 4

> str(m)
int [1:4] 1 2 3 4

> mb <- m

> mb
[1] 1 2 3 4

> attr(mb, "dim") <- c(2, 2)

> mb
[,1] [,2]
[1,] 1 3
[2,] 2 4

> m3 <- 1:8

> dim(m3) <- c(2, 2, 2)

> m3
, , 1

[,1] [,2]
[1,] 1 3
[2,] 2 4

, , 2

[,1] [,2]
[1,] 5 7
[2,] 6 8


> l <- list("R", 1:3, TRUE, NA, list("r", 4))

> l
[[1]]
[1] "R"

[[2]]
[1] 1 2 3

[[3]]
[1] TRUE

[[4]]
[1] NA

[[5]]
[[5]][[1]]
[1] "r"

[[5]][[2]]
[1] 4



> fred <- list(name = "Fred", wife.name = "Mary", no.children = 3,
+ child.ages = c(1, 5, 9))

> fred
$name
[1] "Fred"

$wife.name
[1] "Mary"

$no.children
[1] 3

$child.ages
[1] 1 5 9


> h[3]
[1] 8.5

> ma[2, 1]
[1] 3

> l[1]
[[1]]
[1] "R"


> str(l[1])
List of 1
$ : chr "R"

> l[[1]]
[1] "R"

> str(l[[1]])
chr "R"

> str(l[[5]])
List of 2
$ : chr "r"
$ : num 4

> names(l) <- c("first", "second", "third", "fourth",
+ "fifth")

> l$first
[1] "R"

> str(l$first)
chr "R"

> names(w) <- c("Nick", "Jenny", "Peter", "Alex", "Kathryn",
+ "Vas", "George")

> w
Nick Jenny Peter Alex Kathryn Vas George
69 68 93 87 59 82 72

> rownames(ma) <- c("a1", "a2")

> colnames(ma) <- c("b1", "b2")

> ma
b1 b2
a1 1 2
a2 3 4

> w["Jenny"]
Jenny
68

> d <- data.frame(weight = w, height = x, size = m.o,
+ sex = sex.f)

> d
weight height size sex
Nick 69 174.0 L male
Jenny 68 162.0 S female
Peter 93 188.0 XL male
Alex 87 192.0 XXL male
Kathryn 59 165.0 S female
Vas 82 168.0 M male
George 72 172.5 L male

> str(d)
'data.frame': 7 obs. of 4 variables:
$ weight: num 69 68 93 87 59 82 72
$ height: num 174 162 188 192 165 ...
$ size : Ord.factor w/ 5 levels "S"<"M"<"L"<"XL"<..: 3 1 4 5 1 2 3
$ sex : Factor w/ 2 levels "female","male": 2 1 2 2 1 2 2

> d$weight
[1] 69 68 93 87 59 82 72

> d[[1]]
[1] 69 68 93 87 59 82 72

> d[, 1]
[1] 69 68 93 87 59 82 72

> d[, "weight"]
[1] 69 68 93 87 59 82 72

> d[, 2:4]
height size sex
Nick 174.0 L male
Jenny 162.0 S female
Peter 188.0 XL male
Alex 192.0 XXL male
Kathryn 165.0 S female
Vas 168.0 M male
George 172.5 L male

> d[, -1]
height size sex
Nick 174.0 L male
Jenny 162.0 S female
Peter 188.0 XL male
Alex 192.0 XXL male
Kathryn 165.0 S female
Vas 168.0 M male
George 172.5 L male

> d[d$sex == "female", ]
weight height size sex
Jenny 68 162 S female
Kathryn 59 165 S female

> d$sex == "female"
[1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE

> d[order(d$sex, d$height), ]
weight height size sex
Jenny 68 162.0 S female
Kathryn 59 165.0 S female
Vas 82 168.0 M male
George 72 172.5 L male
Nick 69 174.0 L male
Peter 93 188.0 XL male
Alex 87 192.0 XXL male

> rev(sort(daisy))
loves me very much loves me madly loves me passionately
24 17 16
loves me a lot loves me a little loves me not at all
15 15 13

> ma <- matrix(m, ncol = 2, byrow = FALSE)

> ma
[,1] [,2]
[1,] 1 3
[2,] 2 4

> d.sorted <- d[order(d$sex, d$height), ]

> d.sorted[, order(names(d.sorted))]
height sex size weight
Jenny 162.0 female S 68
Kathryn 165.0 female S 59
Vas 168.0 male M 82
George 172.5 male L 72
Nick 174.0 male L 69
Peter 188.0 male XL 93
Alex 192.0 male XXL 87

> salary <- c(21, 19, 27, 11, 102, 25, 21)

> mean(salary)
[1] 32.28571

> median(salary)
[1] 21

> a1 <- c(1, 2, 3, 4, 4, 5, 7, 7, 7, 9, 15, 17)

> a2 <- c(1, 2, 3, 4, 5, 7, 7, 7, 9, 15, 17)

> median(a1)
[1] 6

> median(a2)
[1] 7

> attach(trees)

> mean(Girth)
[1] 13.24839

> mean(Height)
[1] 76

> mean(Volume/Height)
[1] 0.3890012

> detach(trees)

> with(trees, mean(Volume/Height))
[1] 0.3890012

> sapply(trees, mean)
Girth Height Volume
13.24839 76.00000 30.17097

> sex <- c("male", "female", "male", "male", "female",
+ "male", "male")

> t.sex <- table(sex)

> mode <- names(t.sex[which.max(t.sex)])

> mode
[1] "male"
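
Note that which.max() returns only the first maximum, so this recipe silently picks a single mode even when several values tie; a sketch that returns all of them:

# all modal values, in case of ties
names(t.sex)[t.sex == max(t.sex)]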

> sd(salary)
[1] 31.15934

> var(salary)
[1] 970.9048

> IQR(salary)
[1] 6

> attach(trees)

> mean(Height)
[1] 76

> median(Height)
[1] 76

> sd(Height)
[1] 6.371813

> IQR(Height)
[1] 8

> detach(trees)

> pdf(file = "pics/14390.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> new.1000 <- sample((median(salary) - IQR(salary)):(median(salary) +
+ IQR(salary)), 1000, replace = TRUE)

> salary2 <- c(salary, new.1000)

> boxplot(salary2, log = "y")

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/14560.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> boxplot(trees)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/14730.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> hist(salary2, breaks = 20, main = "")

> par(oldpar)

> dev.off()
null device
1

> table(cut(salary2, 20))

(10.9,15.6] (15.6,20.1] (20.1,24.6] (24.6,29.2] (29.2,33.8] (33.8,38.3]
84 379 311 232 0 0
(38.3,42.9] (42.9,47.4] (47.4,52] (52,56.5] (56.5,61] (61,65.6]
0 0 0 0 0 0
(65.6,70.2] (70.2,74.7] (74.7,79.2] (79.2,83.8] (83.8,88.3] (88.3,92.9]
0 0 0 0 0 0
(92.9,97.5] (97.5,102]
0 1

> stem(salary, scale = 2)

The decimal point is 1 digit(s) to the right of the |

1 | 19
2 | 1157
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | 2


> pdf(file = "pics/15260.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(density(salary2, adjust = 2), main = "")

> rug(salary2)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/15580.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library("beeswarm")

> beeswarm(trees)

> boxplot(trees, add = TRUE)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/stripchart.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> stripchart(data.frame(scale(trees)), vertical = TRUE)

> par(oldpar)

> dev.off()
null device
1

> lapply(list(salary, salary2), summary)
[[1]]
Min. 1st Qu. Median Mean 3rd Qu. Max.
11.00 20.00 21.00 32.29 26.00 102.00

[[2]]
Min. 1st Qu. Median Mean 3rd Qu. Max.
11.00 18.00 21.00 21.08 24.00 102.00


> summary(attenu)
event mag station dist
Min. : 1.00 Min. :5.000 117 : 5 Min. : 0.50
1st Qu.: 9.00 1st Qu.:5.300 1028 : 4 1st Qu.: 11.32
Median :18.00 Median :6.100 113 : 4 Median : 23.40
Mean :14.74 Mean :6.084 112 : 3 Mean : 45.60
3rd Qu.:20.00 3rd Qu.:6.600 135 : 3 3rd Qu.: 47.55
Max. :23.00 Max. :7.700 (Other):147 Max. :370.00
NA's : 16
accel
Min. :0.00300
1st Qu.:0.04425
Median :0.11300
Mean :0.15422
3rd Qu.:0.21925
Max. :0.81000


> 100 * sapply(trees, sd)/colMeans(trees)
Girth Height Volume
23.686948 8.383964 54.482331

> dir("data")
[1] "abio_c.txt" "abio.txt" "beer_c.txt" "beer.txt"
[5] "betula_c.txt" "betula.txt" "bugs_new.txt" "bugs.txt"
[9] "cashiers_c.txt" "cashiers.txt" "cplants_c.txt" "cplants.txt"
[13] "daisy_c.txt" "daisy.txt" "elections_c.txt" "elections.txt"
[17] "eq_c.txt" "eq_l.txt" "eq_s.txt" "eq.txt"
[21] "errors_c.txt" "errors.txt" "grades_c.txt" "grades.txt"
[25] "hwc_c.txt" "hwc.txt" "ipomopsis_c.txt" "ipomopsis.txt"
[29] "lakesea_abio.txt" "lakesea_bio.txt" "logit_c.txt" "logit.txt"
[33] "mydata.txt" "mydata1.txt" "mydata3.txt" "poisoning_c.txt"
[37] "poisoning.txt" "pokorm_c.txt" "pokorm.txt" "primula_c.txt"
[41] "primula.txt" "seedlings_c.txt" "seedlings.txt" "seeing_c.txt"
[45] "seeing.txt" "spur_c.txt" "spur.txt" "sundew_c.txt"
[49] "sundew.txt"

> err <- read.table("data/errors.txt", h = TRUE, sep = "\t")

> str(err)
'data.frame': 7 obs. of 3 variables:
$ AGE : Factor w/ 6 levels "12","22","23",..: 3 4 3 5 1 6 2
$ NAME : Factor w/ 6 levels "","John","Kate",..: 2 3 1 4 5 6 2
$ HEIGHT: num 172 163 161 16.1 132 155 183

> summary(err)
AGE NAME HEIGHT
12:1 :1 Min. : 16.1
22:1 John :2 1st Qu.:143.5
23:2 Kate :1 Median :161.0
24:1 Lucy :1 Mean :140.3
56:1 Penny:1 3rd Qu.:167.5
a :1 Sasha:1 Max. :183.0

> t.test(salary, mu = mean(salary))

One Sample t-test

data: salary
t = 0, df = 6, p-value = 1
alternative hypothesis: true mean is not equal to 32.28571
95 percent confidence interval:
3.468127 61.103302
sample estimates:
mean of x
32.28571


> wilcox.test(salary, mu = median(salary), conf.int = TRUE)

Wilcoxon signed rank test with continuity correction

data: salary
V = 10, p-value = 0.5896
alternative hypothesis: true location is not equal to 21
80 percent confidence interval:
17.99999 63.50002
sample estimates:
(pseudo)median
24.99994


> pdf(file = "pics/17030.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> qqnorm(salary2, main = "")

> qqline(salary2, col = 2)

> par(oldpar)

> dev.off()
null device
1

> shapiro.test(salary)

Shapiro-Wilk normality test

data: salary
W = 0.61164, p-value = 0.0003726


> shapiro.test(salary2)

Shapiro-Wilk normality test

data: salary2
W = 0.74115, p-value < 2.2e-16


> set.seed(1638)

> shapiro.test(rnorm(100))

Shapiro-Wilk normality test

data: rnorm(100)
W = 0.99337, p-value = 0.9094


> ks.test(salary2, "pnorm")

One-sample Kolmogorov-Smirnov test

data: salary2
D = 1, p-value < 2.2e-16
alternative hypothesis: two-sided
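
D = 1 here mostly reflects that salary2 is being compared with the standard normal N(0, 1), far from the data's own location and spread; a sketch testing against a normal with matching parameters:

# standardize first, so that N(0, 1) is a sensible reference
ks.test(as.vector(scale(salary2)), "pnorm")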


> oldoptions <- options(scipen = 100)

> ks.test(salary2, "pnorm")

One-sample Kolmogorov-Smirnov test

data: salary2
D = 1, p-value < 0.00000000000000022
alternative hypothesis: two-sided


> options(oldoptions)

> source("r/asmisc.r")

> Normality <- function(x, p = 0.05) {
+ ifelse(shapiro.test(x)$p.value > p, "NORMAL", "NOT NORMAL")
+ }
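
One caveat: shapiro.test() is defined only for 3 to 5000 non-missing values, so a more defensive variant of the same helper (a sketch, not part of the original script) could be:

Normality2 <- function(x, p = 0.05) {
    x <- na.omit(x)
    # shapiro.test() accepts only 3..5000 observations
    if (length(x) < 3 || length(x) > 5000) return(NA)
    ifelse(shapiro.test(x)$p.value > p, "NORMAL", "NOT NORMAL")
}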

> sapply(list(salary, salary2), Normality)
[1] "NOT NORMAL" "NOT NORMAL"

> sapply(trees, Normality)
Girth Height Volume
"NORMAL" "NORMAL" "NOT NORMAL"

> sapply(log(trees + 1), Normality)
Girth Height Volume
"NORMAL" "NORMAL" "NORMAL"

> prop.test(x = 356, n = 476, p = 0.7, alternative = "two.sided")

1-sample proportions test with continuity correction

data: 356 out of 476, null probability 0.7
X-squared = 4.9749, df = 1, p-value = 0.02572
alternative hypothesis: true p is not equal to 0.7
95 percent confidence interval:
0.7059174 0.7858054
sample estimates:
p
0.7478992


> str(shapiro.test(rnorm(100)))
List of 4
$ statistic: Named num 0.992
..- attr(*, "names")= chr "W"
$ p.value : num 0.854
$ method : chr "Shapiro-Wilk normality test"
$ data.name: chr "rnorm(100)"
- attr(*, "class")= chr "htest"

> set.seed(1683)

> shapiro.test(rnorm(100))$p.value
[1] 0.8424037

> betula <- read.table("data/betula.txt", h = TRUE)

> str(betula)
'data.frame': 229 obs. of 10 variables:
$ LOC : int 1 1 1 1 1 1 1 1 1 1 ...
$ LEAF.L : int 50 44 45 35 41 53 50 47 52 42 ...
$ LEAF.W : int 37 29 37 26 32 37 40 36 39 40 ...
$ LEAF.MAXW: int 23 20 19 15 18 25 21 21 23 19 ...
$ PUB : int 0 1 0 0 0 0 0 0 1 0 ...
$ PAPILLAE : int 1 1 1 1 1 1 1 1 1 1 ...
$ CATKIN : int 31 25 21 20 24 22 40 25 14 27 ...
$ SCALE.L : num 4 3 4 5.5 5 5 6 5 5 5 ...
$ LOBES : int 0 0 1 0 0 0 0 0 1 0 ...
$ WINGS : int 0 0 0 0 0 0 1 0 0 1 ...

> sapply(betula[, c(2:4, 7:8)], Normality)
LEAF.L LEAF.W LEAF.MAXW CATKIN SCALE.L
"NOT NORMAL" "NOT NORMAL" "NOT NORMAL" "NORMAL" "NOT NORMAL"

> CV <- function(x) {
+ 100 * sd(x)/mean(x)
+ }

> sapply(betula[, c(2:4, 7:8)], CV)
LEAF.L LEAF.W LEAF.MAXW CATKIN SCALE.L
17.93473 20.38630 26.08806 24.17354 24.72061

> prop.test(0.48 * 262, 262)

1-sample proportions test with continuity correction

data: 0.48 * 262 out of 262, null probability 0.5
X-squared = 0.34302, df = 1, p-value = 0.5581
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
0.4183606 0.5422355
sample estimates:
p
0.48


> power.prop.test(p1 = 0.48, p2 = 0.52, power = 0.8)

Two-sample comparison of proportions power calculation

n = 2451.596
p1 = 0.48
p2 = 0.52
sig.level = 0.05
power = 0.8
alternative = two.sided

NOTE: n is number in *each* group


> table(betula[betula$LOC < 3, c("LOC", "LOBES")])
LOBES
LOC 0 1
1 17 4
2 14 16

> prop.test(table(betula[betula$LOC < 3, c("LOC", "LOBES")]))

2-sample test for equality of proportions with continuity correction

data: table(betula[betula$LOC < 3, c("LOC", "LOBES")])
X-squared = 4.7384, df = 1, p-value = 0.0295
alternative hypothesis: two.sided
95 percent confidence interval:
0.05727638 0.62843791
sample estimates:
prop 1 prop 2
0.8095238 0.4666667


> table(betula[, c("LOBES", "WINGS")])
WINGS
LOBES 0 1
0 61 69
1 50 45

> mcnemar.test(table(betula[, c("LOBES", "WINGS")]))

McNemar's Chi-squared test with continuity correction

data: table(betula[, c("LOBES", "WINGS")])
McNemar's chi-squared = 2.7227, df = 1, p-value = 0.09893


> sample1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)

> sample2 <- c(5, 5, 5, 5, 5, 5, 5, 5, 5)

> pdf(file = "pics/20320.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(extra ~ group, data = sleep)

> par(oldpar)

> dev.off()
null device
1

> sapply(unstack(sleep, extra ~ group), Normality)
X1 X2
"NORMAL" "NORMAL"

> t.test(extra ~ group, data = sleep, paired = TRUE)

Paired t-test

data: extra by group
t = -4.0621, df = 9, p-value = 0.002833
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2.4598858 -0.7001142
sample estimates:
mean of the differences
-1.58


> wilcox.test(Ozone ~ Month, data = airquality, subset = Month %in%
+ c(5, 8), conf.int = TRUE)

Wilcoxon rank sum test with continuity correction

data: Ozone by Month
W = 127.5, p-value = 0.0001208
alternative hypothesis: true location shift is not equal to 0
95 percent confidence interval:
-52.99999 -14.99998
sample estimates:
difference in location
-31.99996


> pdf(file = "pics/20890.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> boxplot(Ozone ~ Month, data = airquality, subset = Month %in%
+ c(5, 8))

> par(oldpar)

> dev.off()
null device
1

> a <- 1:9

> b <- rep(5, 9)

> library(effsize)

> cohen.d(a, b)

Cohen's d

d estimate: 0 (negligible)
95 percent confidence interval:
inf sup
-1.059953 1.059953

> cohen.d(extra ~ group, data = sleep)

Cohen's d

d estimate: -0.8321811 (large)
95 percent confidence interval:
inf sup
-1.8691015 0.2047393

> cliff.delta(Ozone ~ Month, data = airquality, subset = Month %in%
+ c(5, 8))

Cliff's Delta

delta estimate: -0.2991453 (small)
95 percent confidence interval:
inf sup
-0.6255598 0.1163964

> with(airquality, table(cut(Temp, quantile(Temp)),
+ Month))
Month
5 6 7 8 9
(56,72] 24 3 0 1 10
(72,79] 5 15 2 9 10
(79,85] 1 7 19 7 5
(85,97] 0 5 10 14 5

> ftable(Titanic, row.vars = 1:3)
Survived No Yes
Class Sex Age
1st Male Child 0 5
Adult 118 57
Female Child 0 1
Adult 4 140
2nd Male Child 0 11
Adult 154 14
Female Child 0 13
Adult 13 80
3rd Male Child 35 13
Adult 387 75
Female Child 17 14
Adult 89 76
Crew Male Child 0 0
Adult 670 192
Female Child 0 0
Adult 3 20

> d <- factor(rep(c("A", "B", "C"), 10), levels = c("A",
+ "B", "C", "D", "E"))

> is.na(d) <- 3:4

> table(factor(d, exclude = NULL))

A B C <NA>
9 10 9 2

> pdf(file = "pics/21660.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> titanic <- apply(Titanic, c(1, 4), sum)

> titanic
Survived
Class No Yes
1st 122 203
2nd 167 118
3rd 528 178
Crew 673 212

> titanic
Survived
Class No Yes
1st 122 203
2nd 167 118
3rd 528 178
Crew 673 212

> mosaicplot(titanic, col = c("red", "green"), main = "",
+ cex.axis = 1)

> par(oldpar)

> dev.off()
null device
1

> x <- margin.table(HairEyeColor, 1:2)

> chisq.test(x)

Pearson's Chi-squared test

data: x
X-squared = 138.29, df = 9, p-value < 2.2e-16


> pdf(file = "pics/21960.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> x <- margin.table(HairEyeColor, c(1, 2))

> assocplot(x)

> par(oldpar)

> dev.off()
null device
1

> tox <- read.table("data/poisoning.txt", h = TRUE)

> head(tox)
ILL CHEESE CRABDIP CRISPS BREAD CHICKEN RICE CAESAR TOMATO ICECREAM CAKE
1 1 1 1 1 2 1 1 1 1 1 1
2 2 1 1 1 2 1 2 2 2 1 1
3 1 2 2 1 2 1 2 1 2 1 1
4 1 1 2 1 1 1 2 1 2 1 1
5 1 1 1 1 2 1 1 1 1 2 1
6 1 1 1 1 1 1 2 1 1 2 1
JUICE WINE COFFEE
1 1 1 1
2 1 1 2
3 2 1 2
4 2 1 2
5 1 1 1
6 1 2 2

> for (m in 2:ncol(tox)) {
+ tmp <- chisq.test(tox$ILL, tox[, m])
+ print(paste(names(tox)[m], tmp$p.value))
+ }
[1] "CHEESE 0.840899679390882"
[1] "CRABDIP 0.94931385140737"
[1] "CRISPS 0.999999999999999"
[1] "BREAD 0.349817724258644"
[1] "CHICKEN 0.311482217451896"
[1] "RICE 0.546434435905853"
[1] "CAESAR 0.000203410168460333"
[1] "TOMATO 0.00591250292451728"
[1] "ICECREAM 0.597712594782716"
[1] "CAKE 0.869479670886473"
[1] "JUICE 0.999999999999999"
[1] "WINE 1"
[1] "COFFEE 0.726555246056369"
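
The same scan can be written without an explicit loop; a compact equivalent (assuming the same tox data frame) is:

# p-values of ILL against every food column, with a multiple-testing correction
pvals <- sapply(tox[, -1], function(v) chisq.test(tox$ILL, v)$p.value)
round(p.adjust(pvals, method = "BH"), 4)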

> pdf(file = "pics/24120.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> assocplot(table(ILL = tox$ILL, CAESAR = tox$CAESAR))

> par(oldpar)

> dev.off()
null device
1

> tea <- matrix(c(3, 1, 1, 3), nrow = 2)

> colnames(tea) <- row.names(tea) <- c("Milk", "Tea")

> tea
Milk Tea
Milk 3 1
Tea 1 3

> chisq.test(tea)

Pearson's Chi-squared test with Yates' continuity correction

data: tea
X-squared = 0.5, df = 1, p-value = 0.4795


> fisher.test(tea)

Fisher's Exact Test for Count Data

data: tea
p-value = 0.4857
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.2117329 621.9337505
sample estimates:
odds ratio
6.408309


> chisq.test(tea, simulate.p.value = T)

Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)

data: tea
X-squared = 2, df = NA, p-value = 0.4693


> pok <- read.table("data/pokorm.txt", h = TRUE, sep = ";")

> source("r/concordance.r")

> cohen.kappa(as.matrix(pok))
Kappa test for nominally classified data
2 categories - 2 methods
kappa (Cohen) = 0.718855 , Z = 8.56608 , p = 0
kappa (Siegel) = 0.67419 , Z = 7.11279 , p = 5.68656e-13
kappa (2*PA-1) = 0.761658


> cor(5:15, 7:17)
[1] 1

> cor(5:15, c(7:16, 23))
[1] 0.9375093

> cor(trees)
Girth Height Volume
Girth 1.0000000 0.5192801 0.9671194
Height 0.5192801 1.0000000 0.5982497
Volume 0.9671194 0.5982497 1.0000000

> x <- rexp(50)

> cor(x, log(x), method = "spearman")
[1] 1

> symnum(cor(longley))
GNP. GNP U A P Y E
GNP.deflator 1
GNP B 1
Unemployed , , 1
Armed.Forces . . 1
Population B B , . 1
Year B B , . B 1
Employed B B . . B B 1
attr(,"legend")
[1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

> pdf(file = "pics/22880.pdf")

> oldpar <- par(mar = c(2, 3, 0, 1))

> cor.l <- cor(longley)

> colnames(cor.l) <- abbreviate(colnames(cor.l))

> rownames(cor.l) <- abbreviate(rownames(cor.l))

> image(1:ncol(cor.l), 1:nrow(cor.l), cor.l, col = heat.colors(22),
+ axes = FALSE, xlab = "", ylab = "")

> axis(1, at = 1:ncol(cor.l), labels = colnames(cor.l))

> axis(2, at = 1:nrow(cor.l), labels = rownames(cor.l),
+ las = 2)

> par(oldpar)

> dev.off()
null device
1

> pdf(file = "pics/23120.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library(ellipse)

> plotcorr(cor.l, type = "lower", mar = c(0, 0, 0, 0))

> par(oldpar)

> dev.off()
null device
1

> with(trees, cor.test(Girth, Height))

Pearson's product-moment correlation

data: Girth and Height
t = 3.2722, df = 29, p-value = 0.002758
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.2021327 0.7378538
sample estimates:
cor
0.5192801


> source("r/pleiad.r")

> i <- read.table("data/abio.txt", sep = "\t", h = TRUE)

> Nnames(i)
Hint: var.number var.name/number.of.NA
[1] 1 inc/0 2 out/0 3 KIO/0 4 KPO/0
[5] 5 KSO/0 6 t11/0 7 Litoral/0 8 Sand/0
[9] 9 Gravel/0 10 Driftwood/0 11 Meadow/0 12 Rocks/0
[13] 13 Boulders/0 14 Marsh/0 15 Pool/0 16 Crowberry/0
[17] 17 Forest/0 18 Stones/0 19 Burns/0 20 Cuts/0
[21] 21 Soil.damages/0 22 Fire.pits/0 23 Constructions/0

> pdf(file = "pics/cover_to_crop.pdf")

> abio.dyn <- cor(i[, 1:6], i[, 7:23], method = "spearman",
+ use = "complete.obs")

> abio.dynn <- abs(abio.dyn[, -c(1, 7, 9, 14)])

> abio.dynn <- ifelse(abio.dynn < 0.2, 0, abio.dynn)

> Pleiad(abio.dynn, abbr = 2, legend = FALSE, pch = 21,
+ cex = 2, bg = "white")

> dev.off()
null device
1

> Cor(i, method = "spearman", use = "complete.obs")
inc out KIO KPO KSO t11 Litoral
inc 1* 0.1786 -0.4179* 0.3991* -0.1782 0.7968* 0.1528
out 0.1786 1* 0.0738 -0.5037* 0.1321 0.4865* 0.127
KIO -0.4179* 0.0738 1* 0.0386 -0.3861* -0.4442* -0.1038
KPO 0.3991* -0.5037* 0.0386 1* -0.7442* -0.1119 -0.0572
KSO -0.1782 0.1321 -0.3861* -0.7442* 1* 0.2226 -0.0582
t11 0.7968* 0.4865* -0.4442* -0.1119 0.2226 1* 0.0442
Litoral 0.1528 0.127 -0.1038 -0.0572 -0.0582 0.0442 1*
Sand 0.3815* 0.2186 -0.22 -0.0782 0.048 0.451* 0.2422
Gravel 0.4042* 0.3869* -0.3262* -0.0463 0.0999 0.4508* 0.2756*
Driftwood 0.5327* 0.1799 -0.5127* 0.0852 0.0891 0.5268* 0.3433*
Meadow 0.5291* 0.1698 -0.4729* 0.0695 0.0318 0.4566* 0.4032*
Rocks -0.3174* -0.208 0.3993* 0.2742* -0.3948* -0.4698* -0.3924*
Boulders 0.0233 0.1046 -0.0194 -0.1467 0.0936 0.0847 0.2103
Marsh 0.4636* 0.1741 -0.2164 0.0823 -0.0116 0.5533* -0.1253
Pool NA NA NA NA NA NA NA
Crowberry 0.3173* 0.226 -0.3931* -0.2958* 0.4051* 0.5308* -0.0136
Forest 0.3219* 0.4711* -0.1218 -0.228 0.1294 0.5289* 0.1046
Stones 0.5856* 0.2144 -0.3361* -0.0418 0.1779 0.6532* -0.0452
Burns -0.041 0.1883 0.2554* -0.0241 -0.0649 -0.0512 -0.0167
Cuts NA NA NA NA NA NA NA
Soil.damages 0.3804* 0.3309* -0.0902 -0.0552 0.0129 0.4756* 0.1009
Fire.pits 0.3457* 0.3988* -0.0975 -0.1769 0.0933 0.5292* 0.0821
Constructions 0.3863* 0.4829* -0.1461 -0.2423 0.1979 0.6279* -0.0259
Sand Gravel Driftwood Meadow Rocks Boulders Marsh
inc 0.3815* 0.4042* 0.5327* 0.5291* -0.3174* 0.0233 0.4636*
out 0.2186 0.3869* 0.1799 0.1698 -0.208 0.1046 0.1741
KIO -0.22 -0.3262* -0.5127* -0.4729* 0.3993* -0.0194 -0.2164
KPO -0.0782 -0.0463 0.0852 0.0695 0.2742* -0.1467 0.0823
KSO 0.048 0.0999 0.0891 0.0318 -0.3948* 0.0936 -0.0116
t11 0.451* 0.4508* 0.5268* 0.4566* -0.4698* 0.0847 0.5533*
Litoral 0.2422 0.2756* 0.3433* 0.4032* -0.3924* 0.2103 -0.1253
Sand 1* 0.306* 0.3109* 0.4912* -0.3589* -0.0756 0.0718
Gravel 0.306* 1* 0.5875* 0.4679* -0.4795* 0.1805 0.2137
Driftwood 0.3109* 0.5875* 1* 0.6683* -0.5316* 0.1925 0.2172
Meadow 0.4912* 0.4679* 0.6683* 1* -0.494* 0.1547 -0.0088
Rocks -0.3589* -0.4795* -0.5316* -0.494* 1* -0.3154* -0.1111
Boulders -0.0756 0.1805 0.1925 0.1547 -0.3154* 1* 0.0541
Marsh 0.0718 0.2137 0.2172 -0.0088 -0.1111 0.0541 1*
Pool NA NA NA NA NA NA NA
Crowberry 0.2143 0.2917* 0.4357* 0.3185* -0.577* 0.1343 0.2464*
Forest 0.3278* 0.3071* 0.0959 0.0793 -0.3763* 0.0865 0.362*
Stones 0.2549* 0.248* 0.439* 0.2899* -0.4426* -0.0209 0.218
Burns -0.1128 0.1049 -0.1594 -0.1095 0.0813 -0.0692 -0.0571
Cuts NA NA NA NA NA NA NA
Soil.damages 0.2289 0.3052* 0.1104 0.0796 -0.3615* 0.1103 0.3777*
Fire.pits 0.2174 0.3814* 0.1396 0.0887 -0.4198* 0.1515 0.2848*
Constructions 0.1413 0.335* 0.2745* 0.1099 -0.2901* 0.258* 0.5723*
Pool Crowberry Forest Stones Burns Cuts Soil.damages
inc NA 0.3173* 0.3219* 0.5856* -0.041 NA 0.3804*
out NA 0.226 0.4711* 0.2144 0.1883 NA 0.3309*
KIO NA -0.3931* -0.1218 -0.3361* 0.2554* NA -0.0902
KPO NA -0.2958* -0.228 -0.0418 -0.0241 NA -0.0552
KSO NA 0.4051* 0.1294 0.1779 -0.0649 NA 0.0129
t11 NA 0.5308* 0.5289* 0.6532* -0.0512 NA 0.4756*
Litoral NA -0.0136 0.1046 -0.0452 -0.0167 NA 0.1009
Sand NA 0.2143 0.3278* 0.2549* -0.1128 NA 0.2289
Gravel NA 0.2917* 0.3071* 0.248* 0.1049 NA 0.3052*
Driftwood NA 0.4357* 0.0959 0.439* -0.1594 NA 0.1104
Meadow NA 0.3185* 0.0793 0.2899* -0.1095 NA 0.0796
Rocks NA -0.577* -0.3763* -0.4426* 0.0813 NA -0.3615*
Boulders NA 0.1343 0.0865 -0.0209 -0.0692 NA 0.1103
Marsh NA 0.2464* 0.362* 0.218 -0.0571 NA 0.3777*
Pool 1* NA NA NA NA NA NA
Crowberry NA 1* 0.0805 0.4369* 0.0527 NA 0.0753
Forest NA 0.0805 1* 0.201 0.1492 NA 0.7859*
Stones NA 0.4369* 0.201 1* -0.131 NA 0.2043
Burns NA 0.0527 0.1492 -0.131 1* NA 0.1948
Cuts NA NA NA NA NA 1* NA
Soil.damages NA 0.0753 0.7859* 0.2043 0.1948 NA 1*
Fire.pits NA 0.1686 0.7695* 0.2781* 0.1981 NA 0.8644*
Constructions NA 0.2545* 0.6418* 0.3415* 0.0953 NA 0.6282*
Fire.pits Constructions
inc 0.3457* 0.3863*
out 0.3988* 0.4829*
KIO -0.0975 -0.1461
KPO -0.1769 -0.2423
KSO 0.0933 0.1979
t11 0.5292* 0.6279*
Litoral 0.0821 -0.0259
Sand 0.2174 0.1413
Gravel 0.3814* 0.335*
Driftwood 0.1396 0.2745*
Meadow 0.0887 0.1099
Rocks -0.4198* -0.2901*
Boulders 0.1515 0.258*
Marsh 0.2848* 0.5723*
Pool NA NA
Crowberry 0.1686 0.2545*
Forest 0.7695* 0.6418*
Stones 0.2781* 0.3415*
Burns 0.1981 0.0953
Cuts NA NA
Soil.damages 0.8644* 0.6282*
Fire.pits 1* 0.6471*
Constructions 0.6471* 1*

> Topm(abio.dyn)
Var1 Var2 Value
1 t11 Stones 0.6532405
2 t11 Constructions 0.6279446
3 inc Stones 0.5856212
4 t11 Marsh 0.5532972
5 inc Driftwood 0.5326938
6 t11 Crowberry 0.5307591
7 t11 Fire.pits 0.5291746
8 inc Meadow 0.5290702
9 t11 Forest 0.5289070
10 t11 Driftwood 0.5267983
11 KIO Driftwood -0.5126661
12 out Constructions 0.4828555
13 t11 Soil.damages 0.4756101
14 KIO Meadow -0.4728845
15 out Forest 0.4710993
16 t11 Rocks -0.4698249
17 inc Marsh 0.4635778
18 t11 Meadow 0.4565908
19 t11 Sand 0.4510082
20 t11 Gravel 0.4507827

> pdf(file = "pics/23870.pdf")

> oldpar <- par(mar = c(4, 4, 1, 1))

> women.lm <- lm(weight ~ height, data = women)

> plot(weight ~ height, data = women, xlab = "Height, in",
+ ylab = "Weight, lb")

> grid()

> abline(women.lm, col = "red")

> source("r/asmisc.r")

> Cladd(women.lm, data = women)

> legend("bottomright", col = 2:1, lty = 1:2, legend = c("linear relationship",
+ "95% confidence bands"))

> par(oldpar)

> dev.off()
null device
1

> summary(women.lm)

Call:
lm(formula = weight ~ height, data = women)

Residuals:
Min 1Q Median 3Q Max
-1.7333 -1.1333 -0.3833 0.7417 3.1167

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -87.51667 5.93694 -14.74 1.71e-09 ***
height 3.45000 0.09114 37.85 1.09e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.525 on 13 degrees of freedom
Multiple R-squared: 0.991, Adjusted R-squared: 0.9903
F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14


> oldpar <- par(mfrow = c(3, 3))

> for (i in 1:9) plot(1:50, rnorm(50), xlab = "Fitted",
+ ylab = "Residuals")

> for (i in 1:9) plot(1:50, (1:50) * rnorm(50), xlab = "Fitted",
+ ylab = "Residuals")

> par(oldpar)

> women.lm2 <- lm(weight ~ height + I(height^2), data = women)

> summary(women.lm2)

Call:
lm(formula = weight ~ height + I(height^2), data = women)

Residuals:
Min 1Q Median 3Q Max
-0.50941 -0.29611 -0.00941 0.28615 0.59706

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 261.87818 25.19677 10.393 2.36e-07 ***
height -7.34832 0.77769 -9.449 6.58e-07 ***
I(height^2) 0.08306 0.00598 13.891 9.32e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3841 on 12 degrees of freedom
Multiple R-squared: 0.9995, Adjusted R-squared: 0.9994
F-statistic: 1.139e+04 on 2 and 12 DF, p-value: < 2.2e-16


> plot(women.lm2, which = 1)

> plot(weight ~ height, data = women)

> Cladd(women.lm2, data = women, ab.lty = 1, ab.col = "red")

> pdf(file = "pics/3265.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> ipo <- read.table("data/ipomopsis.txt", h = T)

> with(ipo, plot(Root, Fruit, pch = as.numeric(Grazing)))

> abline(lm(Fruit ~ Root, data = subset(ipo, Grazing ==
+ "Grazed")))

> abline(lm(Fruit ~ Root, data = subset(ipo, Grazing ==
+ "Ungrazed")), lty = 2)

> legend("topleft", lty = 1:2, legend = c("Grazed",
+ "Ungrazed"))

> par(oldpar)

> dev.off()
pdf
2

> ipo.lm <- lm(Fruit ~ Root * Grazing, data = ipo)

> summary(ipo.lm)

Call:
lm(formula = Fruit ~ Root * Grazing, data = ipo)

Residuals:
Min 1Q Median 3Q Max
-17.3177 -2.8320 0.1247 3.8511 17.1313

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -125.173 12.811 -9.771 1.15e-11 ***
Root 23.240 1.531 15.182 < 2e-16 ***
GrazingUngrazed 30.806 16.842 1.829 0.0757 .
Root:GrazingUngrazed 0.756 2.354 0.321 0.7500
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.831 on 36 degrees of freedom
Multiple R-squared: 0.9293, Adjusted R-squared: 0.9234
F-statistic: 157.6 on 3 and 36 DF, p-value: < 2.2e-16


> ipo.lm2 <- update(ipo.lm, . ~ . - Root:Grazing)

> summary(ipo.lm2)

Call:
lm(formula = Fruit ~ Root + Grazing, data = ipo)

Residuals:
Min 1Q Median 3Q Max
-17.1920 -2.8224 0.3223 3.9144 17.3290

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -127.829 9.664 -13.23 1.35e-15 ***
Root 23.560 1.149 20.51 < 2e-16 ***
GrazingUngrazed 36.103 3.357 10.75 6.11e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.747 on 37 degrees of freedom
Multiple R-squared: 0.9291, Adjusted R-squared: 0.9252
F-statistic: 242.3 on 2 and 37 DF, p-value: < 2.2e-16


> AIC(ipo.lm)
[1] 273.0135

> AIC(ipo.lm2)
[1] 271.1279

> AIC(women.lm)
[1] 59.08158

> AIC(women.lm2)
[1] 18.5127
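
Nested models can also be compared directly with an F-test rather than through AIC; a sketch for the two ipomopsis fits:

# F-test for dropping the Root:Grazing interaction
anova(ipo.lm2, ipo.lm)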

> elections <- read.table("data/elections.txt", h = TRUE)

> str(elections)
'data.frame': 153 obs. of 7 variables:
$ DISTRICT: int 1 2 3 4 5 6 7 8 9 10 ...
$ VOTER : int 329786 144483 709903 696114 717095 787593 696087 691688 730050 164275 ...
$ INVALID : int 2623 859 5656 4392 3837 4715 10456 1819 2403 89 ...
$ VALID : int 198354 97863 664195 619629 653133 655486 397797 632045 667994 161470 ...
$ CAND.1 : int 24565 7884 30491 54999 36880 72166 43559 53046 60355 234 ...
$ CAND.2 : int 11786 6364 11152 11519 10002 25204 28189 5504 2680 451 ...
$ CAND.3 : int 142627 68573 599105 525814 559653 485669 267776 563502 599798 159496 ...

> attach(elections)

> PROP <- cbind(CAND.1, CAND.2, CAND.3)/VOTER

> ATTEN <- (VALID + INVALID)/VOTER

> pdf(file = "pics/27760.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> lm.1 <- lm(CAND.1/VOTER ~ ATTEN)

> lm.2 <- lm(CAND.2/VOTER ~ ATTEN)

> lm.3 <- lm(CAND.3/VOTER ~ ATTEN)

> plot(CAND.3/VOTER ~ ATTEN, xlim = c(0, 1), ylim = c(0,
+ 1), xlab = "Attendance", ylab = "Percent voted for the candidate")

> points(CAND.1/VOTER ~ ATTEN, pch = 2)

> points(CAND.2/VOTER ~ ATTEN, pch = 3)

> abline(lm.3)

> abline(lm.2, lty = 2)

> abline(lm.1, lty = 3)

> legend("topleft", lty = c(3, 2, 1), legend = c("Party 1",
+ "Party 2", "Party 3"))

> detach(elections)

> par(oldpar)

> dev.off()
pdf
2

> elections2 <- cbind(ATTEN, stack(data.frame(PROP)))

> names(elections2) <- c("atten", "percn", "cand")

> str(elections2)
'data.frame': 459 obs. of 3 variables:
$ atten: num 0.609 0.683 0.944 0.896 0.916 ...
$ percn: num 0.0745 0.0546 0.043 0.079 0.0514 ...
$ cand : Factor w/ 3 levels "CAND.1","CAND.2",..: 1 1 1 1 1 1 1 1 1 1 ...

> ancova.v <- lm(percn ~ atten * cand, data = elections2)

> summary(ancova.v)

Call:
lm(formula = percn ~ atten * cand, data = elections2)

Residuals:
Min 1Q Median 3Q Max
-0.116483 -0.015470 -0.000699 0.014825 0.102810

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.117115 0.011973 9.781 < 2e-16 ***
atten -0.070726 0.018266 -3.872 0.000124 ***
candCAND.2 -0.023627 0.016933 -1.395 0.163591
candCAND.3 -0.547179 0.016933 -32.315 < 2e-16 ***
atten:candCAND.2 0.004129 0.025832 0.160 0.873074
atten:candCAND.3 1.393336 0.025832 53.938 < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02591 on 453 degrees of freedom
Multiple R-squared: 0.9824, Adjusted R-squared: 0.9822
F-statistic: 5057 on 5 and 453 DF, p-value: < 2.2e-16


> pdf(file = "pics/28790.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> prp.coast <- read.table("data/primula.txt", as.is = TRUE,
+ h = TRUE)

> plot(yfrac ~ nwse, data = prp.coast, type = "n", xlab = "Distance from Novorossijsk, km",
+ ylab = "Proportion of light-colored flowers, %")

> rect(129, -10, 189, 110, col = gray(0.8), border = NA)

> box()

> mtext("129", at = 128, side = 3, line = 0, cex = 0.8)

> mtext("189", at = 189, side = 3, line = 0, cex = 0.8)

> points(yfrac ~ nwse, data = prp.coast)

> abline(lm(yfrac ~ nwse, data = prp.coast), lty = 2)

> lines(loess.smooth(prp.coast$nwse, prp.coast$yfrac),
+ lty = 1)

> par(oldpar)

> dev.off()
pdf
2

> l <- read.table("data/logit.txt")

> head(l)
V1 V2
1 14 F
2 29 F
3 6 F
4 25 S
5 18 S
6 4 F

> l.logit <- glm(V2 ~ V1, family = binomial, data = l)

> summary(l.logit)

Call:
glm(formula = V2 ~ V1, family = binomial, data = l)

Deviance Residuals:
Min 1Q Median 3Q Max
-1.9987 -0.4584 -0.2245 0.4837 1.5005

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.9638 2.4597 -2.018 0.0436 *
V1 0.2350 0.1163 2.021 0.0432 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 18.249 on 13 degrees of freedom
Residual deviance: 10.301 on 12 degrees of freedom
AIC: 14.301

Number of Fisher Scoring iterations: 5


> tox.logit <- glm(formula = I(2 - ILL) ~ CAESAR + TOMATO,
+ family = binomial, data = tox)

> tox.logit2 <- update(tox.logit, . ~ . - TOMATO)

> tox.logit3 <- update(tox.logit, . ~ . - CAESAR)

> tox.logit$aic
[1] 47.40782

> tox.logit2$aic
[1] 45.94004

> tox.logit3$aic
[1] 53.11957

> pdf(file = "pics/27420.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> hwc <- read.table("data/hwc.txt", h = T)

> str(hwc)
'data.frame': 90 obs. of 3 variables:
$ COLOR : Factor w/ 3 levels "black","blond",..: 1 1 1 1 1 1 1 1 1 1 ...
$ WEIGHT: int 86 78 76 77 71 84 85 79 90 78 ...
$ HEIGHT: int 180 171 158 172 179 170 170 174 173 167 ...

> boxplot(WEIGHT ~ COLOR, data = hwc)

> source("r/asmisc.r")

> sapply(hwc[, 2:3], Normality)
WEIGHT HEIGHT
"NORMAL" "NORMAL"

> wc <- aov(WEIGHT ~ COLOR, data = hwc)

> summary(wc)
Df Sum Sq Mean Sq F value Pr(>F)
COLOR 2 658 329.1 8.449 0.000443 ***
Residuals 87 3388 38.9
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> par(oldpar)

> dev.off()
pdf
2

> pairwise.t.test(hwc$WEIGHT, hwc$COLOR)

Pairwise comparisons using t tests with pooled SD

data: hwc$WEIGHT and hwc$COLOR

black blond
blond 0.00047 -
brown 0.00797 0.32349

P value adjustment method: holm

> TukeyHSD(wc)
Tukey multiple comparisons of means
95% family-wise confidence level

Fit: aov(formula = WEIGHT ~ COLOR, data = hwc)

$COLOR
diff lwr upr p adj
blond-black -6.366667 -10.208955 -2.5243782 0.0004596
brown-black -4.766667 -8.608955 -0.9243782 0.0110105
brown-blond 1.600000 -2.242288 5.4422884 0.5832793


> kruskal.test(WEIGHT ~ COLOR, data = hwc)

Kruskal-Wallis rank sum test

data: WEIGHT by COLOR
Kruskal-Wallis chi-squared = 13.569, df = 2, p-value = 0.001131


> a <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)

> b <- c(5, 5, 5, 5, 5, 5, 5, 5, 5)

> dif <- a - b

> pos.dif <- dif[dif > 0]

> prop.test(length(pos.dif), length(dif))

1-sample proportions test with continuity correction

data: length(pos.dif) out of length(dif), null probability 0.5
X-squared = 0, df = 1, p-value = 1
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
0.1534306 0.7734708
sample estimates:
p
0.4444444
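
With only nine differences, an exact variant of this sign test may be preferable; a sketch using the same counts:

# exact binomial (sign) test on the number of positive differences
binom.test(length(pos.dif), length(dif))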


> ozone.month <- airquality[, c("Ozone", "Month")]

> ozone.month.list <- unstack(ozone.month)

> sapply(ozone.month.list, Normality)
5 6 7 8 9
"NOT NORMAL" "NORMAL" "NORMAL" "NORMAL" "NOT NORMAL"

> grades <- read.table("data/grades.txt")

> classes <- split(grades$V1, grades$V2)

> sapply(classes, Normality)
A1 A2 B1
"NOT NORMAL" "NOT NORMAL" "NOT NORMAL"

> lapply(classes, function(.x) median(.x, na.rm = TRUE))
$A1
[1] 4

$A2
[1] 4

$B1
[1] 5


> wilcox.test(classes$A1, classes$A2, paired = TRUE)

Wilcoxon signed rank test with continuity correction

data: classes$A1 and classes$A2
V = 15.5, p-value = 0.8605
alternative hypothesis: true location shift is not equal to 0


> wilcox.test(classes$B1, classes$A1, alt = "greater")

Wilcoxon rank sum test with continuity correction

data: classes$B1 and classes$A1
W = 306, p-value = 0.02382
alternative hypothesis: true location shift is greater than 0


> cashiers <- read.table("data/cashiers.txt", h = TRUE)

> head(cashiers)
CASHIER.1 CASHIER.2
1 3 12
2 12 12
3 13 9
4 5 6
5 4 2
6 11 9

> sapply(cashiers, Normality)
CASHIER.1 CASHIER.2
"NORMAL" "NORMAL"

> (cashiers.m <- sapply(cashiers, mean))
CASHIER.1 CASHIER.2
8.380952 7.809524

> with(cashiers, t.test(CASHIER.1, CASHIER.2, alt = "greater"))

Welch Two Sample t-test

data: CASHIER.1 and CASHIER.2
t = 0.43577, df = 39.923, p-value = 0.3327
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
-1.636702 Inf
sample estimates:
mean of x mean of y
8.380952 7.809524


> pr <- read.table("data/seedlings.txt", h = TRUE)

> str(pr)
'data.frame': 80 obs. of 2 variables:
$ CID : int 63 63 63 63 63 63 63 63 63 63 ...
$ GERM.14: int 1 1 1 1 1 1 1 1 1 1 ...

> (t1 <- chisq.test(table(pr[pr$CID %in% c(0, 105),
+ ])))

Pearson's Chi-squared test with Yates' continuity correction

data: table(pr[pr$CID %in% c(0, 105), ])
X-squared = 8.0251, df = 1, p-value = 0.004613


> (t2 <- chisq.test(table(pr[pr$CID %in% c(0, 80), ])))

Pearson's Chi-squared test with Yates' continuity correction

data: table(pr[pr$CID %in% c(0, 80), ])
X-squared = 22.727, df = 1, p-value = 1.867e-06


> (t3 <- chisq.test(table(pr[pr$CID %in% c(0, 63), ])))

Pearson's Chi-squared test with Yates' continuity correction

data: table(pr[pr$CID %in% c(0, 63), ])
X-squared = 0.27778, df = 1, p-value = 0.5982


> p.adjust(c(t1$p.value, t2$p.value, t3$p.value), method = "bonferroni")
[1] 1.384021e-02 5.600976e-06 1.000000e+00

> seeing <- read.table("data/seeing.txt")

> attach(seeing)

> seeing.logit <- glm(formula = V3 ~ V2, family = binomial)

> summary(seeing.logit)

Call:
glm(formula = V3 ~ V2, family = binomial)

Deviance Residuals:
Min 1Q Median 3Q Max
-2.4029 -0.8701 0.4299 0.7825 1.5197

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.6776 0.7923 -2.117 0.03423 *
V2 0.9015 0.2922 3.085 0.00203 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 62.687 on 49 degrees of freedom
Residual deviance: 49.738 on 48 degrees of freedom
AIC: 53.738

Number of Fisher Scoring iterations: 4


> pdf(file = "pics/32450.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> tries <- seq(1, 5, 0.081)

> seeing.p <- predict(seeing.logit, list(V2 = tries),
+ type = "response")

> plot(V3 ~ jitter(V2, amount = 0.1))

> lines(tries, seeing.p)

> detach(seeing)

> par(oldpar)

> dev.off()
pdf
2

> summary(aov(HEIGHT ~ COLOR, data = hwc))
Df Sum Sq Mean Sq F value Pr(>F)
COLOR 2 2842 1421.2 43.47 8.17e-14 ***
Residuals 87 2845 32.7
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> pairwise.t.test(hwc$HEIGHT, hwc$COLOR)

Pairwise comparisons using t tests with pooled SD

data: hwc$HEIGHT and hwc$COLOR

black blond
blond 1.6e-13 -
brown 0.026 3.9e-09

P value adjustment method: holm

> pdf(file = "pics/26450.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> cor.test(hwc$WEIGHT, hwc$HEIGHT)

Pearson's product-moment correlation

data: hwc$WEIGHT and hwc$HEIGHT
t = 3.3577, df = 88, p-value = 0.001163
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.1396529 0.5085944
sample estimates:
cor
0.3369977


> w.h <- lm(WEIGHT ~ HEIGHT, data = hwc)

> summary(w.h)

Call:
lm(formula = WEIGHT ~ HEIGHT, data = hwc)

Residuals:
Min 1Q Median 3Q Max
-12.5097 -4.7395 -0.2204 4.7630 17.3835

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 30.73200 13.90175 2.211 0.02965 *
HEIGHT 0.28427 0.08466 3.358 0.00116 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.385 on 88 degrees of freedom
Multiple R-squared: 0.1136, Adjusted R-squared: 0.1035
F-statistic: 11.27 on 1 and 88 DF, p-value: 0.001163


> plot(WEIGHT ~ HEIGHT, data = hwc)

> abline(w.h)

> Cladd(w.h, data = hwc)

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/27180.pdf")

> oldpar <- par(mar = c(0, 0, 0, 0))

> pairs(iris[, 1:4], pch = 21, bg = as.numeric(iris[,
+ 5]))

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/27000.pdf")

> oldpar <- par(mar = c(0, 0, 0, 1))

> library(lattice)

> xyplot(Sepal.Length ~ Petal.Length + Petal.Width |
+ Species, data = iris, auto.key = TRUE)

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/35270.pdf")

> oldpar <- par(mar = c(0, 0, 0, 1))

> coplot(percn ~ atten | cand, data = elections2)

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/27400.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> stars(mtcars[1:9, 1:7], cex = 1.2)

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/27580_to_crop.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library(TeachingDemos)

> faces(mtcars[1:9, 1:7])

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/34390.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library(MASS)

> parcoord(iris[, -5], col = rep(1:3, table(iris[, 5])))

> legend("top", bty = "n", lty = 1, col = 1:3, legend = names(table(iris[,
+ 5])))

> par(oldpar)

> dev.off()
pdf
2

> iris.pca <- princomp(scale(iris[, 1:4]))

> pdf(file = "pics/27890.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(iris.pca, main = "")

> par(oldpar)

> dev.off()
pdf
2

> summary(iris.pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 1.7026571 0.9528572 0.38180950 0.143445939
Proportion of Variance 0.7296245 0.2285076 0.03668922 0.005178709
Cumulative Proportion 0.7296245 0.9581321 0.99482129 1.000000000

> pdf(file = "pics/28180.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> iris.p <- predict(iris.pca)

> plot(iris.p[, 1:2], type = "n", xlab = "PC1", ylab = "PC2")

> text(iris.p[, 1:2], labels = abbreviate(iris[, 5],
+ 1, method = "both.sides"))

> par(oldpar)

> dev.off()
pdf
2

> oldpar <- par()

> pdf(file = "pics/28360.pdf")

> oldpar <- par(mar = c(2, 2, 2, 2))

> biplot(iris.pca)

> par(oldpar)

> dev.off()
pdf
2

> par(oldpar)

> loadings(iris.pca)

Loadings:
Comp.1 Comp.2 Comp.3 Comp.4
Sepal.Length 0.521 -0.377 0.720 0.261
Sepal.Width -0.269 -0.923 -0.244 -0.124
Petal.Length 0.580 -0.142 -0.801
Petal.Width 0.565 -0.634 0.524

Comp.1 Comp.2 Comp.3 Comp.4
SS loadings 1.00 1.00 1.00 1.00
Proportion Var 0.25 0.25 0.25 0.25
Cumulative Var 0.25 0.50 0.75 1.00

> pdf(file = "pics/28750.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library(ade4)

> iris.dudi <- dudi.pca(iris[, 1:4], scannf = FALSE)

> s.class(iris.dudi$li, iris[, 5])

> par(oldpar)

> dev.off()
pdf
2

> iris.between <- bca(iris.dudi, iris[, 5], scannf = FALSE)

> randtest(iris.between)
Monte-Carlo test
Call: randtest.between(xtest = iris.between)

Observation: 0.7224358

Based on 999 replicates
Simulated p-value: 0.001
Alternative hypothesis: greater

Std.Obs Expectation Variance
7.137899e+01 1.313914e-02 9.874499e-05

> library(cluster)

> iris.dist <- daisy(iris[, 1:4], metric = "manhattan")

> pdf(file = "pics/36340.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> library(KernSmooth)

> iris.c <- cmdscale(iris.dist)

> est <- bkde2D(iris.c[, 1:2], bandwidth = c(0.7, 1.5))

> plot(iris.c[, 1:2], type = "n", xlab = "Dim. 1", ylab = "Dim. 2")

> text(iris.c[, 1:2], labels = abbreviate(iris[, 5],
+ 1, method = "both.sides"))

> contour(est$x1, est$x2, est$fhat, add = TRUE, drawlabels = FALSE,
+ lty = 3)

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/29530.pdf")

> oldpar <- par(mar = c(0, 2, 0, 1))

> iriss <- iris[seq(1, nrow(iris), 5), ]

> iriss.dist <- daisy(iriss[, 1:4])

> iriss.h <- hclust(iriss.dist, method = "ward")

> plot(iriss.h, labels = abbreviate(iriss[, 5], 1, method = "both.sides"),
+ main = "")

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/29730.pdf")

> oldpar <- par(mar = c(2, 2, 1, 1))

> library(pvclust)

> irisst <- t(iriss[, 1:4])

> colnames(irisst) <- paste(abbreviate(iriss[, 5], 3),
+ colnames(irisst))

> iriss.pv <- pvclust(irisst, method.dist = "manhattan",
+ method.hclust = "ward", nboot = 100)
Bootstrap (r = 0.5)... Done.
Bootstrap (r = 0.5)... Done.
Bootstrap (r = 0.5)... Done.
Bootstrap (r = 0.75)... Done.
Bootstrap (r = 0.75)... Done.
Bootstrap (r = 1.0)... Done.
Bootstrap (r = 1.0)... Done.
Bootstrap (r = 1.0)... Done.
Bootstrap (r = 1.25)... Done.
Bootstrap (r = 1.25)... Done.

> plot(iriss.pv, col.pv = c(1, 0, 0), main = "")

> par(oldpar)

> dev.off()
pdf
2

> eq <- read.table("data/eq.txt", h = TRUE)

> eq.k <- kmeans(eq[, -1], 2)

> table(eq.k$cluster, eq$SPECIES)

arvense fluviatile
1 1 41
2 37 5

> pdf(file = "pics/36040.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> iris.f <- fanny(iris[, 1:4], 3)

> plot(iris.f, which = 1, main = "")

> head(data.frame(sp = iris[, 5], iris.f$membership))
sp X1 X2 X3
1 setosa 0.9142273 0.03603116 0.04974153
2 setosa 0.8594576 0.05854637 0.08199602
3 setosa 0.8700857 0.05463714 0.07527719
4 setosa 0.8426296 0.06555926 0.09181118
5 setosa 0.9044503 0.04025288 0.05529687
6 setosa 0.7680227 0.09717445 0.13480286

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/37300.pdf")

> oldpar <- par(mar = c(2, 2, 2, 2))

> library(MASS)

> caith
fair red medium dark black
blue 326 38 241 110 3
light 688 116 584 188 4
medium 343 84 909 412 26
dark 98 48 403 681 85

> biplot(corresp(caith, nf = 2))

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/4446.pdf")

> oldpar <- par(mar = c(2, 2, 2, 2))

> require(vegan)

> alla <- read.table("data/lakesea_abio.txt", sep = "\t",
+ h = T)

> allc <- read.table("data/lakesea_bio.txt", sep = "\t",
+ h = T)

> all.cca <- cca(allc, alla[, -14])

> plot.all.cca <- plot(all.cca, display = c("sp", "cn"))

> points(all.cca, display = "sites", pch = ifelse(alla[,
+ 14], 15, 0))

> legend("topleft", pch = c(15, 0), legend = c("lake",
+ "sea"))

> text(-1.6, -4.2, "Carex.lasiocarpa", pos = 4)

> par(oldpar)

> dev.off()
pdf
2

> library(MASS)

> iris.train <- iris[seq(1, nrow(iris), 5), ]

> iris.unknown <- iris[-seq(1, nrow(iris), 5), ]

> iris.lda <- lda(iris.train[, 1:4], iris.train[, 5])

> iris.ldap <- predict(iris.lda, iris.unknown[, 1:4])$class

> table(iris.ldap, iris.unknown[, 5])

iris.ldap setosa versicolor virginica
setosa 40 0 0
versicolor 0 40 7
virginica 0 0 33

> Misclass <- function(pred, obs) {
+ tbl <- table(pred, obs)
+ sum <- colSums(tbl)
+ dia <- diag(tbl)
+ msc <- (sum - dia)/sum * 100
.... [TRUNCATED]
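
The echoed source is cut off above; a hypothetical reconstruction of the rest of the body, consistent with the printout below, could look like this:

Misclass2 <- function(pred, obs) {
    tbl <- table(pred, obs)
    sum <- colSums(tbl)
    dia <- diag(tbl)
    # per-class error: share of each observed class off the diagonal
    msc <- (sum - dia)/sum * 100
    cat("Classification table:\n")
    print(tbl)
    cat("Misclassification errors:\n")
    print(round(msc, 1))
}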

> Misclass(iris.ldap, iris.unknown[, 5])
Classification table:
obs
pred setosa versicolor virginica
setosa 40 0 0
versicolor 0 40 7
virginica 0 0 33
Misclassification errors:
setosa versicolor virginica
0.0 0.0 17.5

> ldam <- manova(as.matrix(iris.unknown[, 1:4]) ~ iris.ldap)

> summary(ldam, test = "Wilks")
Df Wilks approx F num Df den Df Pr(>F)
iris.ldap 2 0.026878 145.34 8 228 < 2.2e-16 ***
Residuals 117
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> pdf(file = "pics/30320.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> iris.lda2 <- lda(iris[, 1:4], iris[, 5])

> iris.ldap2 <- predict(iris.lda2, dimen = 2)$x

> plot(iris.ldap2, type = "n", xlab = "LD1", ylab = "LD2")

> text(iris.ldap2, labels = abbreviate(iris[, 5], 1,
+ method = "both.sides"))

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/30520.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> library(tree)

> iris.tree <- tree(iris[, 5] ~ ., iris[, -5])

> plot(iris.tree)

> text(iris.tree)

> par(oldpar)

> dev.off()
pdf
2

> library(randomForest)

> set.seed(17)

> iris.rf <- randomForest(iris.train[, 5] ~ ., data = iris.train[,
+ 1:4])

> iris.rfp <- predict(iris.rf, iris.unknown[, 1:4])

> table(iris.rfp, iris.unknown[, 5])

iris.rfp setosa versicolor virginica
setosa 40 0 0
versicolor 0 39 7
virginica 0 1 33

> pdf(file = "pics/30890.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> set.seed(17)

> iris.urf <- randomForest(iris[, -5])

> MDSplot(iris.urf, iris[, 5])

> par(oldpar)

> dev.off()
pdf
2

> library(e1071)

> iris.svm <- svm(Species ~ ., data = iris.train)

> iris.svmp <- predict(iris.svm, iris[, 1:4])

> table(iris.svmp, iris[, 5])

iris.svmp setosa versicolor virginica
setosa 50 0 0
versicolor 0 50 14
virginica 0 0 36

> beer <- read.table("data/beer.txt", sep = "\t", h = TRUE)

> head(beer)
C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16
Afanas.light 0 1 0 1 0 0 0 0 1 0 1 0 1 0 0 0
Baltika.3 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0 1
Baltika.6 0 1 1 1 1 0 1 0 1 1 1 0 1 0 0 1
Baltika.9 0 1 1 1 1 0 1 0 1 1 1 0 1 0 0 1
Bochk.light 0 1 0 0 1 1 0 1 1 1 1 1 0 0 0 1
Budweiser 0 1 1 0 1 0 1 0 1 1 0 1 0 0 0 0
C17 C18 C19 C20
Afanas.light 0 0 0 0
Baltika.3 0 0 0 0
Baltika.6 0 0 0 1
Baltika.9 0 0 0 1
Bochk.light 0 0 0 0
Budweiser 1 0 0 1

> pdf(file = "pics/38270.pdf")

> oldpar <- par(mar = c(0, 2, 0, 1))

> library(vegan)

> beer.d <- vegdist(beer, "jaccard")

> plot(hclust(beer.d, "ward"), main = "", xlab = "",
+ sub = "")

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/37980.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> eq <- read.table("data/eq.txt", h = TRUE)

> eq.tree <- tree(eq[, 1] ~ ., eq[, -1])

> plot(eq.tree)

> text(eq.tree)

> par(oldpar)

> dev.off()
pdf
2

> Sweave("test-Sweave.Rnw")
Writing to file test-Sweave.tex
Processing code chunks with options ...
1 : echo keep.source print term verbatim (test-Sweave.Rnw:8)
2 : echo keep.source term verbatim pdf (test-Sweave.Rnw:16)

You can now run (pdf)latex on 'test-Sweave.tex'

> citation()

To cite R in publications use:

R Core Team (2015). R: A language and environment for statistical
computing. R Foundation for Statistical Computing, Vienna, Austria.
URL https://www.R-project.org/.

A BibTeX entry for LaTeX users is

@Manual{,
title = {R: A Language and Environment for Statistical Computing},
author = {{R Core Team}},
organization = {R Foundation for Statistical Computing},
address = {Vienna, Austria},
year = {2015},
url = {https://www.R-project.org/},
}

We have invested a lot of time and effort in creating R, please cite it
when using it for data analysis. See also 'citation("pkgname")' for
citing R packages.


> dir("data")
[1] "abio_c.txt" "abio.txt" "beer_c.txt" "beer.txt"
[5] "betula_c.txt" "betula.txt" "bugs_new.txt" "bugs.txt"
[9] "cashiers_c.txt" "cashiers.txt" "cplants_c.txt" "cplants.txt"
[13] "daisy_c.txt" "daisy.txt" "elections_c.txt" "elections.txt"
[17] "eq_c.txt" "eq_l.txt" "eq_s.txt" "eq.txt"
[21] "errors_c.txt" "errors.txt" "grades_c.txt" "grades.txt"
[25] "hwc_c.txt" "hwc.txt" "ipomopsis_c.txt" "ipomopsis.txt"
[29] "lakesea_abio.txt" "lakesea_bio.txt" "logit_c.txt" "logit.txt"
[33] "mydata.txt" "mydata1.txt" "mydata3.txt" "poisoning_c.txt"
[37] "poisoning.txt" "pokorm_c.txt" "pokorm.txt" "primula_c.txt"
[41] "primula.txt" "seedlings_c.txt" "seedlings.txt" "seeing_c.txt"
[45] "seeing.txt" "spur_c.txt" "spur.txt" "sundew_c.txt"
[49] "sundew.txt"

> data <- read.table("data/bugs.txt", h = TRUE)

> data
SEX COLOR WEIGHT LENGTH
1 0 1 10.68 9.43
2 1 1 10.02 10.66
3 0 2 10.18 10.41
4 1 1 8.01 9.00
5 0 3 10.23 8.98
6 1 3 9.70 9.71
7 1 2 9.73 9.09
8 0 3 11.22 9.23
9 1 1 9.19 8.97
10 1 2 11.45 10.34

> str(data)
'data.frame': 10 obs. of 4 variables:
$ SEX : int 0 1 0 1 0 1 1 0 1 1
$ COLOR : int 1 1 2 1 3 3 2 3 1 2
$ WEIGHT: num 10.68 10.02 10.18 8.01 10.23 ...
$ LENGTH: num 9.43 10.66 10.41 9 8.98 ...

> data.f <- data[data$SEX == 0, ]

> data.m.big <- data[data$SEX == 1 & data$LENGTH > 10,
+ ]

> data$WEIGHT.R <- data$WEIGHT/data$LENGTH

> write.table(data, "data/bugs_new.txt", quote = FALSE)

> summary(data)
SEX COLOR WEIGHT LENGTH
Min. :0.0 Min. :1.00 Min. : 8.010 Min. : 8.970
1st Qu.:0.0 1st Qu.:1.00 1st Qu.: 9.707 1st Qu.: 9.023
Median :1.0 Median :2.00 Median :10.100 Median : 9.330
Mean :0.6 Mean :1.90 Mean :10.041 Mean : 9.582
3rd Qu.:1.0 3rd Qu.:2.75 3rd Qu.:10.568 3rd Qu.:10.182
Max. :1.0 Max. :3.00 Max. :11.450 Max. :10.660
WEIGHT.R
Min. :0.8900
1st Qu.:0.9832
Median :1.0475
Mean :1.0496
3rd Qu.:1.1263
Max. :1.2156

> summary(data$WEIGHT)
Min. 1st Qu. Median Mean 3rd Qu. Max.
8.010 9.708 10.100 10.040 10.570 11.450

> min(data$WEIGHT)
[1] 8.01

> max(data$WEIGHT)
[1] 11.45

> median(data$WEIGHT)
[1] 10.1

> mean(data$WEIGHT)
[1] 10.041

> colMeans(data)
SEX COLOR WEIGHT LENGTH WEIGHT.R
0.600000 1.900000 10.041000 9.582000 1.049648

> mean(data$WEIGHT, na.rm = TRUE)
[1] 10.041

> data.o <- na.omit(data)

> sum(data$WEIGHT)
[1] 100.41

> sum(data[2, ])
[1] 23.61996

> apply(data, 1, sum)
[1] 22.24256 23.61996 23.56791 19.90000 23.34920 24.40897 22.89041 24.66560
[9] 21.18453 25.89735
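
The same row sums are available through the dedicated (and faster)
rowSums(); a one-line sketch:

rowSums(data)  # equivalent to apply(data, 1, sum)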

> table(data$SEX)

0 1
4 6

> table(data$COLOR)

1 2 3
4 3 3

> 100 * table(data$SEX)/length(data$SEX)

0 1
40 60

> round(100 * table(data$SEX)/length(data$SEX), 0)

0 1
40 60

> sd(data$WEIGHT)
[1] 0.994501

> 100 * sd(data$WEIGHT)/mean(data$WEIGHT)
[1] 9.904402

> tapply(data$WEIGHT, data$SEX, mean)
0 1
10.577500 9.683333
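
An equivalent formula-based idiom, shown here as a sketch, returns
the group means as a small data frame instead of a named vector:

aggregate(WEIGHT ~ SEX, data = data, FUN = mean)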

> table(data$COLOR, data$SEX)

0 1
1 1 3
2 1 2
3 2 1

> 100 * table(data$COLOR, data$SEX)/length(data$SEX)

    0  1
  1 10 30
  2 10 20
  3 20 10

> tapply(data$WEIGHT, list(data$SEX, data$COLOR), mean)
1 2 3
0 10.680000 10.18 10.725
1 9.073333 10.59 9.700

> hist(data$WEIGHT, breaks = 20)

> hist(data$WEIGHT, breaks = c(seq(0, 100, 20)))

> qqnorm(data$WEIGHT)

> qqline(data$WEIGHT)
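
The visual Q-Q check can be backed by a formal test; a minimal
sketch using the Shapiro-Wilk test of normality:

shapiro.test(data$WEIGHT)  # H0: the sample is from a normal distribution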

> plot(data$LENGTH, data$WEIGHT, type = "p")

> plot(data$LENGTH, data$WEIGHT, type = "p", cex = 0.5)

> plot(data$LENGTH, data$WEIGHT, type = "p", cex = 2)

> pchShow <- function(extras = c("*", ".", "o", "O",
+ "0", "+", "-", "|", "%", "#"), cex = 3, col = "black", bg = "gray",
+ coltext = "blac ..." ... [TRUNCATED]

> pdf(file = "pics/47690.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> pchShow(c("*", ".", "+"), cex = 2)

> par(oldpar)

> dev.off()
pdf
2

> plot(data$LENGTH, data$WEIGHT, type = "p", pch = 2)

> plot(data$LENGTH, data$WEIGHT, type = "n")

> text(data$LENGTH, data$WEIGHT, labels = data$SEX)

> plot(data$LENGTH, data$WEIGHT, type = "n")

> text(data$LENGTH, data$WEIGHT, labels = data$SEX,
+ col = data$SEX + 1)

> plot(data$LENGTH, data$WEIGHT, type = "n")

> points(data$LENGTH, data$WEIGHT, pch = data$SEX)

> pdf(file = "pics/52050.pdf")

> oldpar <- par(mar = c(4, 4, 1, 1))

> plot(data$LENGTH, data$WEIGHT, type = "n", xlab = "Length",
+ ylab = "Weight")

> text(data$LENGTH, data$WEIGHT, labels = ifelse(data$SEX,
+ "\\MA", "\\VE"), vfont = c("serif", "plain"), cex = 1.5)

> par(oldpar)

> dev.off()
pdf
2

> plot(data$LENGTH, data$WEIGHT, type = "n")

> points(data$LENGTH, data$WEIGHT, pch = data$SEX, col = data$SEX +
+ 1)

> legend("bottomright", c("male", "female"), pch = c(0,
+ 1), col = c(1, 2))

> dev.copy(pdf, "graph.pdf")
pdf
3

> dev.off()
pdf
2

> pdf("graph.pdf")

> plot(data$LENGTH, data$WEIGHT, type = "n")

> points(data$LENGTH, data$WEIGHT, pch = data$SEX, col = data$SEX +
+ 1)

> legend("bottomright", c("male", "female"), pch = c(0,
+ 1), col = c(1, 2))

> dev.off()
pdf
2

> plot(data[order(data$LENGTH), c("LENGTH", "WEIGHT")],
+ type = "o")

> plot(data[order(data$COLOR), c("COLOR", "WEIGHT")],
+ type = "o", ylim = c(5, 15))

> lines(data[order(data$COLOR), c("COLOR", "LENGTH")],
+ lty = 3)

> boxplot(data$LENGTH)

> boxplot(data$LENGTH ~ factor(data$SEX))

> t.test(data$WEIGHT, data$LENGTH, paired = TRUE)

Paired t-test

data: data$WEIGHT and data$LENGTH
t = 1.529, df = 9, p-value = 0.1606
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2201116 1.1381116
sample estimates:
mean of the differences
0.459


> t.test(data$WEIGHT, data$LENGTH, paired = FALSE)

Welch Two Sample t-test

data: data$WEIGHT and data$LENGTH
t = 1.2169, df = 15.62, p-value = 0.2417
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.3421944 1.2601944
sample estimates:
mean of x mean of y
10.041 9.582


> t.test(data$WEIGHT ~ data$SEX)

Welch Two Sample t-test

data: data$WEIGHT by data$SEX
t = 1.7277, df = 7.2425, p-value = 0.1262
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.3213711 2.1097044
sample estimates:
mean in group 0 mean in group 1
10.577500 9.683333


> wilcox.test(data$WEIGHT, data$LENGTH, paired = TRUE)

Wilcoxon signed rank test with continuity correction

data: data$WEIGHT and data$LENGTH
V = 40.5, p-value = 0.202
alternative hypothesis: true location shift is not equal to 0


> oneway.test(data$WEIGHT ~ data$COLOR)

One-way analysis of means (not assuming equal variances)

data: data$WEIGHT and data$COLOR
F = 0.86848, num df = 2.0000, denom df = 4.5846, p-value = 0.4788


> pairwise.t.test(data$WEIGHT, data$COLOR, p.adj = "bonferroni")

Pairwise comparisons using t tests with pooled SD

data: data$WEIGHT and data$COLOR

1 2
2 0.7 -
3 0.8 1.0

P value adjustment method: bonferroni

> kruskal.test(data$WEIGHT ~ data$COLOR)

Kruskal-Wallis rank sum test

data: data$WEIGHT by data$COLOR
Kruskal-Wallis chi-squared = 1.6545, df = 2, p-value = 0.4372


> pairwise.wilcox.test(data$WEIGHT, data$COLOR)

Pairwise comparisons using Wilcoxon rank sum test

data: data$WEIGHT and data$COLOR

1 2
2 1 -
3 1 1

P value adjustment method: holm

> chisq.test(data$COLOR, data$SEX)

Pearson's Chi-squared test

data: data$COLOR and data$SEX
X-squared = 1.3194, df = 2, p-value = 0.517


> prop.test(c(sum(data$SEX)), c(length(data$SEX)), 0.5)

1-sample proportions test with continuity correction

data: c(sum(data$SEX)) out of c(length(data$SEX)), null probability 0.5
X-squared = 0.1, df = 1, p-value = 0.7518
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
0.2736697 0.8630694
sample estimates:
p
0.6


> cor.test(data$WEIGHT, data$LENGTH, method = "pearson")

Pearson's product-moment correlation

data: data$WEIGHT and data$LENGTH
t = 1.2276, df = 8, p-value = 0.2545
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3089373 0.8217631
sample estimates:
cor
0.3981316


> cor.test(data$WEIGHT, data$LENGTH, method = "spearman")

Spearman's rank correlation rho

data: data$WEIGHT and data$LENGTH
S = 104, p-value = 0.2956
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.369697


> summary(lm(data$LENGTH ~ data$SEX))

Call:
lm(formula = data$LENGTH ~ data$SEX)

Residuals:
Min 1Q Median 3Q Max
-0.6583 -0.5369 -0.1825 0.5542 1.0317

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.5125 0.3478 27.349 3.44e-09 ***
data$SEX 0.1158 0.4490 0.258 0.803
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6956 on 8 degrees of freedom
Multiple R-squared: 0.00825, Adjusted R-squared: -0.1157
F-statistic: 0.06655 on 1 and 8 DF, p-value: 0.8029


> anova(lm(data$LENGTH ~ data$SEX))
Analysis of Variance Table

Response: data$LENGTH
Df Sum Sq Mean Sq F value Pr(>F)
data$SEX 1 0.0322 0.03220 0.0665 0.8029
Residuals 8 3.8712 0.48389

> Recode <- function(var, from, to) {
+ x <- as.vector(var)
+ x.tmp <- x
+ for (i in 1:length(from)) {
+ x <- replace(x, x.tmp == from[i], to[i])
+ }
+ x
+ }

> Recode4 <- function(var, from, to, missed = "") {
+ ifelse(Recode(var, from, to) == var, missed, Recode(var,
+ from, to))
+ }

> replace(rep(1:10, 2), c(2, 3, 5), c("a", "b", "c"))
[1] "1" "a" "b" "4" "c" "6" "7" "8" "9" "10" "1" "2" "3" "4" "5"
[16] "6" "7" "8" "9" "10"

> Recode(rep(1:10, 2), c(2, 3, 5), c("a", "b", "c"))
[1] "1" "a" "b" "4" "c" "6" "7" "8" "9" "10" "1" "a" "b" "4" "c"
[16] "6" "7" "8" "9" "10"

> locations <- read.table("data/eq_l.txt", h = T, sep = ";")

> measurements <- read.table("data/eq_s.txt", h = T,
+ sep = ";")

> head(locations)
N.POP WHERE SPECIES
1 1 Tver arvense
2 2 Tver arvense
3 3 Tver arvense
4 4 Tver arvense
5 5 Tver pratense
6 6 Tver palustre

> head(measurements)
N.POP DL.R DIA.ST N.REB N.ZUB DL.OSN.Z DL.TR.V DL.BAZ DL.PER
1 1 424 2.3 13 12 2.0 5 3.0 25
2 1 339 2.0 11 12 1.0 4 2.5 13
3 1 321 2.5 15 14 2.0 5 2.3 13
4 1 509 3.0 14 14 1.5 5 2.2 23
5 1 462 2.5 12 13 1.1 4 2.1 12
6 1 350 1.8 9 9 1.1 4 2.0 15

> loc.N.POP <- Recode(measurements$N.POP, locations$N.POP,
+ as.character(locations$SPECIES))

> head(cbind(species = loc.N.POP, measurements))
species N.POP DL.R DIA.ST N.REB N.ZUB DL.OSN.Z DL.TR.V DL.BAZ DL.PER
1 arvense 1 424 2.3 13 12 2.0 5 3.0 25
2 arvense 1 339 2.0 11 12 1.0 4 2.5 13
3 arvense 1 321 2.5 15 14 2.0 5 2.3 13
4 arvense 1 509 3.0 14 14 1.5 5 2.2 23
5 arvense 1 462 2.5 12 13 1.1 4 2.1 12
6 arvense 1 350 1.8 9 9 1.1 4 2.0 15

> m <- c("Plantago major", "Plantago lanceolata", "Littorella uniflora")

> do.call(rbind, strsplit(m, split = " "))
[,1] [,2]
[1,] "Plantago" "major"
[2,] "Plantago" "lanceolata"
[3,] "Littorella" "uniflora"

> m.o
[1] L S XL XXL S M L
Levels: S < M < L < XL < XXL

> model.matrix(~m.o - 1, data = data.frame(m.o))
m.oS m.oM m.oL m.oXL m.oXXL
1 0 0 1 0 0
2 1 0 0 0 0
3 0 0 0 1 0
4 0 0 0 0 1
5 1 0 0 0 0
6 0 1 0 0 0
7 0 0 1 0 0
attr(,"assign")
[1] 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$m.o
[1] "contr.poly"


> sapply(levels(m.o), function(.x) {
+ d <- rep(0, length(m.o))
+ d[m.o == .x] <- 1
+ d
+ })
S M L XL XXL
[1,] 0 0 1 0 0
[2,] 1 0 0 0 0
[3,] 0 0 0 1 0
[4,] 0 0 0 0 1
[5,] 1 0 0 0 0
[6,] 0 1 0 0 0
[7,] 0 0 1 0 0

> dates.df <- data.frame(dates = c("2011-01-01", "2011-01-02",
+ "2011-01-03", "2011-01-04", "2011-01-05"))

> str(dates.df$dates)
Factor w/ 5 levels "2011-01-01","2011-01-02",..: 1 2 3 4 5

> dates.1 <- as.Date(dates.df$dates, "%Y-%m-%d")

> str(dates.1)
Date[1:5], format: "2011-01-01" "2011-01-02" "2011-01-03" "2011-01-04" ...

> d <- c(20130708, 19990203, 17650101)

> as.Date(as.character(d), "%Y%m%d")
[1] "2013-07-08" "1999-02-03" "1765-01-01"

> ts(1:10, frequency = 4, start = c(1959, 2))
     Qtr1 Qtr2 Qtr3 Qtr4
1959         1    2    3
1960    4    5    6    7
1961    8    9   10

> z <- ts(matrix(rnorm(300), 100, 3), start = c(1961,
+ 1), frequency = 12)

> pdf(file = "pics/32340.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(z, plot.type = "single", lty = 1:3)

> par(oldpar)

> dev.off()
pdf
2

> leaf <- read.table("data/sundew.txt", head = TRUE,
+ as.is = TRUE, sep = ";")

> str(leaf)
'data.frame': 80 obs. of 2 variables:
$ WET : int 2 1 1 2 1 1 1 1 1 1 ...
$ SHAPE: int 1 1 1 2 2 2 2 2 2 2 ...

> summary(leaf)
WET SHAPE
Min. :1.000 Min. :1.0
1st Qu.:1.000 1st Qu.:1.0
Median :1.000 Median :2.0
Mean :1.325 Mean :1.7
3rd Qu.:2.000 3rd Qu.:2.0
Max. :2.000 Max. :2.0

> shape <- ts(leaf$SHAPE, frequency = 36)

> str(shape)
Time-Series [1:80] from 1 to 3.19: 1 1 1 2 2 2 2 2 2 2 ...

> pdf(file = "pics/33160.pdf")

> oldpar <- par(mar = c(4, 4, 0, 1))

> (acf(shape, main = ""))

Autocorrelations of series ‘shape’, by lag

0.0000 0.0278 0.0556 0.0833 0.1111 0.1389 0.1667 0.1944 0.2222 0.2500 0.2778
1.000 0.614 0.287 0.079 0.074 0.068 -0.038 -0.085 -0.132 -0.137 -0.024
0.3056 0.3333 0.3611 0.3889 0.4167 0.4444 0.4722 0.5000 0.5278
0.030 0.025 -0.082 -0.087 0.027 0.021 -0.043 -0.090 -0.137

> par(oldpar)

> dev.off()
pdf
2

> pdf(file = "pics/33310.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(stl(shape, s.window = "periodic")$time.series,
+ main = "")

> par(oldpar)

> dev.off()
pdf
2

> library(boot)

> ave.b <- function(data, indices) {
+ d <- data[indices]
+ return(mean(d))
+ }

> (result.b <- boot(data = trees$Height, statistic = ave.b,
+ R = 100))

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = trees$Height, statistic = ave.b, R = 100)


Bootstrap Statistics :
original bias std. error
t1* 76 -0.09709677 1.060262

> pdf(file = "pics/06074.pdf")

> plot(result.b)

> dev.off()
pdf
2

> boot.ci(result.b, type = "bca")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates

CALL :
boot.ci(boot.out = result.b, type = "bca")

Intervals :
Level BCa
95% (73.62, 77.78 )
Calculations and Intervals on Original Scale
Some BCa intervals may be unstable
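
The instability warning reflects the small number of replicates: BCa
needs the tails of the bootstrap distribution, and R = 100 samples
them poorly. A sketch with a more customary size (results will vary
from run to run):

result.b2k <- boot(data = trees$Height, statistic = ave.b, R = 2000)
boot.ci(result.b2k, type = "bca")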

> spur <- scan("data/spur.txt")

> library(bootstrap)

> result.b2 <- bootstrap(x = spur, 100, function(x) mean(x))

> median(result.b2$thetastar)
[1] 7.51638

> quantile(result.b2$thetastar, probs = c(0.025, 0.975))
2.5% 97.5%
7.443369 7.575536

> result.j <- jackknife(x = spur, function(x) mean(x))

> median(result.j$jack.values)
[1] 7.516424

> result.j$jack.se
[1] 0.04258603

> quantile(result.j$jack.values, probs = c(0.025, 0.975))
2.5% 97.5%
7.513775 7.517748
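
Quantiles of the leave-one-out values are not a confidence interval
(they vary far too little, as seen above); a normal-approximation
sketch built from the jackknife standard error instead:

# 95% interval: estimate +/- 1.96 * SE
mean(spur) + c(-1.96, 1.96) * result.j$jack.se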

> str(classes)
List of 3
$ A1: int [1:23] 5 4 5 5 4 5 3 4 5 3 ...
$ A2: int [1:23] 5 4 5 5 4 4 4 4 5 3 ...
$ B1: int [1:23] 5 4 3 4 5 5 4 5 5 4 ...

> sapply(classes, function(.x) median(.x, na.rm = T))
A1 A2 B1
4 4 5

> set.seed(5998)

> differences <- with(classes, replicate(1000, median(sample(A1,
+ size = length(A1), replace = TRUE), na.rm = TRUE) - median(sample(B1,
+ size = length(B1), replace = TRUE), na.rm = TRUE)))

> quantile(differences, probs = c(0.025, 0.975))
2.5% 97.5%
-1 0

> wet <- ts(leaf$WET, frequency = 36)

> str(wet)
Time-Series [1:80] from 1 to 3.19: 2 1 1 2 1 1 1 1 1 1 ...

> acf(wet)

> plot(stl(wet, s.window = "periodic")$time.series)

> pdf(file = "pics/76680.pdf")

> oldpar <- par(mar = c(2, 2, 0, 1))

> plot(density(rnorm(10^5)), main = "", xlab = "", ylab = "")

> par(oldpar)

> dev.off()
pdf
2