Table of Contents
1 Keen Chapter 4
Graphics out of the book Graphics for Statistics and Data Analysis with R by Kevin Keen (book home page)
1.1 Figure 4.17 EDF plot
- first set up the data frame:
- we use the
ecdf()
function to create a step function, ecdfmass()
- from this function we can extract the knots (which will be mapped to the x-axis)
- using these knots as arguments to
ecdfmass()
we get the corresponding probabilities (which we will map to the y-axis)
- the
end
column in df contains the end points of the horizontal lines in the step function - it is simply the knots vector starting at the second element, with the last element set to NA
- we use the
# Mass measurements (g) that all EDF figures in this chapter are built from.
mass <- c(
  5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0, 85.0, 85.0,
  110.0, 115.0, 125.0, 130.0, 120.0, 120.0, 130.0, 135.0, 110.0, 130.0,
  150.0, 145.0, 150.0, 170.0, 225.0, 145.0, 188.0, 180.0, 197.0, 218.0,
  300.0, 260.0, 265.0, 250.0, 250.0, 300.0, 320.0, 514.0, 556.0, 840.0,
  685.0, 700.0, 700.0, 690.0, 900.0, 650.0, 820.0, 850.0, 900.0, 1015.0,
  820.0, 1100.0, 1000.0, 1100.0, 1000.0, 1000.0
)

# ecdf() returns a step function; calling it evaluates the EDF at given points.
ecdfmass <- ecdf(mass)

# Knots of the step function (x positions) and the EDF value at each knot
# (y positions).
kn <- knots(ecdfmass)
ed <- ecdfmass(kn)

# Every horizontal step runs from one knot to the following knot. The last
# knot has no successor, so its end point is NA.
df <- data.frame(
  knots = kn,
  ed = ed,
  end = c(tail(kn, -1), NA)
)
head(df)
knots ed end 1 5.9 0.01785714 32.0 2 32.0 0.03571429 40.0 3 40.0 0.05357143 51.5 4 51.5 0.07142857 70.0 5 70.0 0.08928571 78.0 6 78.0 0.10714286 80.0
- now we first set the aesthetics for the points:
x
to knots
and y
to ed
- add the point layer, setting the point size to 3:
geom_point(size=3)
- add the lines using a segment layer setting the aesthetics
xend
and yend
: geom_segment(aes(xend=end,yend=ed))
- in the next step we add two additional segment layers, one for each arrow; you can also use
annotate()
to do this; inside these segments we use the arrow()
function from the grid
package, so we can define the appearance of our arrows
- the next two lines change the appearance of the axes: setting the limits, the breaks and the expansion
- then set the appropriate axes titles and customize axis elements and
panel.background
# Figure 4.17: EDF drawn as points at the knots plus horizontal segments.
library(grid)    # for the arrow() and unit() functions
library(ggplot2) # ggplot(), the geoms, scales and theme elements used below

ggplot(df, aes(x = knots, y = ed)) +
  # one point per knot of the step function
  geom_point(size = 3) +
  # horizontal line from each knot to the next one; the last row of df has
  # end = NA, so na.rm = TRUE drops it without the missing-values warning
  geom_segment(aes(xend = end, yend = ed), na.rm = TRUE) +
  # left arrow at y = 0, from the smallest knot to the left plot edge
  geom_segment(
    x = min(df$knots), xend = -45, y = 0, yend = 0,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  # right arrow at y = 1, from the largest knot to the right plot edge
  geom_segment(
    x = max(df$knots), xend = 1245, y = 1, yend = 1,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  # fixed limits and breaks; expand = c(0, 0) removes the default padding so
  # the arrows end exactly at the plot edges
  scale_x_continuous(
    limits = c(-45, 1245), breaks = seq(0, 1200, by = 200), expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(-0.01, 1.05), breaks = seq(0, 1, by = 0.2), expand = c(0, 0)
  ) +
  xlab("Mass (g)") +
  ylab("Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm")
  )
1.2 Figure 4.18 EDF plot
- we only have to change the axis breaks of the y-axis and add the horizontal lines
- the first is done changing the
by
argument in the seq()
call to 0.25 in scale_y_continuous()
- then we add a hline layer
# Figure 4.18: as Figure 4.17, with y breaks every 0.25 and dashed horizontal
# reference lines at 0.25, 0.5 and 0.75.
ggplot(df, aes(x = knots, y = ed)) +
  geom_point(size = 3) +
  # the last row of df has end = NA (no following knot); na.rm = TRUE drops
  # it without the missing-values warning
  geom_segment(aes(xend = end, yend = ed), na.rm = TRUE) +
  # left arrow at y = 0, from the smallest knot to the left plot edge
  geom_segment(
    x = min(df$knots), xend = -45, y = 0, yend = 0,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  # right arrow at y = 1, from the largest knot to the right plot edge
  geom_segment(
    x = max(df$knots), xend = 1245, y = 1, yend = 1,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  # dashed reference lines
  geom_hline(yintercept = c(0.25, 0.5, 0.75), linetype = 2) +
  scale_x_continuous(
    limits = c(-45, 1245), breaks = seq(0, 1200, by = 200), expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(-0.01, 1.05), breaks = seq(0, 1, by = 0.25), expand = c(0, 0)
  ) +
  xlab("Mass (g)") +
  ylab("Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm")
  )
- we can also use the grid lines (but then we also get lines at 0 and 1)
# Figure 4.18 (variant): the horizontal reference lines come from the major
# y grid lines of the theme instead of a separate hline layer.
ggplot(df, aes(x = knots, y = ed)) +
  geom_point(size = 3) +
  # the last row of df has end = NA (no following knot); na.rm = TRUE drops
  # it without the missing-values warning
  geom_segment(aes(xend = end, yend = ed), na.rm = TRUE) +
  # left arrow at y = 0, from the smallest knot to the left plot edge
  geom_segment(
    x = min(df$knots), xend = -45, y = 0, yend = 0,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  # right arrow at y = 1, from the largest knot to the right plot edge
  geom_segment(
    x = max(df$knots), xend = 1245, y = 1, yend = 1,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  scale_x_continuous(
    limits = c(-45, 1245), breaks = seq(0, 1200, by = 200), expand = c(0, 0)
  ) +
  # the y breaks determine where the major grid lines are drawn
  scale_y_continuous(
    limits = c(-0.01, 1.05), breaks = seq(0, 1, by = 0.25), expand = c(0, 0)
  ) +
  xlab("Mass (g)") +
  ylab("Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    # keep only the horizontal major grid lines, drawn dashed in grey
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(linetype = 2, colour = "grey50"),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm")
  )
1.3 Figure 4.19 EDF plot
- replace
geom_point()
by geom_step()
- get rid of the horizontal lines
- add another little segment which connects the left arrow with the step function:
geom_segment(x=min(df$knots),xend=min(df$knots),y=0,yend=min(df$ed),size=0.4)
- leave everything else as it is
# Figure 4.19: the EDF drawn as a connected step line built from df.
first_knot <- min(df$knots)
last_knot <- max(df$knots)
arrow_head <- arrow(length = unit(0.15, "cm"))

ggplot(data = df, mapping = aes(x = knots, y = ed)) +
  geom_step(direction = "hv") +
  # arrows extending the step function to both plot edges
  geom_segment(
    x = first_knot, xend = -45, y = 0, yend = 0,
    arrow = arrow_head, size = 0.4
  ) +
  geom_segment(
    x = last_knot, xend = 1245, y = 1, yend = 1,
    arrow = arrow_head, size = 0.4
  ) +
  # vertical riser joining the left arrow (y = 0) to the first step
  geom_segment(
    x = first_knot, xend = first_knot, y = 0, yend = min(df$ed),
    size = 0.4
  ) +
  scale_x_continuous(
    breaks = seq(0, 1200, by = 200),
    limits = c(-45, 1245),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.2),
    limits = c(-0.01, 1.05),
    expand = c(0, 0)
  ) +
  labs(x = "Mass (g)", y = "Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14)
  )
- instead of using the data frame created above you can use the original data (mass) and set
stat
to ecdf
- but for the arrows you need to calculate the vals anyway, that's why I use the data frame above
# Figure 4.19 (variant): let ggplot2 compute the EDF from the raw data via
# stat = "ecdf" instead of plotting the precomputed knots.
df3 <- data.frame(mass = mass)

ggplot(df, aes(x = knots, y = ed)) +
  # this layer uses its own data and mapping, so the plot-level aesthetics
  # (knots/ed) must not be inherited; FALSE is spelled out because F is an
  # ordinary variable that can be reassigned
  geom_step(inherit.aes = FALSE, stat = "ecdf", data = df3, aes(x = mass)) +
  # the arrows and the riser still need the knot values from df
  geom_segment(
    x = min(df$knots), xend = -45, y = 0, yend = 0,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  geom_segment(
    x = max(df$knots), xend = 1245, y = 1, yend = 1,
    arrow = arrow(length = unit(0.15, "cm")), size = 0.4
  ) +
  geom_segment(
    x = min(df$knots), xend = min(df$knots), y = 0, yend = min(df$ed),
    size = 0.4
  ) +
  scale_x_continuous(
    limits = c(-45, 1245), breaks = seq(0, 1200, by = 200), expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(-0.01, 1.05), breaks = seq(0, 1, by = 0.2), expand = c(0, 0)
  ) +
  xlab("Mass (g)") +
  ylab("Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm")
  )
1.4 Figure 4.20 EDF plot
- the same as the last plot, but with horizontal lines added at the quartile levels (0.25, 0.5 and 0.75)
# Figure 4.20: step-line EDF with dashed reference lines at 0.25, 0.5, 0.75.
first_knot <- min(df$knots)
last_knot <- max(df$knots)
arrow_head <- arrow(length = unit(0.15, "cm"))

ggplot(data = df, mapping = aes(x = knots, y = ed)) +
  geom_step(direction = "hv") +
  # arrows extending the step function to both plot edges
  geom_segment(
    x = first_knot, xend = -45, y = 0, yend = 0,
    arrow = arrow_head, size = 0.4
  ) +
  geom_segment(
    x = last_knot, xend = 1245, y = 1, yend = 1,
    arrow = arrow_head, size = 0.4
  ) +
  # vertical riser joining the left arrow (y = 0) to the first step
  geom_segment(
    x = first_knot, xend = first_knot, y = 0, yend = min(df$ed),
    size = 0.4
  ) +
  geom_hline(yintercept = c(0.25, 0.5, 0.75), linetype = 2) +
  scale_x_continuous(
    breaks = seq(0, 1200, by = 200),
    limits = c(-45, 1245),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.25),
    limits = c(-0.01, 1.05),
    expand = c(0, 0)
  ) +
  labs(x = "Mass (g)", y = "Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14)
  )
1.5 Figure 4.20 EDF plot with cumulative normal distribution function added
- first we need to create a second data frame containing the values defining the curve, we choose to use 10000 points on the x-axis and use
pnorm()
to calculate the respective y values (using the empirical mean and the empirical sd of the vector mass
)
- then we add the layer (
geom_line()
)
- change the limits of the x-axis and the breaks inside
scale_x_continuous()
- change the length of the right arrow (setting
xend
to 1500)
# Parameters of the normal distribution fitted to the data, and the x range
# over which its cumulative distribution function is drawn.
mean_mass <- mean(mass)
sd_mass <- sd(mass)
min_mass <- min(mass)
max_mass <- 1500

# Evenly spaced grid from min_mass to max_mass and the normal CDF on it.
xx <- min_mass + seq(0, 10000, by = 1) * (max_mass - min_mass) / 10000
yy <- pnorm(xx, mean = mean_mass, sd = sd_mass)
df2 <- data.frame(xx = xx, yy = yy)

first_knot <- min(df$knots)
last_knot <- max(df$knots)
arrow_head <- arrow(length = unit(0.15, "cm"))

ggplot(data = df, mapping = aes(x = knots, y = ed)) +
  geom_step(direction = "hv") +
  # left arrow down to the plot edge; right arrow now runs out to x = 1500
  geom_segment(
    x = first_knot, xend = -45, y = 0, yend = 0,
    arrow = arrow_head, size = 0.4
  ) +
  geom_segment(
    x = last_knot, xend = 1500, y = 1, yend = 1,
    arrow = arrow_head, size = 0.4
  ) +
  # vertical riser joining the left arrow (y = 0) to the first step
  geom_segment(
    x = first_knot, xend = first_knot, y = 0, yend = min(df$ed),
    size = 0.4
  ) +
  # cumulative normal distribution function
  geom_line(data = df2, mapping = aes(x = xx, y = yy)) +
  scale_x_continuous(
    breaks = seq(0, 1500, by = 500),
    limits = c(-45, 1500),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.2),
    limits = c(-0.01, 1.05),
    expand = c(0, 0)
  ) +
  labs(x = "Mass (g)", y = "Empirical Distribution Function") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(3, "mm"),
    axis.text = element_text(colour = "black", size = 14),
    axis.title = element_text(colour = "black", size = 14)
  )
No comments :
Post a Comment