Thursday, July 9, 2015

label outlier in ggplot2 boxplot

  • function to add labels to outliers in a ggplot2 boxplot
  • the function add.outlier() takes a ggplot boxplot object as input
  • the optional second argument is a string naming the variable that holds the labels; by default, each outlier is labelled with its y value
  • the function expects a unique mapping to x and y, where x is a factor variable
  • the data frame given to the ggplot object must contain the x, y, and labelling variables
# Attach ggplot2. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets the script fail further down.
library(ggplot2)

# Example data: turn `cyl` into a factor so it can serve as the grouping
# variable, and copy the row names into a regular column so they can be used
# as labels.
mtcars$cyl <- factor(mtcars$cyl)
mtcars$labels <- row.names(mtcars)

# Base plot: one box of `qsec` per level of `cyl`.
p <- ggplot(mtcars, aes(x = cyl, colour = cyl, y = qsec)) +
  geom_boxplot()



#' Label the outliers of a ggplot2 boxplot
#'
#' Adds a `geom_text()` layer that annotates every observation lying strictly
#' more than 1.5 * IQR below the first or above the third quartile of its
#' x group, i.e. the points a boxplot with the default `coef = 1.5` draws
#' beyond its whiskers.
#'
#' @param p A ggplot object whose plot-level data (`p$data`) is a data frame
#'   and whose plot-level mapping (`p$mapping`) defines both `x` (the grouping
#'   variable) and `y` (a numeric variable).
#' @param labvar Name of the column of `p$data` holding the labels, as a
#'   single string. When omitted, each outlier is labelled with its y value.
#' @return `p` with an extra text layer, or `p` unchanged when no observation
#'   is an outlier.
add.outlier <- function(p, labvar = as.character(p$mapping$y)) {
  plot_data <- p$data
  if (is.null(p$mapping$x) || is.null(p$mapping$y)) {
    stop("`p` must map both x and y in its plot-level aesthetics.",
         call. = FALSE)
  }

  # Evaluate one aesthetic against the plot data. Old ggplot2 releases store
  # aesthetics as bare symbols/calls; ggplot2 >= 3.0.0 stores quosures, which
  # are one-sided formulas carrying their own environment. Both forms are
  # unpacked with base R only (unclass() avoids rlang's quosure `[[` method).
  eval_aes <- function(aes_expr) {
    enclos <- parent.frame()
    if (inherits(aes_expr, "formula")) {
      enclos <- environment(aes_expr)
      aes_expr <- unclass(aes_expr)[[2L]]
    }
    eval(aes_expr, plot_data, enclos)
  }
  y <- eval_aes(p$mapping$y)
  x <- as.factor(eval_aes(p$mapping$x))

  # The default of `labvar` is deliberately never forced: it cannot be
  # evaluated when the mapping is a quosure. An omitted `labvar` simply means
  # "label with the y value".
  if (missing(labvar)) {
    labels <- y
  } else {
    if (!is.character(labvar) || length(labvar) != 1L ||
        !labvar %in% names(plot_data)) {
      stop("`labvar` must be the name of a single column of the plot data.",
           call. = FALSE)
    }
    labels <- plot_data[[labvar]]
  }

  # Per-group quartile, expanded back to one value per observation. Works for
  # any factor levels (not only numeric-looking ones) and skips missing y.
  group_quantile <- function(prob) {
    per_level <- tapply(y, x, quantile, probs = prob, na.rm = TRUE,
                        names = FALSE)
    as.vector(per_level)[as.integer(x)]
  }
  q1 <- group_quantile(0.25)
  q3 <- group_quantile(0.75)
  reach <- 1.5 * (q3 - q1)

  # Strict comparisons: a point lying exactly on a fence is still covered by
  # the whisker, so it is not an outlier (matters when the IQR is 0).
  is_outlier <- y < q1 - reach | y > q3 + reach
  hits <- which(is_outlier) # which() also drops NA comparisons

  if (length(hits) == 0L) {
    return(p)
  }

  # Flag rows in place instead of merging on (x, y) values: tied outliers get
  # exactly one label each, and every column used by other inherited
  # aesthetics stays available to the text layer.
  label_data <- plot_data[hits, , drop = FALSE]
  label_data$.outlier_label <- labels[hits]

  p + geom_text(data = label_data,
                mapping = aes(label = .outlier_label),
                vjust = -0.5)
}

# Default labelling: each outlier is annotated with its own y value.
add.outlier(p)

# Label the outliers with the names stored in the `labels` column instead.
add.outlier(p, labvar = "labels")