Fast way to read xlsx files into R
Here is a small benchmark test. Results: readxl::read_xlsx
on average about twice as fast as openxlsx::read.xlsx
across different number of rows (n
) and columns (p
) using standard settings.
options(scipen=999) # no scientific number formatnn <- c(1, 10, 100, 1000, 5000, 10000, 20000, 30000)pp <- c(1, 5, 10, 20, 30, 40, 50)# create some excel filesl <- list() # save resultstmp_dir <- tempdir()for (n in nn) { for (p in pp) { name <- cat("\n\tn:", n, "p:", p) flush.console() m <- matrix(rnorm(n*p), n, p) file <- paste0(tmp_dir, "/n", n, "_p", p, ".xlsx") # write write.xlsx(m, file) # read elapsed <- system.time( x <- openxlsx::read.xlsx(file) )["elapsed"] df <- data.frame(fun = "openxlsx::read.xlsx", n = n, p = p, elapsed = elapsed, stringsAsFactors = F, row.names = NULL) l <- append(l, list(df)) elapsed <- system.time( x <- readxl::read_xlsx(file) )["elapsed"] df <- data.frame(fun = "readxl::read_xlsx", n = n, p = p, elapsed = elapsed, stringsAsFactors = F, row.names = NULL) l <- append(l, list(df)) }}# results d <- do.call(rbind, l)library(ggplot2)ggplot(d, aes(n, elapsed, color= fun)) + geom_line() + geom_point() + facet_wrap( ~ paste("columns:", p)) + xlab("Number of rows") + ylab("Seconds")