## Read a 00Index-style package index file into a two-column character
## matrix.
##
## Arguments:
##   file  a character string naming the file to read (the empty string
##         means stdin()), or a connection.
##
## Value:
##   A character matrix with columns "Item" and "Description", holding
##   one row per index entry found.  The matrix has zero rows when no
##   entry is found.
##
## Errors:
##   Signals an error if 'file' is neither a single character string nor
##   a connection.
read.00Index <-
function(file)
{
    ## A single non-NA string is turned into a connection; anything else
    ## that is not already a connection is rejected just below.
    if(is.character(file) && length(file) == 1L && !is.na(file)) {
        if(!nzchar(file))
            file <- stdin()
        else {
            file <- file(file, "r")
            ## Only connections opened here are closed here.
            on.exit(close(file), add = TRUE)
        }
    }
    if(!inherits(file, "connection"))
        stop(gettextf("argument '%s' must be a character string or connection",
                      "file"),
             domain = NA)

    text <- paste(readLines(file), collapse = "\n")

    ## <FIXME>
    ## We cannot necessarily assume that the 00Index-style file to be
    ## read in was generated by Rdindex() or by R using
    ## formatDL(style = "table").  In particular, some packages have
    ## 00Index files with (section) headers and footers in addition to
    ## the data base chunks, which are description lists rendered in
    ## tabular form.  Hence, we need some heuristic for identifying the
    ## db chunks.  Easy to the human eye (is there a column for aligning
    ## entries?) but far from trivial ... as a first approximation we
    ## consider chunks containing at least one tab or three spaces to be
    ## db chunks.  (A better heuristic would be the following: entries
    ## rendered in one line have item and description separated by at
    ## least 3 spaces or tabs; entries with a line break have
    ## continuation lines starting with whitespace (no test for
    ## alignment).  If a chunk is made of such entries only, it is
    ## considered a db chunk.  But not all current packages follow this
    ## scheme.  Argh.)
    ## Clearly we need to move to something better in future versions.
    ## </FIXME>

    ## Turn one paragraph chunk into a two-column matrix of items and
    ## descriptions, or NULL if it does not look like a db chunk.
    parse_chunk <- function(chunk) {
        ## Heuristic: a db chunk contains a tab or a run of three spaces.
        if(!grepl(" {3}|\t", chunk))
            return(NULL)
        ## Combine entries with their continuation lines.
        chunk <- gsub("\n[ \t]+", "\t", chunk)
        ## Split into lines and then according to whitespace.
        fields <- strsplit(unlist(strsplit(chunk, "\n")), "[ \t]")
        items <- vapply(fields, `[[`, "", 1L)
        ## The description is every non-empty field after the first one.
        descriptions <-
            vapply(fields,
                   function(f)
                       paste(f[-c(1L, which(!nzchar(f)))], collapse = " "),
                   "")
        cbind(items, descriptions, deparse.level = 0)
    }

    ## First split into paragraph chunks separated by whitespace-only
    ## lines, then parse each chunk.  Parsing is best effort: a chunk
    ## that cannot be parsed is skipped rather than failing the read.
    chunks <- unlist(strsplit(text, "\n[ \t\n]*\n"))
    pieces <- lapply(chunks, function(chunk)
        tryCatch(parse_chunk(chunk), error = function(e) NULL))
    pieces <- Filter(function(p) !is.null(p) && NCOL(p) == 2L, pieces)

    ## Bind everything once; the empty seed guarantees a 0 x 2 character
    ## matrix when no entries were found.
    y <- do.call(rbind,
                 c(list(matrix("", nrow = 0L, ncol = 2L)), pieces))
    colnames(y) <- c("Item", "Description")
    y
}