Is there a function to create a repeating list of letters in R?
something like
letters[1:30]
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" ...
It's not too difficult to piece together a quick function to do something like this:
## Build `length.out` labels by cycling through a-z; on the k-th pass over the
## alphabet every letter is repeated k times ("a".."z", "aa".."zz", "aaa", ...).
##
## length.out: number of labels wanted (passed straight to rep()).
## Returns a character vector of labels.
myLetters <- function(length.out) {
  cycled <- rep(letters, length.out = length.out)
  # Pass number per position: 1 for the first 26 entries, 2 for the next 26, ...
  pass <- (seq_along(cycled) - 1L) %/% length(letters) + 1L
  strrep(cycled, pass)
}
myLetters(60)
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"
# [13] "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x"
# [25] "y" "z" "aa" "bb" "cc" "dd" "ee" "ff" "gg" "hh" "ii" "jj"
# [37] "kk" "ll" "mm" "nn" "oo" "pp" "qq" "rr" "ss" "tt" "uu" "vv"
# [49] "ww" "xx" "yy" "zz" "aaa" "bbb" "ccc" "ddd" "eee" "fff" "ggg" "hhh"
A function to produce Excel-style column names, i.e.
# A, B, ..., Z, AA, AB, ..., AZ, BA, BB, ..., ..., ZZ, AAA, ...
## Return the first `n` Excel-style column names (A..Z, AA..ZZ, AAA, ...).
##
## n:     how many names to return; must be >= 0.
## depth: name length at which generation starts. Callers normally leave this
##        at 1; the function raises it itself when it recurses.
## Returns a character vector of length `n` (character(0) when n is 0).
letterwrap <- function(n, depth = 1) {
  stopifnot(n >= 0)
  # Guard the empty request explicitly: `x[1:n]` becomes `x[c(1, 0)]` when
  # n is 0 and would hand back "A" instead of nothing.
  if (n == 0) {
    return(character(0))
  }
  pools <- rep(list(LETTERS), depth)
  grid <- expand.grid(pools, stringsAsFactors = FALSE)
  # expand.grid varies its first column fastest; reverse the columns so the
  # last letter cycles fastest, as in spreadsheet column names.
  grid <- grid[, rev(names(grid)), drop = FALSE]
  block <- do.call(paste0, grid)
  if (n <= length(block)) {
    return(block[seq_len(n)])
  }
  # Not enough names of this length: keep them all and continue one letter longer.
  c(block, letterwrap(n - length(block), depth = depth + 1))
}
letterwrap(26^2 + 52) # through AAZ
Initially I thought this would best be done cleverly by converting to base 26, but that doesn't work. The issue is that Excel column names aren't base 26, which took me a long time to realize. The catch is 0: if you try to map a letter (like `A`) to 0, you've got a problem when you want to distinguish between `A` and `AA` and `AAA`...
Another way to illustrate the problem is in "digits". In base 10, there are 10 single-digit numbers (0-9), then 90 double-digit numbers (10:99), 900 three-digit numbers... generalizing to `10^d - 10^(d - 1)` numbers with `d` digits for `d > 1`. However, in Excel column names there are 26 single-letter names, 26^2 double-letter names, 26^3 triple-letter names, with no subtraction.
I'll leave this code as a warning to others:
## Splits a non-negative number into base-26 "digits" (values 0-25), most
## significant digit first.
##
## n: a single number >= 0. Values <= 1 are returned unchanged.
## Returns a numeric vector with one element per digit.
##
## NOTE(review): the digit count is ceiling(log26(n)), so an exact power of 26
## gets one digit fewer than positional notation needs (b26(26) is 0, not 1 0).
## Kept as-is because the surrounding text presents this as a cautionary example.
b26 <- function(n) {
  stopifnot(n >= 0)
  if (n <= 1) {
    return(n)
  }
  n_digits <- ceiling(log(n, base = 26))
  digits <- numeric(n_digits)
  # Peel off remainders from the least significant end, writing them
  # right-to-left so no final reversal is needed.
  for (pos in rev(seq_len(n_digits))) {
    digits[pos] <- n %% 26
    n <- n %/% 26
  }
  digits
}
## Attempts to name the nth value in the sequence
## A, B, C, ..., Z, AA, AB, AC, ..., AZ, BA, ...
## by mapping the digits from b26() onto letters, with a 0 digit shown as the
## 26th letter.
##
## n:     position in the sequence (handed to b26()).
## lower: use lowercase letters when TRUE.
## Returns a single string.
##
## NOTE(review): this is the cautionary version; it is wrong whenever a digit
## is 0 without borrowing from the digit before it (e.g. 52 comes out as "BZ").
letterwrap1 <- function(n, lower = FALSE) {
  alphabet <- LETTERS
  if (lower) {
    alphabet <- letters
  }
  digits <- b26(n)
  digits <- replace(digits, digits == 0, 26)
  paste0(alphabet[digits], collapse = "")
}
## Vectorized version of letterwrap1: maps each element of `n` to its name.
## This must wrap letterwrap1 (one name per index), not letterwrap, which
## returns the first n names and would yield a list of growing vectors here.
letter_col_names <- Vectorize(letterwrap1, vectorize.args = "n")
> letter_col_names(1:4)
[1] "A" "B" "C" "D"
> letter_col_names(25:30)
[1] "Y" "Z" "AA" "AB" "AC" "AD"
# Looks pretty good
# Until we get here:
> letter_col_names(50:54)
[1] "AX" "AY" "BZ" "BA" "BB"
Probably not the cleanest, but easy to see what's happening:
## Build `outlen` labels: a-z, then aa-zz, then aaa-zzz, ...
outlen <- 73 # or whatever length you want
full_passes <- outlen %/% 26 # complete runs through the alphabet
leftover <- outlen %% 26     # letters needed from the final, partial run
foo <- character(outlen)     # preallocate instead of growing with c()
# seq_len() gives an empty loop for a count of 0, so outlen < 26 and exact
# multiples of 26 need no special casing (1:0 and 2:1 would count downwards).
for (j in seq_len(full_passes)) {
  foo[26 * (j - 1) + seq_len(26)] <- strrep(letters, j)
}
for (jj in seq_len(leftover)) {
  foo[26 * full_passes + jj] <- strrep(letters[jj], full_passes + 1)
}
foo
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n"
[15] "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "aa" "bb"
[29] "cc" "dd" "ee" "ff" "gg" "hh" "ii" "jj" "kk" "ll" "mm" "nn" "oo" "pp"
[43] "qq" "rr" "ss" "tt" "uu" "vv" "ww" "xx" "yy" "zz" "aaa" "bbb" "ccc" "ddd"
[57] "eee" "fff" "ggg" "hhh" "iii" "jjj" "kkk" "lll" "mmm" "nnn" "ooo" "ppp" "qqq" "rrr"
[71] "sss" "ttt" "uuu"
EDIT: Matthew wins, hands-down:
# Time the three candidate implementations on 5000 labels, 10 runs each.
# NOTE(review): anandaLetters, matthewletters and carlletters are not defined
# in this excerpt; presumably they wrap the answers shown in this thread.
microbenchmark(
  anandaLetters(5000),
  matthewletters(5000),
  carlletters(5000),
  times = 10
)
Unit: milliseconds
expr min lq median uq max neval
anandaLetters(5000) 85.339200 85.567978 85.9827715 86.260298 86.612231 10
matthewletters(5000) 3.413706 3.503506 3.9067535 3.946950 4.106453 10
carlletters(5000) 94.893983 95.405418 96.4492430 97.234784 110.681780 10
Here is a small correction for sequences like "AY" "BZ": whenever a digit comes out as 0 (and is therefore shown as Z), you have to subtract one from the preceding digit, i.e. borrow from it.
## Convert an Excel-style column name to its 1-based column number
## ("A" -> 1, "Z" -> 26, "AA" -> 27, ...).
##
## x: a single string made of the letters A-Z; lowercase is accepted.
## Returns a numeric scalar; the empty string gives 0.
## Stops with an error if `x` contains anything other than letters.
colExcel2num <- function(x) {
  stopifnot(is.character(x), length(x) == 1L, !is.na(x))
  # Letter values 1..26; upper-casing first makes "aa" and "AA" agree.
  digits <- utf8ToInt(toupper(x)) - utf8ToInt("A") + 1L
  if (any(digits < 1L | digits > 26L)) {
    stop("`x` must contain only the letters A-Z", call. = FALSE)
  }
  # The leftmost letter carries the highest power of 26.
  powers <- rev(seq_along(digits)) - 1L
  sum(digits * 26^powers)
}
## Splits a non-negative number into base-26 "digits" (values 0-25), most
## significant digit first.
##
## n: a single number >= 0. Values <= 1 are returned unchanged.
## Returns a numeric vector with one element per digit.
##
## NOTE(review): the digit count is ceiling(log26(n)), so an exact power of 26
## gets one digit fewer than positional notation needs (b26(26) is 0, not 1 0).
b26 <- function(n) {
  stopifnot(n >= 0)
  if (n <= 1) {
    return(n)
  }
  n_digits <- ceiling(log(n, base = 26))
  digits <- numeric(n_digits)
  # Peel off remainders from the least significant end, writing them
  # right-to-left so no final reversal is needed.
  for (pos in rev(seq_len(n_digits))) {
    digits[pos] <- n %% 26
    n <- n %/% 26
  }
  digits
}
## Returns the Excel column name for a given column position
## A, B, C, ..., Z, AA, AB, AC, ..., AZ, BA, ...
##
## n:     a single whole number >= 0 (1 -> "A", 26 -> "Z", 27 -> "AA").
## lower: return lowercase letters when TRUE.
## Returns a single string; 0 maps to the empty string.
##
## Column names are a bijective base-26 numeral system: there is no zero
## digit, so every step subtracts 1 before taking the remainder. Doing the
## borrow at each step is what makes chains such as 702 -> "ZZ" come out right.
colnum2Excel <- function(n, lower = FALSE) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0, n == floor(n))
  let <- if (lower) letters else LETTERS
  name <- character(0)
  while (n > 0) {
    # Shift to 0..25 so that 26 maps to "Z" rather than rolling over.
    name <- c(let[(n - 1) %% 26 + 1], name)
    n <- (n - 1) %/% 26
  }
  paste(name, collapse = "")
}
## Return the Excel column name (A, B, ..., Z, AA, AB, ...) of a data frame
## column, found by looking up the column's position in `df`.
##
## df:      a data frame (anything with names()).
## colname: name of one column of `df`.
## lower:   return lowercase letters when TRUE (forwarded to colnum2Excel()).
## Stops with an error when `colname` does not identify exactly one column.
varnum2Excel <- function(df, colname, lower = FALSE) {
  index <- match(colname, names(df))
  # match() reports a missing name as NA, so test for that explicitly rather
  # than relying on a comparison against 0.
  if (length(index) != 1L || is.na(index)) {
    stop("`colname` must name exactly one column of `df`", call. = FALSE)
  }
  colnum2Excel(index, lower = lower)
}
Here is an example:
# Example: add a column of spreadsheet formulas ("A2 + B2", "A3 + B3", ...)
# and write the table to an .xlsx file.
# library() fails loudly if a package is missing; require() only returns FALSE.
library(dplyr) # provides %>%, mutate() and n() used below
library(openxlsx)

table <- data.frame(milk = c(1, 2, 3), oranges = c(2, 4, 6))
table <- table %>%
  mutate(
    # Row 1 of the sheet holds the header, so data rows start at 2.
    ajjhh = sprintf(
      paste0(
        varnum2Excel(., "milk"), "%1$s", " + ",
        varnum2Excel(., "oranges"), "%1$s"
      ),
      2:(n() + 1)
    )
  )
# Tag the column so openxlsx writes it as formulas rather than text.
class(table$ajjhh) <- c(class(table$ajjhh), "formula")

wb <- createWorkbook()
addWorksheet(wb = wb, sheetName = "Sheet1", tabColour = "chocolate4")
writeData(wb, "Sheet1", x = table)
saveWorkbook(wb, "formulashasnotgone.xlsx", overwrite = TRUE)
If you just want unique names, you could use
# Cycle a-z out to 30 entries; make.unique() then appends a counter to each
# repeat, giving "a".."z", "a1", "b1", "c1", "d1".
make.unique(rep_len(letters, 30), sep = "")
Edit:
Here's another way to get repeating letters, using `Reduce`:
## First `n` labels of the sequence a..z, aa..zz, aaa..zzz, ...
##
## n: number of labels wanted; must be >= 0.
## Returns a character vector of length `n` (character(0) when n is 0).
myletters <- function(n) {
  stopifnot(n >= 0)
  # Starting from `letters`, each Reduce step pastes one more copy of every
  # letter onto the previous pass; accumulate = TRUE keeps all passes.
  extra_passes <- n %/% length(letters)
  passes <- Reduce(
    paste0,
    replicate(extra_passes, letters, simplify = FALSE),
    init = letters,
    accumulate = TRUE
  )
  # seq_len() rather than 1:n, which is c(1, 0) for n = 0 and would return "a".
  unlist(passes)[seq_len(n)]
}
myletters(60)
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"
# [13] "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x"
# [25] "y" "z" "aa" "bb" "cc" "dd" "ee" "ff" "gg" "hh" "ii" "jj"
# [37] "kk" "ll" "mm" "nn" "oo" "pp" "qq" "rr" "ss" "tt" "uu" "vv"
# [49] "ww" "xx" "yy" "zz" "aaa" "bbb" "ccc" "ddd" "eee" "fff" "ggg" "hhh"
There is almost certainly a better way, but this is what I ended up with:
## Map each index in `idx` to its label in the sequence
## a..z, aa..zz, aaa..zzz, ... (1 -> "a", 27 -> "aa", 53 -> "aaa").
##
## idx: vector of indices (not a count of items).
## Returns a character vector with one label per element of `idx`.
letter_wrap <- function(idx) {
  label_for <- function(i) {
    # Position within the alphabet; a remainder of 0 means the 26th letter.
    slot <- i %% 26
    if (!slot) {
      slot <- 26
    }
    # How many times the letter is repeated: 1 for 1-26, 2 for 27-52, ...
    copies <- (i - 1) %/% 26 + 1
    paste(rep(letters[slot], copies), collapse = "")
  }
  vapply(idx, label_for, FUN.VALUE = character(1))
}
letter_wrap(1:60)
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n"
# [15] "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "aa" "bb"
# [29] "cc" "dd" "ee" "ff" "gg" "hh" "ii" "jj" "kk" "ll" "mm" "nn" "oo" "pp"
# [43] "qq" "rr" "ss" "tt" "uu" "vv" "ww" "xx" "yy" "zz" "aaa" "bbb" "ccc" "ddd"
# [57] "eee" "fff" "ggg" "hhh"
EDIT: failed to notice Ananda's answer before I posted this one. This one is different enough that I'm leaving it. Note it takes the index vector as an input, as opposed to the number of items.