I have a vector that looks something like this:
c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
Suppose I want to copy the values at positions 1, 6 and 11 (the non-zero ones) into the zeros that follow each of them.
Here's one way:
# Carry the last non-zero value forward over every run of zeros in `x`.
#
# Args:
#   x: an atomic vector (typically numeric) whose first element is not 0.
#
# Returns:
#   A vector of the same length as `x` in which each run of zeros has been
#   replaced by the value that immediately precedes the run. An empty `x` is
#   returned unchanged.
#
# Raises:
#   An error if `x[1]` is 0, because there is no earlier value to carry forward.
zero.locf <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }
  # isTRUE() keeps an NA first element from breaking the `if` condition.
  if (isTRUE(x[1] == 0)) stop('x[1] should not be 0')

  runs <- rle(x)
  run_values <- runs$values

  # Positions (within the run-length encoding) of the zero runs. which() is
  # essential here: arithmetic on the raw logical mask would yield a 0/-1
  # vector, which R interprets as "drop the first element" rather than as the
  # positions of the preceding runs.
  zero_runs <- which(run_values == 0)

  # Consecutive equal values collapse into one run, so the run just before a
  # zero run is never itself a zero run.
  run_values[zero_runs] <- run_values[zero_runs - 1L]

  rep(run_values, runs$lengths)
}
# Example: fill each run of zeros in the sample vector; the expected result is
# shown in the comment below.
x <- c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
zero.locf(x)
# [1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
rle(x) returns a list with items values and lengths.
rle(x)
Run Length Encoding
lengths: int [1:6] 1 4 1 4 1 4
values : num [1:6] 0.5 0 0.7 0 0.4 0
with() opens up this list and lets us reference these entries directly.
Another possibility:
library(zoo)

# Sample vector: every non-zero value is followed by four zeros.
vec <- c(0.5, 0, 0, 0, 0, 0.7, 0, 0, 0, 0, 0.4, 0, 0, 0, 0)
# Recode the zeros as missing so that na.locf() can carry the last
# observation forward over them.
is.na(vec) <- vec == 0
na.locf(vec)
#[1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
Here is an alternative using approx
# Sample vector: every non-zero value is followed by four zeros.
dat <- c(0.5, 0, 0, 0, 0, 0.7, 0, 0, 0, 0, 0.4, 0, 0, 0, 0)
# Logical mask of the entries that carry a value.
not.0 <- dat != 0
# Piecewise-constant interpolation through the non-zero points, evaluated at
# every index. `rule = 1:2` extends the last value to the right; `yleft = 0`
# supplies 0 for any index before the first non-zero point.
approx(
  x = which(not.0),
  y = dat[not.0],
  xout = seq_along(dat),
  method = "constant",
  yleft = 0,
  rule = 1:2
)
# $x
# [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#
# $y
# [1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
And here is an alternative that relies on the stated structure of the initial vector (repetitions of a non-zero value followed by 4 zeros). It addresses the speed issue, but at the cost of flexibility.
# Sample vector: every non-zero value is followed by four zeros.
dat <- c(0.5, 0, 0, 0, 0, 0.7, 0, 0, 0, 0, 0.4, 0, 0, 0, 0)
# Pick every fifth element starting at the first, then repeat each picked
# value five times to rebuild a vector of the original length.
rep(
  dat[seq(from = 1, to = length(dat), by = 5)],
  each = 5
)
Here's another base R approach. Initial zeros are left as is:
v <- c(0, 1, 2, -2.1, 0, 3, 0, 0.4, 0, 0)
# Overwrite each non-zero entry with its increment over the previous non-zero
# entry (the first one is measured against 0); zeros stay zero.
v <- replace(v, v != 0, diff(c(0, v[v != 0])))
# The running total of those increments is the most recent non-zero value at
# every position.
cumsum(v)
# [1] 0.0 1.0 2.0 -2.1 -2.1 3.0 3.0 0.4 0.4 0.4
And here are some benchmarks:
# zoo-based variant: recode zeros as NA, then let na.locf() carry the last
# observation forward.
roland <- function(v) {
  is.na(v) <- v == 0
  na.locf(v)
}
# Run-length-encoding variant: replace each run of zeros in `x` with the value
# of the run that precedes it.
#
# Args:
#   x: an atomic vector (typically numeric).
#
# Returns:
#   A vector of the same length as `x`. A leading run of zeros has nothing to
#   inherit from and is returned unchanged.
mp <- function(x) {
  runs <- rle(x)
  run_values <- runs$values

  zero_runs <- which(run_values == 0)
  # Skip a zero run in first position: its "predecessor" would be subscript 0,
  # which R silently drops, leaving the replacement vector one element short
  # and recycled onto the wrong runs.
  zero_runs <- zero_runs[zero_runs > 1L]

  run_values[zero_runs] <- run_values[zero_runs - 1L]
  rep(run_values, runs$lengths)
}
# approx()-based variant: piecewise-constant interpolation through the
# non-zero points of `dat`, evaluated at every index. `rule = 2` makes indices
# outside the range of non-zero points take the nearest extreme value.
# Returns the list produced by approx() (components `x` and `y`).
quant <- function(dat) {
  keep <- dat != 0
  approx(
    x = which(keep),
    y = dat[keep],
    xout = seq_along(dat),
    method = "constant",
    rule = 2
  )
}
# diff/cumsum variant: overwrite each non-zero entry with its increment over
# the previous non-zero entry (the first is measured against 0), then take the
# running total. Leading zeros stay zero.
eddi <- function(v) {
  nonzero <- v != 0
  v[nonzero] <- diff(c(0, v[nonzero]))
  cumsum(v)
}
# Benchmark input: one million draws with replacement. 0 appears twice in the
# pool (once in -10:10 and once explicitly), so it is drawn about twice as
# often as any other value.
v = sample(c(-10:10, 0), 1e6, TRUE)
# Time each approach over 10 runs. microbenchmark() is not base R; presumably
# the microbenchmark package has been attached beforehand.
microbenchmark(roland(v), mp(v), quant(v), eddi(v), times = 10)
#Unit: milliseconds
# expr min lq median uq max neval
# roland(v) 595.1630 625.7692 638.4395 650.4758 664.9224 10
# mp(v) 410.8224 433.6775 469.9346 496.6328 528.3218 10
# quant(v) 646.1775 753.0684 759.9805 838.4281 883.3383 10
# eddi(v) 265.8064 286.2922 316.7022 339.0333 354.0836 10
I'd probably loop over every element greater than 0 using lapply, then apply the rep function to repeat each of these values 5 times, and merge the resulting list entries via do.call("c", ...).
# Assumes `tmp` already holds the input vector (it is not defined in this
# snippet). Each positive element is repeated 5 times and the pieces are
# concatenated in order.
do.call(
  "c",
  lapply(which(tmp > 0), function(idx) rep(tmp[idx], times = 5))
)
[1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4