I need to find the points at which an increasing or decreasing trend starts and ends. In this data, a difference of ~10 between consecutive values is considered noise (i.e. it does not count as an increase or decrease).
Here's how to make the change point vector:
vec <- c(
  100312, 100317, 100380, 100432, 100438, 100441, 100509, 100641, 100779,
  100919, 100983, 100980, 100978, 100983, 100986, 100885, 100767, 100758,
  100755, 100755
)
# Largest absolute step between neighbouring values that still counts as
# noise; anything bigger is treated as part of a trend.
noise_threshold <- 10
# Flag each consecutive step as trend (TRUE) or noise (FALSE).
is_trend_step <- abs(diff(vec)) > noise_threshold
# Run-length encode the flags: the cumulative run lengths mark the step at
# which each run ends, and adding 1 turns a step number into a position in
# vec (diff() drops one element). Those positions are the trend
# starts/stops. For a vector with at least two values the last entry is
# always length(vec), because the final run ends at the final observation.
idx <- cumsum(rle(is_trend_step)$lengths) + 1
# Values of vec at the change points.
newVec <- vec[idx]
print(newVec)
[1] 100317 100432 100441 100983 100986 100767 100755
# Optional: discard change points that fall on the first or the last
# observation of vec.
idx <- idx[!(idx %in% c(1, length(vec)))]
# Rebuild the change point values from the filtered positions so that
# newVec reflects the restriction above.
newVec <- vec[idx]
print(newVec)
[1] 100317 100432 100441 100983 100986 100767
# newVec alternates between trend starts and trend ends, so the entries at
# odd positions are the starts. This relies on the series opening with a
# noise stretch; if the very first step already exceeded the noise limit,
# the first change point would be a trend end instead.
# Selecting by position parity (rather than recycling c(TRUE, FALSE))
# returns an empty vector instead of NA when newVec is empty.
start_changepoints <- newVec[seq_along(newVec) %% 2 == 1]
print(start_changepoints)
[1] 100317 100441 100986
# The entries of newVec at even positions are the trend ends (the
# counterpart of the odd-position starts). Selecting by position parity
# (rather than recycling c(FALSE, TRUE)) returns an empty vector instead of
# NA when newVec has fewer than two elements.
end_changepoints <- newVec[seq_along(newVec) %% 2 == 0]
print(end_changepoints)
[1] 100432 100983 100767
# The number of trend events equals the number of start change points, so
# counting the elements of start_changepoints gives the event count.
length(start_changepoints)
[1] 3
If you then want to plot that, you can use this:
# library() stops with an error if ggplot2 is missing, whereas require()
# would only return FALSE and let the plot call fail later.
library(ggplot2)
# Prepare the plotting data: one row per observation; trends and cols stay
# NA except on the change point rows.
df <- data.frame(vec, trends = NA, cols = NA)
df$trends[idx] <- idx
# Colour the change points alternately: green = trend start, red = trend
# end. rep_len() spells out the alternation and avoids a recycling warning
# when idx holds an odd number of change points.
df$cols[idx] <- rep_len(c("green", "red"), length(idx))
# Plot the series and mark every change point with a dashed vertical line.
ggplot(df, aes(x = seq_along(vec), y = vec)) +
  geom_line() +
  geom_point() +
  # Only the change point rows are handed to geom_vline(), so no NA rows
  # have to be dropped while drawing.
  geom_vline(
    data = df[idx, ],
    aes(xintercept = trends, colour = cols),
    lty = 2, lwd = 1
  ) +
  # Named values tie each colour key to its colour explicitly; breaks and
  # labels fix the legend order and wording.
  scale_colour_manual(
    values = c(green = "green", red = "red"),
    breaks = c("green", "red"),
    labels = c("Start", "End")
  ) +
  xlab("Index") +
  ylab("Value") +
  guides(colour = guide_legend("Trend State"))
Output: