I see some weird behaviour in violin plots when the data is (in parts) constant.
By checking for constant data and adding a small error to it,
I finally managed to get a violin plot even when some group(s) have zero variance (standard deviation).
In my example I have 3 groups of data: two with non-zero variance and a third that is constant. While accumulating the groups, I calculate the standard deviation of each group (using the variance would work equally well).
library(ggplot2)
library(gridExtra)
# Number of observations per group
N <- 20

# Build one data frame per "range" group and bind them in a single step,
# instead of growing test_data with rbind() on every loop iteration.
# Group k holds N uniform random values drawn from [k, k + 1].
range_groups <- lapply(1:2, function(grp_id) {
  group_data <- data.frame(
    idx = seq_len(N),
    vals = runif(N, grp_id, grp_id + 1),
    type = paste("range", grp_id)
  )
  # Per-group standard deviation, repeated on every row of the group
  group_data$sd_group <- sd(group_data$vals)
  group_data
})
test_data <- do.call(rbind, range_groups)
# Constant group: every observation is 0.5, so the group has no spread
group_data <- data.frame(
  idx = 1:N,
  vals = rep(0.5, times = N),
  type = "const"
)
# Store the group's standard deviation on every row
group_data[["sd_group"]] <- sd(group_data[["vals"]])
As suggested, I add a small offset to obtain a violin plot for the group 'const':
# When the group has zero standard deviation, nudge a single observation
# by a tiny amount so that a (flat) violin is drawn for it
if (group_data[["sd_group"]][1] == 0) {
  group_data[["vals"]][1] <- group_data[["vals"]][1] + 1e-05
}
# Append the constant group to the accumulated data
test_data <- rbind(test_data, group_data)
The only thing left to do is to scale all violin plots to the same width:
# Stack two views of the same data in a single column:
#   top    - the values of each group drawn as a line against their index
#   bottom - one violin per group; scale = "width" gives every violin the
#            same maximum width
grid.arrange(
  ggplot(test_data, aes(x = idx, y = vals, colour = type)) +
    geom_line(),
  ggplot(test_data, aes(x = type, y = vals)) +
    geom_violin(aes(fill = type), scale = "width"),
  ncol = 1
)