Display regression equation and R^2 for each scatter plot when using facet_wrap

前端 未结 2 1735
名媛妹妹
名媛妹妹 2020-12-28 23:44

I have a data.frame (which I melted using the melt function), from which I produce multiple scatter plots and fit a regression line using the following:

ggpl         


        
相关标签:
2条回答
  • 2020-12-28 23:51

    Nice solution. I'm surprised ggplot doesn't have a function built in to do this... I needed to display equations and R2 values from polynomial fits (generated by the ns(x,order) function in the splines package), and have expanded your lm_eqn function to accomodate polynomials of varying orders.

    Disclaimer: I'm still quite new to R coding, and I'm aware that this code is very messy. There must be a nicer way to do it, and I'm going to start another thread to ask people to refine the code, and possibly expand it to other fit models... You can follow it here: https://groups.google.com/forum/?fromgroups#!forum/ggplot2

    lm_eqn = function(df,x.var,y.var,signif.figs,eq.plot=T,model.type,order){
      if(missing(x.var) | missing(y.var) | class(x.var)!='character' | class(y.var)!='character') stop('x.var and y.var must be the names of the columns you want to use as x and y as a character string.' )
      if(missing(model.type)) stop("model.type must be 'lin' (linear y~x model) or 'poly' (polynomial y~ns(x,order) model, generated by splines package).")
      if(model.type=='poly' & missing(order)) stop("order must be specified if poly method is used.")
    
      if(eq.plot==T) {
        # Linear y=mx+c equation
        if(model.type=='lin') {
          fit = lm(df[[y.var]] ~ df[[x.var]]);
          eq <- substitute(italic(y) == c + m %.% italic(x)*","~~italic(r)^2~"="~r2, 
                           list(c = signif(coef(fit)[1], signif.figs), 
                                m = signif(coef(fit)[2], signif.figs), 
                                r2 = signif(summary(fit)$r.squared, signif.figs)))
          as.character(as.expression(eq));
        }
        # polynomial expression generated with the ns(x,order) function [splines package]
        if(model.type=='poly') {
          fit = lm(df[[y.var]] ~ ns(df[[x.var]],order));
    
          base = gsub('!c!',signif(coef(fit)[1],signif.figs),"italic(y) == !c! + ")
          element.1 = "!m! %.% italic(x)~"
          element.2 = " + !m! %.% italic(x)^!o!~"
          element.r2 = gsub('!r2!',signif(summary(fit)$r.squared,signif.figs),"~~italic(r)^2~\"=\"~!r2!")
          eq=""
    
          for(o in 1:(order)) {
            if(o==1) {
              if(coef(fit)[(o+1)]<0) tmp=gsub("[+]","",base) else tmp=base
              eq=paste(tmp,gsub('!m!',signif(coef(fit)[(o+1)],signif.figs),element.1),sep="")
            }
            if(o>1) {
              if(coef(fit)[(o+1)]<0) tmp=gsub("[+]","",element.2) else tmp=element.2
              eq=paste(eq,gsub('!o!',o,gsub('!m!',signif(coef(fit)[(o+1)],signif.figs),tmp)),sep="")
            }
            if(o==(order)) eq=paste(eq,"\",\"",element.r2,sep="")
          }
        }
      }
      if(eq.plot==F) {
        # Linear y=mx+c equations
        if(model.type=='lin') {
          fit = lm(df[[y.var]] ~ df[[x.var]]);
          eq <- substitute(italic(r)^2~"="~r2, 
                           list(r2 = signif(summary(fit)$r.squared, signif.figs)))
          as.character(as.expression(eq));
        }
        # polynomial expression generated with the ns() function [splines package]
        if(model.type=='poly') {
          fit = lm(df[[y.var]] ~ ns(df[[x.var]],order));
    
          eq = gsub('!r2!',signif(summary(fit)$r.squared,signif.figs),"italic(r)^2~\"=\"~!r2!")
        }
      }
      return(eq)
    }
    
    0 讨论(0)
  • 2020-12-29 00:12

    I actually solved this, please see below a worked out example where the dependent variable is var1. This was a time series dataset, please ignore the date part if not relevant for your problem.

    library(plyr)
    library(ggplot2)
    
    rm(dat)
    dat <- read.table("data.txt", header = TRUE, sep = ",")
    dat <- transform(dat, date = as.POSIXct(strptime(date, "%Y-%m-%dT%H:%M:%OS")))
    
    rm(dat.m)
    dat.m <- melt(dat, id = c('ccy','date','var1'))
    
    lm_eqn = function(df){
      m = lm(var1 ~ value, df);
      eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
                       list(a = format(coef(m)[1], digits = 2), 
                            b = format(coef(m)[2], digits = 2), 
                            r2 = format(summary(m)$r.squared, digits = 3)))
      as.character(as.expression(eq));                 
    }
    
    mymax = function(df){
      max(df$value)
    }
    
    rm(regs)
    regs <- ddply(dat.m, .(ccy,variable), lm_eqn)
    regs.xpos <- ddply(dat.m, .(variable), function(df) (min(df$value)+max(df$value))/2)
    regs.ypos <- ddply(dat.m, .(ccy,variable), function(df) min(df$var1) + 0.05*(max(df$var1)-min(df$var1)))
    
    regs$y <- regs.ypos$V1
    regs$x <- regs.xpos$V1
    
    rm(gp)
    gp <- ggplot(data=dat.m, aes(value, var1)) + geom_point(size = 1, alpha=0.75) + geom_smooth() + geom_smooth(method="lm", se=FALSE, color="red") + geom_text(data=regs, size=3, color="red", aes(x=x, y=y, label=V1), parse=TRUE) + facet_grid(ccy~variable, scales="free")
    ggsave("data.png", gp, scale=1.5, width=11, height=8)
    
    0 讨论(0)
提交回复
热议问题