Override function in R package

|▌冷眼眸甩不掉的悲伤 提交于 2019-12-11 06:16:35

问题


So, I'm trying to make a modification in ctree (part of the partykit) package. Specifically, I want to delete an object in the global environment and run gc() to help conserve memory (R runs super slow when it gets to the point of using Windows page file). I made it as far as using fixInNamespace:

fixInNamespace(ctree,"partykit")

I noticed that my change was not working, so I even went to the extent of doing this as the replacement code:

function(formula, data, weights, subset, na.action = na.pass,
                  control = ctree_control(...), ytrafo = NULL,
                  scores = NULL, ...) {

    return("foo")
}

I've also tried using this:

tmpfun <- get("ctree", envir = asNamespace("partykit"))
environment(ctree) <- environment(tmpfun)
attributes(ctree) <- attributes(tmpfun)  # don't know if this is really needed
assignInNamespace("ctree", ctree, ns="partykit")

No matter what I seem to do, I'm stuck with the library version of ctree. BTW, I'm using RStudio 0.98.507 and R 3.1.1 on Windows 8.1.

Does this have something to do with the external C code in the .ctree_fit call?

Also, before we go down the road of "R only copies on write..." I've already verified that we end up with multiple copies of the data set. See:

> d2<-iris
> tracemem(iris)
[1] "<0x0000000019c7f5f8>"
> tracemem(d2)
[1] "<0x0000000019c7f5f8>"
> cttest<-ctree(Species~.,data=d2)
> tracemem(cttest$data)
[1] "<0x0000000008af8e30>"

Thanks for the post so far, but when I try what I'm trying, I get the following error:

> cttest<-ctree(Species~.,data=d2)
Error in environment(partykit) : object 'partykit' not found

Here's a longer code fragment that shows what I'm trying to achieve:

require(partykit)

ctree(Species~.,data=iris)

package_name<-"partykit"
function_name<-"ctree"


#
# Borrowed: https://github.com/robertzk/testthatsomemore/blob/master/R/stub.R
#

namespaces <-
  list(as.environment(paste0('package:', package_name)),
       getNamespace(package_name))
if (!exists(function_name, envir = namespaces[[1]], inherits = FALSE))
  namespaces <- namespaces[-1]
if (!exists(function_name, envir = tail(namespaces,1)[[1]], inherits = FALSE))
  stop(gettextf("Cannot stub %s::%s because it must exist in the package",
                package_name, function_name))
lapply(namespaces, unlockBinding, sym = function_name)
# Clean up our stubbing on exit
previous_object <- get(function_name, envir = tail(namespaces,1)[[1]])
on.exit({
  lapply(namespaces, function(ns) {
    tryCatch(error = function(.) NULL, assign(function_name, previous_object, envir = ns))
    lockBinding(function_name, ns)
  })
})
lapply(namespaces, function(ns)
  assign(function_name, 
         #
         # Modified ctree - kill original data variable prior to running longer-running algorithm
         #

         function(formula, data, weights, subset, na.action = na.pass,
                                 control = ctree_control(...), ytrafo = NULL,
                                 scores = NULL, ...) {



    if (missing(data))
      data <- environment(formula)
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action"),
               names(mf), 0)
    mf <- mf[c(1, m)]

    ### only necessary for extended model formulae 
    ### e.g. multivariate responses
    formula <- Formula::Formula(formula)
    mf$formula <- formula
    mf$drop.unused.levels <- FALSE
    mf$na.action <- na.action
    mf[[1]] <- as.name("model.frame")
    mf <- eval(mf, parent.frame())

    response <- names(Formula::model.part(formula, mf, lhs = 1))
    weights <- model.weights(mf)
    dat <- mf[, colnames(mf) != "(weights)"]
    if (!is.null(scores)) {
      for (n in names(scores)) {
        sc <- scores[[n]]
        if (is.ordered(dat[[n]]) &&
              nlevels(dat[[n]]) == length(sc)) {
          attr(dat[[n]], "scores") <- as.numeric(sc)
        } else {
          warning("scores for variable ", sQuote(n), " ignored")
        }
      }
    }

    if (is.null(weights))
      weights <- rep(1, nrow(mf))
    storage.mode(weights) <- "integer"

    nvar <- sum(!(colnames(dat) %in% response))

    control$cfun <- function(...) {
      if (control$teststat == "quad")
        p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
      if (control$teststat == "max")
        p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
      names(p) <- c("statistic", "p.value")

      if (control$testtype == "Bonferroni")
        p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
      crit <-  p["statistic"]
      if (control$testtype != "Teststatistic")
        crit <- p["p.value"]
      c(crit, p)
    }

    #require(partykit)
    environment(partykit)

    if (!is.null(get("delvar",envir=globalenv()))) {
      eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
    }


    tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
                       ytrafo = ytrafo)

    fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
                         "(weights)" = weights,
                         check.names = FALSE)
    fitted[[3]] <- dat[, response, drop = length(response) == 1]
    names(fitted)[3] <- "(response)"
    ret <- party(tree, data = dat, fitted = fitted)
    class(ret) <- c("constparty", class(ret))

    ### doesn't work for Surv objects
    # ret$terms <- terms(formula, data = mf)
    ret$terms <- terms(mf)
    ### need to adjust print and plot methods
    ### for multivariate responses
    ### if (length(response) > 1) class(ret) <- "party"
    return(ret)
  }
  , envir = ns))

#
# End Borrowed
#


d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)

UPDATE: I found a possible solution, but I'm hoping that someone has a cleaner way of doing this. I downloaded the source code for the partykit package and wrote a script to import everything into the global environment (except for the compiled C functions that were installed when the partykit package was installed from CRAN)

Here's basically where I ended up:

files<-c("as.party.R",
         "ctree.R",
         "glmtree.R",
         "lmtree.R",
         "mob-plot.R",
         "mob-pvalue.R",
         "modelparty.R",
         "node.R",
         "party.R",
         "plot.R",
         "pmmlTreeModel.R",
         "print.R",
         "simpleparty.R",
         "split.R",
         "utils.R")

for ( i in 1:length(files)) {
    source(paste("c:\\cygwin64\\home\\Mike\\partykit\\R\\",files[i],sep=""))

}

ctree <- function(formula, data, weights, subset, na.action = na.pass,
                  control = ctree_control(...), ytrafo = NULL,
                  scores = NULL, ...) {



  if (missing(data))
    data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action"),
             names(mf), 0)
  mf <- mf[c(1, m)]

  ### only necessary for extended model formulae 
  ### e.g. multivariate responses
  formula <- Formula::Formula(formula)
  mf$formula <- formula
  mf$drop.unused.levels <- FALSE
  mf$na.action <- na.action
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())

  response <- names(Formula::model.part(formula, mf, lhs = 1))
  weights <- model.weights(mf)
  dat <- mf[, colnames(mf) != "(weights)"]
  if (!is.null(scores)) {
    for (n in names(scores)) {
      sc <- scores[[n]]
      if (is.ordered(dat[[n]]) &&
            nlevels(dat[[n]]) == length(sc)) {
        attr(dat[[n]], "scores") <- as.numeric(sc)
      } else {
        warning("scores for variable ", sQuote(n), " ignored")
      }
    }
  }

  if (is.null(weights))
    weights <- rep(1, nrow(mf))
  storage.mode(weights) <- "integer"

  nvar <- sum(!(colnames(dat) %in% response))

  control$cfun <- function(...) {
    if (control$teststat == "quad")
      p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
    if (control$teststat == "max")
      p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
    names(p) <- c("statistic", "p.value")

    if (control$testtype == "Bonferroni")
      p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
    crit <-  p["statistic"]
    if (control$testtype != "Teststatistic")
      crit <- p["p.value"]
    c(crit, p)
  }

  #require(partykit)
  #environment(partykit)

  if (!is.null(get("delvar",envir=globalenv()))) {
    eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
  }


  tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
                     ytrafo = ytrafo)

  fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
                       "(weights)" = weights,
                       check.names = FALSE)
  fitted[[3]] <- dat[, response, drop = length(response) == 1]
  names(fitted)[3] <- "(response)"
  ret <- party(tree, data = dat, fitted = fitted)
  class(ret) <- c("constparty", class(ret))

  ### doesn't work for Surv objects
  # ret$terms <- terms(formula, data = mf)
  ret$terms <- terms(mf)
  ### need to adjust print and plot methods
  ### for multivariate responses
  ### if (length(response) > 1) class(ret) <- "party"
  return(ret)
}

d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)

cttest

回答1:


It works on my system. You might need to call unlockBinding first. This is what the testthatsomemore package does under the hood; see if that works for you.

install_github('robertzk/testthatsomemore')
testthatsomemore::package_stub("partykit", "ctree", function(...) return("foo"), {
  # Your code that makes use of partykit::ctree should go here. The below will print "foo"
  print(partykit::ctree("I have been overwritten"))
})

You can of course put the modified function in the third argument instead of the stub above.



来源:https://stackoverflow.com/questions/26598547/override-function-in-r-package

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!