问题
So, I'm trying to make a modification in ctree (part of the partykit) package. Specifically, I want to delete an object in the global environment and run gc() to help conserve memory (R runs super slow when it gets to the point of using Windows page file). I made it as far as using fixInNamespace:
fixInNamespace(ctree,"partykit")
I noticed that my change was not working, so I even went to the extent of doing this as the replacement code:
function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
return("foo")
}
I've also tried using this:
tmpfun <- get("ctree", envir = asNamespace("partykit"))
environment(ctree) <- environment(tmpfun)
attributes(ctree) <- attributes(tmpfun) # don't know if this is really needed
assignInNamespace("ctree", ctree, ns="partykit")
No matter what I seem to do, I'm stuck with the library version of ctree. BTW, I'm using RStudio 0.98.507 and R 3.1.1 on Windows 8.1.
Does this have something to do with the external C code in the .ctree_fit call?
Also, before we go down the road of "R only copies on write..." I've already verified that we end up with multiple copies of the data set. See:
> d2<-iris
> tracemem(iris)
[1] "<0x0000000019c7f5f8>"
> tracemem(d2)
[1] "<0x0000000019c7f5f8>"
> cttest<-ctree(Species~.,data=d2)
> tracemem(cttest$data)
[1] "<0x0000000008af8e30>"
Thanks for the post so far, but when I try what I'm trying, I get the following error:
> cttest<-ctree(Species~.,data=d2)
Error in environment(partykit) : object 'partykit' not found
Here's a longer code fragment that shows what I'm trying to achieve:
require(partykit)
ctree(Species~.,data=iris)
package_name<-"partykit"
function_name<-"ctree"
#
# Borrowed: https://github.com/robertzk/testthatsomemore/blob/master/R/stub.R
#
namespaces <-
list(as.environment(paste0('package:', package_name)),
getNamespace(package_name))
if (!exists(function_name, envir = namespaces[[1]], inherits = FALSE))
namespaces <- namespaces[-1]
if (!exists(function_name, envir = tail(namespaces,1)[[1]], inherits = FALSE))
stop(gettextf("Cannot stub %s::%s because it must exist in the package",
package_name, function_name))
lapply(namespaces, unlockBinding, sym = function_name)
# Clean up our stubbing on exit
previous_object <- get(function_name, envir = tail(namespaces,1)[[1]])
on.exit({
lapply(namespaces, function(ns) {
tryCatch(error = function(.) NULL, assign(function_name, previous_object, envir = ns))
lockBinding(function_name, ns)
})
})
lapply(namespaces, function(ns)
assign(function_name,
#
# Modified ctree - kill original data variable prior to running longer-running algorithm
#
function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action"),
names(mf), 0)
mf <- mf[c(1, m)]
### only necessary for extended model formulae
### e.g. multivariate responses
formula <- Formula::Formula(formula)
mf$formula <- formula
mf$drop.unused.levels <- FALSE
mf$na.action <- na.action
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response <- names(Formula::model.part(formula, mf, lhs = 1))
weights <- model.weights(mf)
dat <- mf[, colnames(mf) != "(weights)"]
if (!is.null(scores)) {
for (n in names(scores)) {
sc <- scores[[n]]
if (is.ordered(dat[[n]]) &&
nlevels(dat[[n]]) == length(sc)) {
attr(dat[[n]], "scores") <- as.numeric(sc)
} else {
warning("scores for variable ", sQuote(n), " ignored")
}
}
}
if (is.null(weights))
weights <- rep(1, nrow(mf))
storage.mode(weights) <- "integer"
nvar <- sum(!(colnames(dat) %in% response))
control$cfun <- function(...) {
if (control$teststat == "quad")
p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
if (control$teststat == "max")
p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
names(p) <- c("statistic", "p.value")
if (control$testtype == "Bonferroni")
p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
crit <- p["statistic"]
if (control$testtype != "Teststatistic")
crit <- p["p.value"]
c(crit, p)
}
#require(partykit)
environment(partykit)
if (!is.null(get("delvar",envir=globalenv()))) {
eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
}
tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
ytrafo = ytrafo)
fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
"(weights)" = weights,
check.names = FALSE)
fitted[[3]] <- dat[, response, drop = length(response) == 1]
names(fitted)[3] <- "(response)"
ret <- party(tree, data = dat, fitted = fitted)
class(ret) <- c("constparty", class(ret))
### doesn't work for Surv objects
# ret$terms <- terms(formula, data = mf)
ret$terms <- terms(mf)
### need to adjust print and plot methods
### for multivariate responses
### if (length(response) > 1) class(ret) <- "party"
return(ret)
}
, envir = ns))
#
# End Borrowed
#
d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)
UPDATE: I found a possible solution, but I'm hoping that someone has a cleaner way of doing this. I downloaded the source code for the partykit package and wrote a script to import everything into the global environment (except for the compiled C functions that were installed when the partykit package was installed from CRAN)
Here's basically where I ended up:
files<-c("as.party.R",
"ctree.R",
"glmtree.R",
"lmtree.R",
"mob-plot.R",
"mob-pvalue.R",
"modelparty.R",
"node.R",
"party.R",
"plot.R",
"pmmlTreeModel.R",
"print.R",
"simpleparty.R",
"split.R",
"utils.R")
for ( i in 1:length(files)) {
source(paste("c:\\cygwin64\\home\\Mike\\partykit\\R\\",files[i],sep=""))
}
ctree <- function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action"),
names(mf), 0)
mf <- mf[c(1, m)]
### only necessary for extended model formulae
### e.g. multivariate responses
formula <- Formula::Formula(formula)
mf$formula <- formula
mf$drop.unused.levels <- FALSE
mf$na.action <- na.action
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response <- names(Formula::model.part(formula, mf, lhs = 1))
weights <- model.weights(mf)
dat <- mf[, colnames(mf) != "(weights)"]
if (!is.null(scores)) {
for (n in names(scores)) {
sc <- scores[[n]]
if (is.ordered(dat[[n]]) &&
nlevels(dat[[n]]) == length(sc)) {
attr(dat[[n]], "scores") <- as.numeric(sc)
} else {
warning("scores for variable ", sQuote(n), " ignored")
}
}
}
if (is.null(weights))
weights <- rep(1, nrow(mf))
storage.mode(weights) <- "integer"
nvar <- sum(!(colnames(dat) %in% response))
control$cfun <- function(...) {
if (control$teststat == "quad")
p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
if (control$teststat == "max")
p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
names(p) <- c("statistic", "p.value")
if (control$testtype == "Bonferroni")
p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
crit <- p["statistic"]
if (control$testtype != "Teststatistic")
crit <- p["p.value"]
c(crit, p)
}
#require(partykit)
#environment(partykit)
if (!is.null(get("delvar",envir=globalenv()))) {
eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
}
tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
ytrafo = ytrafo)
fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
"(weights)" = weights,
check.names = FALSE)
fitted[[3]] <- dat[, response, drop = length(response) == 1]
names(fitted)[3] <- "(response)"
ret <- party(tree, data = dat, fitted = fitted)
class(ret) <- c("constparty", class(ret))
### doesn't work for Surv objects
# ret$terms <- terms(formula, data = mf)
ret$terms <- terms(mf)
### need to adjust print and plot methods
### for multivariate responses
### if (length(response) > 1) class(ret) <- "party"
return(ret)
}
d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)
cttest
回答1:
It works on my system. You might need to call unlockBinding
first. This is what the testthatsomemore package does under the hood; see if that works for you.
install_github('robertzk/testthatsomemore')
testthatsomemore::package_stub("partykit", "ctree", function(...) return("foo"), {
# Your code that makes use of partykit::ctree should go here. The below will print "foo"
print(partykit::ctree("I have been overwritten"))
})
You can of course put the modified function in the third argument instead of the stub above.
来源:https://stackoverflow.com/questions/26598547/override-function-in-r-package