predict.bigssg {bigsplines} | R Documentation |
Get fitted values and standard error estimates for generalized smoothing spline anova models.
## S3 method for class 'bigssg' predict(object,newdata=NULL,se.lp=FALSE,include=object$tnames, effect=c("all","0","lin","non"),includeint=FALSE, design=FALSE,smoothMatrix=FALSE,intercept=NULL,...)
object |
Object of class "bigssg", which is output from bigssg. |
newdata |
Data frame or list containing the new data points for prediction. Variable names must match those used in the formula input of bigssg. |
se.lp |
Logical indicating if the standard errors of the linear predictors (η) should be estimated. Default is se.lp=FALSE. |
include |
Which terms to include in the estimate. You can get fitted values for any combination of terms in the tnames element of a "bigssg" object. |
effect |
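Which effect to estimate: effect="all" gives the full prediction, effect="0" gives the intercept (constant) portion, effect="lin" gives the linear portion, and effect="non" gives the nonlinear portion. |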
includeint |
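Logical indicating whether the intercept should be included in the prediction. If include=object$tnames and effect="all" (default), then this input is ignored and the intercept is automatically included in the prediction. |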
design |
Logical indicating whether the design matrix should be returned. |
smoothMatrix |
Logical indicating whether the smoothing matrix should be returned. |
intercept |
Logical indicating whether the intercept should be included in the prediction. When used, this input overrides the includeint input. |
... |
Ignored. |
Uses the coefficient and smoothing parameter estimates from a fitted generalized smoothing spline anova model (estimated by bigssg) to predict for new data.
Returns list with elements:
fitted.values |
Vector of fitted values (on data scale) |
linear.predictors |
Vector of fitted values (on link scale) |
se.lp |
Vector of standard errors of linear predictors (if se.lp=TRUE) |
X |
Design matrix used to create linear predictors (if design=TRUE) |
ix |
Index vector such that linear.predictors=X%*%object$modelspec$coef[ix] (if design=TRUE) |
S |
Smoothing matrix corresponding to fitted values (if smoothMatrix=TRUE) |
Nathaniel E. Helwig <helwig@umn.edu>
Gu, C. (2013). Smoothing spline ANOVA models, 2nd edition. New York: Springer.
Gu, C. and Xiang, D. (2001). Cross-validating non-Gaussian data: Generalized approximate cross-validation revisited. Journal of Computational and Graphical Statistics, 10, 581-591.
Helwig, N. E. (2017). Regression with ordered predictors via ordinal smoothing splines. Frontiers in Applied Mathematics and Statistics, 3(15), 1-13.
Helwig, N. E. and Ma, P. (2015). Fast and stable multiple smoothing parameter selection in smoothing spline analysis of variance models with large samples. Journal of Computational and Graphical Statistics, 24, 715-732.
Helwig, N. E. and Ma, P. (2016). Smoothing spline ANOVA for super-large samples: Scalable computation via rounding parameters. Statistics and Its Interface, 9, 433-444.
# Examples for predict.bigssg: predicting from generalized smoothing spline
# anova fits. Assumes the bigsplines package is attached (it provides bigssg
# and this predict method; imagebar is presumably provided by it as well).
#
# List elements are accessed by their full documented names
# (linear.predictors, fitted.values) and arguments are spelled out in full,
# so nothing here relies on partial matching.

# two-sided 95% normal critical value used for every interval below
zcrit <- qnorm(0.975)

########## EXAMPLE 1 ##########

# define univariate function and data
set.seed(1)
myfun <- function(x) {
  sin(2 * pi * x)
}
ndpts <- 1000
x <- runif(ndpts)

# negative binomial response (unknown dispersion)
set.seed(773)
lp <- myfun(x)
mu <- exp(lp)
y <- rnbinom(n = ndpts, size = 2, mu = mu)

# fit cubic spline model
cubmod <- bigssg(y ~ x, family = "negbin", type = "cub", nknots = 20)
1 / cubmod$dispersion  ## dispersion = 1/size
crossprod(lp - cubmod$linear.predictors) / length(lp)

# define new data for prediction
newdata <- data.frame(x = seq(0, 1, length.out = 100))

# get fitted values and standard errors for new data
yc <- predict(cubmod, newdata, se.lp = TRUE)

# plot results with 95% Bayesian confidence interval (link scale)
plot(newdata$x, yc$linear.predictors, type = "l")
lines(newdata$x, yc$linear.predictors + zcrit * yc$se.lp, lty = 3)
lines(newdata$x, yc$linear.predictors - zcrit * yc$se.lp, lty = 3)

# plot results with 95% Bayesian confidence interval (data scale)
plot(newdata$x, yc$fitted.values, type = "l")
lines(newdata$x, exp(yc$linear.predictors + zcrit * yc$se.lp), lty = 3)
lines(newdata$x, exp(yc$linear.predictors - zcrit * yc$se.lp), lty = 3)

# predict constant, linear, and nonlinear effects
yc0 <- predict(cubmod, newdata, se.lp = TRUE, effect = "0")
ycl <- predict(cubmod, newdata, se.lp = TRUE, effect = "lin")
ycn <- predict(cubmod, newdata, se.lp = TRUE, effect = "non")
# the three effects should sum to the full linear predictor
crossprod(
  yc$linear.predictors -
    (yc0$linear.predictors + ycl$linear.predictors + ycn$linear.predictors)
)

# plot results with 95% Bayesian confidence intervals (link scale)
oldpar <- par(mfrow = c(1, 2))  # remember the caller's settings
plot(newdata$x, ycl$linear.predictors, type = "l", main = "Linear effect")
lines(newdata$x, ycl$linear.predictors + zcrit * ycl$se.lp, lty = 3)
lines(newdata$x, ycl$linear.predictors - zcrit * ycl$se.lp, lty = 3)
plot(newdata$x, ycn$linear.predictors, type = "l", main = "Nonlinear effect")
lines(newdata$x, ycn$linear.predictors + zcrit * ycn$se.lp, lty = 3)
lines(newdata$x, ycn$linear.predictors - zcrit * ycn$se.lp, lty = 3)

# plot results with 95% Bayesian confidence intervals (data scale)
par(mfrow = c(1, 2))
plot(newdata$x, ycl$fitted.values, type = "l", main = "Linear effect")
lines(newdata$x, exp(ycl$linear.predictors + zcrit * ycl$se.lp), lty = 3)
lines(newdata$x, exp(ycl$linear.predictors - zcrit * ycl$se.lp), lty = 3)
plot(newdata$x, ycn$fitted.values, type = "l", main = "Nonlinear effect")
lines(newdata$x, exp(ycn$linear.predictors + zcrit * ycn$se.lp), lty = 3)
lines(newdata$x, exp(ycn$linear.predictors - zcrit * ycn$se.lp), lty = 3)
par(oldpar)  # restore graphics settings


########## EXAMPLE 2 ##########

# define bivariate function and data
set.seed(1)
myfun <- function(x1v, x2v) {
  sin(2 * pi * x1v) + log(x2v + .1) + cos(pi * (x1v - x2v))
}
ndpts <- 1000
x1v <- runif(ndpts)
x2v <- runif(ndpts)

# binomial response (with weights)
set.seed(773)
lp <- myfun(x1v, x2v)
p <- 1 / (1 + exp(-lp))
w <- sample(c(10, 20, 30, 40, 50), length(p), replace = TRUE)
y <- rbinom(n = ndpts, size = w, prob = p) / w  ## y is proportion correct
cubmod <- bigssg(y ~ x1v * x2v, family = "binomial",
                 type = list(x1v = "cub", x2v = "cub"),
                 nknots = 100, weights = w)
crossprod(lp - cubmod$linear.predictors) / length(lp)

# define new data for prediction
grid50 <- seq(0, 1, length.out = 50)
xnew <- as.matrix(expand.grid(grid50, grid50))
newdata <- list(x1v = xnew[, 1], x2v = xnew[, 2])

# get fitted values for new data
yp <- predict(cubmod, newdata)

# plot linear predictor and fitted values
oldpar <- par(mfrow = c(2, 2))  # remember the caller's settings
imagebar(grid50, grid50,
         matrix(myfun(newdata$x1v, newdata$x2v), 50, 50),
         xlab = expression(italic(x)[1]), ylab = expression(italic(x)[2]),
         zlab = expression(hat(italic(y))), zlim = c(-4.5, 1.5),
         main = "True Linear Predictor")
imagebar(grid50, grid50,
         matrix(yp$linear.predictors, 50, 50),
         xlab = expression(italic(x)[1]), ylab = expression(italic(x)[2]),
         zlab = expression(hat(italic(y))), zlim = c(-4.5, 1.5),
         main = "Estimated Linear Predictor")
newprob <- 1 / (1 + exp(-myfun(newdata$x1v, newdata$x2v)))
imagebar(grid50, grid50,
         matrix(newprob, 50, 50),
         xlab = expression(italic(x)[1]), ylab = expression(italic(x)[2]),
         zlab = expression(hat(italic(y))), zlim = c(0, 0.8),
         main = "True Probabilities")
imagebar(grid50, grid50,
         matrix(yp$fitted.values, 50, 50),
         xlab = expression(italic(x)[1]), ylab = expression(italic(x)[2]),
         zlab = expression(hat(italic(y))), zlim = c(0, 0.8),
         main = "Estimated Probabilities")

# predict linear and nonlinear effects for x1v (link scale)
newdata <- list(x1v = seq(0, 1, length.out = 100))
yl <- predict(cubmod, newdata, include = "x1v", effect = "lin", se.lp = TRUE)
yn <- predict(cubmod, newdata, include = "x1v", effect = "non", se.lp = TRUE)

# plot results with 95% Bayesian confidence intervals (link scale)
par(mfrow = c(1, 2))
plot(newdata$x1v, yl$linear.predictors, type = "l", main = "Linear effect")
lines(newdata$x1v, yl$linear.predictors + zcrit * yl$se.lp, lty = 3)
lines(newdata$x1v, yl$linear.predictors - zcrit * yl$se.lp, lty = 3)
plot(newdata$x1v, yn$linear.predictors, type = "l", main = "Nonlinear effect")
lines(newdata$x1v, yn$linear.predictors + zcrit * yn$se.lp, lty = 3)
lines(newdata$x1v, yn$linear.predictors - zcrit * yn$se.lp, lty = 3)
par(oldpar)  # restore graphics settings