Conditional predicted value and average marginal effect plots for models

Draw one or more conditional effects plots reflecting predictions or marginal effects from a model, conditional on a covariate. Currently methods exist for “lm”, “glm”, “loess”, “clm”, “polr”, and “multinom” class models.

cplot(object, ...)

# S3 method for default
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "effect"), data = prediction::find_data(object),
  type = c("response", "link"), vcov = stats::vcov(object), at, n = 25L,
  xvals = prediction::seq_range(data[[x]], n = n), level = 0.95,
  draw = TRUE, xlab = x, ylab = if (match.arg(what) == "prediction")
  paste0("Predicted value") else paste0("Marginal effect of ", dx),
  xlim = NULL, ylim = NULL, lwd = 1L, col = "black", lty = 1L,
  se.type = c("shade", "lines", "none"), se.col = "black",
  se.fill = grDevices::gray(0.5, 0.5), se.lwd = lwd, se.lty = if
  (match.arg(se.type) == "lines") 1L else 0L, factor.lty = 0L,
  factor.pch = 19L, factor.col = se.col, factor.fill = factor.col,
  factor.cex = 1L, xaxs = "i", yaxs = xaxs, las = 1L, scatter = FALSE,
  scatter.pch = 19L, scatter.col = se.col, scatter.bg = scatter.col,
  scatter.cex = 0.5, rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for clm
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "classprediction", "stackedprediction", "effect"),
  data = prediction::find_data(object), type = c("response", "link"),
  vcov = stats::vcov(object), at, n = 25L, xvals = seq_range(data[[x]], n
  = n), level = 0.95, draw = TRUE, xlab = x, ylab = if (match.arg(what)
  == "effect") paste0("Marginal effect of ", dx) else paste0("Predicted value"),
  xlim = NULL, ylim = if (match.arg(what) %in% c("prediction",
  "stackedprediction")) c(0, 1.04) else NULL, lwd = 1L, col = "black",
  lty = 1L, factor.lty = 1L, factor.pch = 19L, factor.col = col,
  factor.fill = factor.col, factor.cex = 1L, xaxs = "i", yaxs = xaxs,
  las = 1L, scatter = FALSE, scatter.pch = 19L,
  scatter.col = factor.col, scatter.bg = scatter.col, scatter.cex = 0.5,
  rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for glm
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "effect"), data = prediction::find_data(object),
  type = c("response", "link"), vcov = stats::vcov(object), at, n = 25L,
  xvals = prediction::seq_range(data[[x]], n = n), level = 0.95,
  draw = TRUE, xlab = x, ylab = if (match.arg(what) == "prediction")
  paste0("Predicted value") else paste0("Marginal effect of ", dx),
  xlim = NULL, ylim = NULL, lwd = 1L, col = "black", lty = 1L,
  se.type = c("shade", "lines", "none"), se.col = "black",
  se.fill = grDevices::gray(0.5, 0.5), se.lwd = lwd, se.lty = if
  (match.arg(se.type) == "lines") 1L else 0L, factor.lty = 0L,
  factor.pch = 19L, factor.col = se.col, factor.fill = factor.col,
  factor.cex = 1L, xaxs = "i", yaxs = xaxs, las = 1L, scatter = FALSE,
  scatter.pch = 19L, scatter.col = se.col, scatter.bg = scatter.col,
  scatter.cex = 0.5, rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for lm
cplot(object, x = attributes(terms(object))[["term.labels"]][1L],
  dx = x, what = c("prediction", "effect"),
  data = prediction::find_data(object), type = c("response", "link"),
  vcov = stats::vcov(object), at, n = 25L,
  xvals = prediction::seq_range(data[[x]], n = n), level = 0.95,
  draw = TRUE, xlab = x, ylab = if (match.arg(what) == "prediction")
  paste0("Predicted value") else paste0("Marginal effect of ", dx),
  xlim = NULL, ylim = NULL, lwd = 1L, col = "black", lty = 1L,
  se.type = c("shade", "lines", "none"), se.col = "black",
  se.fill = grDevices::gray(0.5, 0.5), se.lwd = lwd, se.lty = if
  (match.arg(se.type) == "lines") 1L else 0L, factor.lty = 0L,
  factor.pch = 19L, factor.col = se.col, factor.fill = factor.col,
  factor.cex = 1L, xaxs = "i", yaxs = xaxs, las = 1L, scatter = FALSE,
  scatter.pch = 19L, scatter.col = se.col, scatter.bg = scatter.col,
  scatter.cex = 0.5, rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for loess
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "effect"), data = prediction::find_data(object),
  type = c("response", "link"), vcov = stats::vcov(object), at, n = 25L,
  xvals = prediction::seq_range(data[[x]], n = n), level = 0.95,
  draw = TRUE, xlab = x, ylab = if (match.arg(what) == "prediction")
  paste0("Predicted value") else paste0("Marginal effect of ", dx),
  xlim = NULL, ylim = NULL, lwd = 1L, col = "black", lty = 1L,
  se.type = c("shade", "lines", "none"), se.col = "black",
  se.fill = grDevices::gray(0.5, 0.5), se.lwd = lwd, se.lty = if
  (match.arg(se.type) == "lines") 1L else 0L, factor.lty = 0L,
  factor.pch = 19L, factor.col = se.col, factor.fill = factor.col,
  factor.cex = 1L, xaxs = "i", yaxs = xaxs, las = 1L, scatter = FALSE,
  scatter.pch = 19L, scatter.col = se.col, scatter.bg = scatter.col,
  scatter.cex = 0.5, rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for polr
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "classprediction", "stackedprediction", "effect"),
  data = prediction::find_data(object), type = c("response", "link"),
  vcov = stats::vcov(object), at, n = 25L, xvals = seq_range(data[[x]], n
  = n), level = 0.95, draw = TRUE, xlab = x, ylab = if (match.arg(what)
  == "effect") paste0("Marginal effect of ", dx) else paste0("Predicted value"),
  xlim = NULL, ylim = if (match.arg(what) %in% c("prediction",
  "stackedprediction")) c(0, 1.04) else NULL, lwd = 1L, col = "black",
  lty = 1L, factor.lty = 1L, factor.pch = 19L, factor.col = col,
  factor.fill = factor.col, factor.cex = 1L, xaxs = "i", yaxs = xaxs,
  las = 1L, scatter = FALSE, scatter.pch = 19L,
  scatter.col = factor.col, scatter.bg = scatter.col, scatter.cex = 0.5,
  rug = TRUE, rug.col = col, rug.size = -0.02, ...)

# S3 method for multinom
cplot(object,
  x = attributes(terms(object))[["term.labels"]][1L], dx = x,
  what = c("prediction", "classprediction", "stackedprediction", "effect"),
  data = prediction::find_data(object), type = c("response", "link"),
  vcov = stats::vcov(object), at, n = 25L, xvals = seq_range(data[[x]], n
  = n), level = 0.95, draw = TRUE, xlab = x, ylab = if (match.arg(what)
  == "effect") paste0("Marginal effect of ", dx) else paste0("Predicted value"),
  xlim = NULL, ylim = if (match.arg(what) %in% c("prediction",
  "stackedprediction")) c(0, 1.04) else NULL, lwd = 1L, col = "black",
  lty = 1L, factor.lty = 1L, factor.pch = 19L, factor.col = col,
  factor.fill = factor.col, factor.cex = 1L, xaxs = "i", yaxs = xaxs,
  las = 1L, scatter = FALSE, scatter.pch = 19L,
  scatter.col = factor.col, scatter.bg = scatter.col, scatter.cex = 0.5,
  rug = TRUE, rug.col = col, rug.size = -0.02, ...)

Arguments

object	A model object.
…	Additional arguments passed to `plot`.
x	A character string specifying the name of variable to use as the x-axis dimension in the plot.
dx	If `what = "effect"`, the variable whose conditional marginal effect should be displayed. By default it is `x` (so the plot displays the marginal effect of `x` across values of `x`); ignored otherwise. If `dx` is a factor with more than 2 levels, an error will be issued.
what	A character string specifying whether to draw a “prediction” (fitted values from the model, calculated using `predict`) or an “effect” (average marginal effect of `dx` conditional on `x`, using `margins`). Methods for classes other than “lm” or “glm” may provide additional options (e.g., `cplot.polr()` provides “stackedprediction” and “classprediction” alternatives).
data	A data frame to override the default value offered in `object[["model"]]`.
type	A character string specifying whether to calculate predictions on the response scale (default) or link (only relevant for non-linear models).
vcov	A matrix containing the variance-covariance matrix for estimated model coefficients, or a function to perform the estimation with `model` as its only argument.
at	Currently ignored.
n	An integer specifying the number of points across `x` at which to calculate the predicted value or marginal effect, when `x` is numeric. Ignored otherwise.
xvals	A numeric vector of values at which to calculate predictions or marginal effects, if `x` is numeric. By default, it is calculated from the data using `seq_range`. If `x` is a factor, this is ignored, as is `n`.
level	The confidence level required (used to draw uncertainty bounds).
draw	A logical (default `TRUE`), specifying whether to draw the plot. If `FALSE`, the data used in drawing are returned as a data frame without drawing anything. This might be useful if you want to plot using an alternative plotting package (e.g., ggplot2). Alternatively, if set to the value “add”, the result is added to the existing plot rather than drawn on a new one.
xlab	A character string specifying the value of `xlab` in `plot`.
ylab	A character string specifying the value of `ylab` in `plot`.
xlim	A two-element numeric vector specifying the x-axis limits. Set automatically if missing.
ylim	A two-element numeric vector specifying the y-axis limits. Set automatically if missing.
lwd	An integer specifying the width of the prediction or marginal effect line. See `lines`. If `x` is a factor variable in the model, this is used to set the line width of the error bars.
col	A character string specifying the color of the prediction or marginal effect line. If `x` is a factor variable in the model, this is used to set the color of the error bars.
lty	An integer specifying the “line type” of the prediction or marginal effect line. See `par`. If `x` is a factor variable in the model, this is used to set the line type of the error bars.
se.type	A character string specifying whether to draw the confidence interval as a “shade” (the default, using `polygon`), as “lines” (using `lines`), or not at all (“none”).
se.col	If `se.type = "lines"`, a character string specifying the color of the confidence interval lines. If `se.type = "shade"`, the color of the shaded region border.
se.fill	If `se.type = "shade"`, the color of the shaded region. Ignored otherwise.
se.lwd	If `se.type = "lines"`, the width of the confidence interval lines. See `lines`.
se.lty	If `se.type = "lines"`, an integer specifying the “line type” of the confidence interval lines; if `se.type = "shade"`, the line type of the shaded polygon border. See `par`.
factor.lty	If `x` is a factor variable in the model, this is used to set the line type of an optional line connecting predictions across factor levels. If `factor.lty = 0L` (the default), no line is drawn. See `par`.
factor.pch	If `x` is a factor variable in the model, the shape to use when drawing points. See `points`.
factor.col	If `x` is a factor variable in the model, the color to use for the border of the points. See `points`.
factor.fill	If `x` is a factor variable in the model, the color to use for the fill of the points. See `points`.
factor.cex	If `x` is a factor variable in the model, the “expansion factor” to use for the point size. See `points`.
xaxs	A character string specifying `xaxs`. See `par`.
yaxs	A character string specifying `yaxs`. See `par`.
las	An integer specifying `las`. See `par`.
scatter	A logical indicating whether to plot the observed data in `data` as a scatterplot.
scatter.pch	If `scatter = TRUE`, an integer specifying a shape to use for plotting the data. See `points`.
scatter.col	If `scatter = TRUE`, a character string specifying a color to use for plotting the data. See `points`.
scatter.bg	If `scatter = TRUE`, a character string specifying the background (fill) color to use for plotting the data. See `points`.
scatter.cex	If `scatter = TRUE`, a numeric value specifying the size (“expansion factor”) of the points. See `points`.
rug	A logical specifying whether to include an x-axis “rug” (see `rug`).
rug.col	A character string specifying `col` to `rug`.
rug.size	A numeric value specifying `ticksize` to `rug`.

Value

A tidy data frame containing the data used to draw the plot. Use draw = FALSE to simply generate the data structure for use elsewhere.

Details

Note that when what = "prediction", the plots show predictions holding values of the data at their mean or mode, whereas when what = "effect" average marginal effects (i.e., at observed values) are shown.

When examining generalized linear models (e.g., logistic regression models), confidence intervals for predictions can fall outside of the response scale (again, for logistic regression this means confidence intervals can exceed the (0,1) bounds). This is consistent with the behavior of predict but may not be desired. The examples (below) show ways of constraining confidence intervals to these bounds.

The overall aesthetic is somewhat similar to the output produced by the marginalModelPlot() function in the car package.

Examples

# NOT RUN {
require("datasets")
# prediction from several angles
m <- lm(Sepal.Length ~ Sepal.Width, data = iris)
cplot(m)

# more complex model
m <- lm(Sepal.Length ~ Sepal.Width * Petal.Width * I(Petal.Width ^ 2),
        data = head(iris, 50))
## marginal effect of 'Petal.Width' across 'Petal.Width'
cplot(m, x = "Petal.Width", what = "effect", n = 10)

# factor independent variables
mtcars[["am"]] <- factor(mtcars[["am"]])
m <- lm(mpg ~ am * wt, data = mtcars)
## predicted values for each factor level
cplot(m, x = "am")
## marginal effect of each factor level across numeric variable
cplot(m, x = "wt", dx = "am", what = "effect")

# marginal effect of 'Petal.Width' across 'Sepal.Width'
## refit the iris model first: 'm' currently holds the mtcars model, which
## contains neither 'Sepal.Width' nor 'Petal.Width'
m <- lm(Sepal.Length ~ Sepal.Width * Petal.Width * I(Petal.Width ^ 2),
        data = head(iris, 50))
## without drawing the plot
## this might be useful for using, e.g., ggplot2 for plotting
tmp <- cplot(m, x = "Sepal.Width", dx = "Petal.Width",
             what = "effect", n = 10, draw = FALSE)
if (require("ggplot2")) {
  # use ggplot2 instead of base graphics
  # NOTE(review): assumes the returned data frame has columns 'xvals',
  # 'yvals', 'upper' and 'lower' (bounds at the requested 'level'), as for
  # what = "prediction" -- confirm for what = "effect"
  ggplot(tmp, aes(x = xvals, y = yvals)) +
    geom_line(lwd = 2) +
    geom_line(aes(y = upper)) +
    geom_line(aes(y = lower))
}

# a non-linear model
m <- glm(am ~ wt*drat, data = mtcars, family = binomial)
cplot(m, x = "wt") # prediction (response scale)
cplot(m, x = "wt", type = "link") # prediction (link scale)
if (require("ggplot2")) {
  # prediction (response scale, constrained to [0,1])
  # predictions and bounds are computed on the link scale and then mapped
  # through the inverse logit, so the interval cannot leave (0, 1)
  cplotdat <- cplot(m, x = "wt", type = "link", draw = FALSE)
  ggplot(cplotdat, aes(x = xvals, y = plogis(yvals))) +
    geom_line(lwd = 1.5) +
    geom_line(aes(y = plogis(upper))) +
    geom_line(aes(y = plogis(lower)))
}

# effects on linear predictor and outcome
cplot(m, x = "drat", dx = "wt", what = "effect", type = "link")
cplot(m, x = "drat", dx = "wt", what = "effect", type = "response")

# plot conditional predictions across a third factor
## one curve per species: the first call creates the plot, the later calls
## use draw = "add" to overlay onto it; each fill matches its line colour
local({
  iris$long <- rbinom(nrow(iris), 1, 0.6)
  # no 'family' is given, so glm() fits its default gaussian model
  x <- glm(long ~ Sepal.Width*Species, data = iris)
  cplot(x, x = "Sepal.Width", data = iris[iris$Species == "setosa", ],
        ylim = c(0,1), col = "red", se.fill = rgb(1,0,0,.5), xlim = c(2,4.5))
  cplot(x, x = "Sepal.Width", data = iris[iris$Species == "versicolor", ],
        draw = "add", col = "blue", se.fill = rgb(0,0,1,.5))
  cplot(x, x = "Sepal.Width", data = iris[iris$Species == "virginica", ],
        draw = "add", col = "green", se.fill = rgb(0,1,0,.5))
})

# ordinal outcome
if (require("MASS")) {
  # x is a factor variable
  house.plr <- polr(Sat ~ Infl + Type + Cont, weights = Freq,
                    data = housing)
  ## predicted probabilities
  cplot(house.plr)
  ## cumulative predicted probabilities
  cplot(house.plr, what = "stackedprediction")
  ## ggplot2 example
  if (require("ggplot2")) {
    ## draw = FALSE returns the plotting data without also drawing a base plot
    ggplot(cplot(house.plr, draw = FALSE),
           aes(x = xvals, y = yvals, group = level)) +
      geom_line(aes(color = level))
  }

  # x is continuous
  cyl.plr <- polr(factor(cyl) ~ wt, data = mtcars)
  cplot(cyl.plr, col = c("red", "purple", "blue"),
        what = "stackedprediction")
  cplot(cyl.plr, what = "classprediction")
}

# }

Conditional predicted value and average marginal effect plots for models

Arguments

Value

Details

See also

Examples

Contents