Advanced R Book Club !
August 24 2019
OO YEAH
Based on https://adv-r.hadley.nz/oo.html, https://adv-r.hadley.nz/base-types.html and https://adv-r.hadley.nz/s3.html
'Generally in R, functional programming is much more important than object-oriented programming.' -hw
diamonds <- ggplot2::diamonds
summary(diamonds$carat)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.2000 0.4000 0.7000 0.7979 1.0400 5.0100
summary(diamonds$cut)
Fair Good Very Good Premium Ideal
1610 4906 12082 13791 21551
diamonds <- ggplot2::diamonds
summary(diamonds$carat)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.2000 0.4000 0.7000 0.7979 1.0400 5.0100
summary(diamonds$cut)
Fair Good Very Good Premium Ideal
1610 4906 12082 13791 21551
See all the things that have a summary:
methods(summary)[1:20] %>% knitr::kable()
x |
---|
summary.aov |
summary.aovlist |
summary.aspell |
summary.check_packages_in_dir |
summary.connection |
summary.data.frame |
summary.Date |
summary.default |
summary.Duration |
summary.ecdf |
summary.factor |
summary.ggplot |
summary.glm |
summary.hcl_palettes |
summary.infl |
summary.Interval |
summary.lm |
summary.loess |
summary.manova |
summary.matrix |
See all the methods for a factor:
methods(class = factor) %>% tibble() %>% knitr::kable()
. |
---|
[.factor |
[[.factor |
[[<-.factor |
[<-.factor |
all.equal.factor |
as.character.factor |
as.data.frame.factor |
as.Date.factor |
as.list.factor |
as.logical.factor |
as.POSIXlt.factor |
as.vector.factor |
coerce,ANY,integer-method |
coerce,oldClass,S3-method |
Compare,numeric,Period-method |
Compare,Period,numeric-method |
droplevels.factor |
format.factor |
initialize,oldClass-method |
is_vector_s3.factor |
is.na<-.factor |
length<-.factor |
levels<-.factor |
Math.factor |
Ops,nonStructure,vector-method |
Ops,structure,vector-method |
Ops,vector,nonStructure-method |
Ops,vector,structure-method |
Ops.factor |
plot.factor |
print.factor |
relevel.factor |
relist.factor |
rep.factor |
show,oldClass-method |
slotsFromS3,factor-method |
summary.factor |
Summary.factor |
type_sum.factor |
xtfrm.factor |
cat.drink( 'milk' )
drink(cat, 'milk' )
Editorial opinion: OO is hard in R because there are so many different systems. OTOH, if you can OO in R, you can OO anywhere.
But some things are more objecty than others. Guess the results!
x = 1L
typeof( x )
class(x)
x = factor('a')
typeof( x )
is.object(x)
sloop::otype( x )
class(x)
sloop::s3_class(x)
x = 1L
typeof( x )
[1] "integer"
class(x)
[1] "integer"
is.object(x)
[1] FALSE
sloop::otype(x)
[1] "base"
x = factor('a')
typeof( x )
[1] "integer"
is.object(x)
[1] TRUE
sloop::otype( x )
[1] "S3"
class(x)
[1] "factor"
sloop::s3_class(x)
[1] "factor"
All objects have a base type. Some have an OO type also
Use typeof to access
Referenced in R core. Usually implemented in C, there are 25 of them. (NULL, logical, double etc. )
Examples:
f = factor(c('a','b','c'))
typeof( f )
[1] "integer"
OO Types: use attr(x, 'class') or class(x) or sloop::s3_class(x) ( at least for s3 classes)
attr(f, 'class')
[1] "factor"
class(f)
[1] "factor"
sloop::s3_class(f)
[1] "factor"
an object is an s3 class if it has a class attribute. (Surprise! A thing can have attributes but not be a class! )
f <- factor(c("a", "b", "c"))
typeof(f)
[1] "integer"
attributes(f)
$levels
[1] "a" "b" "c"
$class
[1] "factor"
TO get the base object, use unclass
unclass(f)
[1] 1 2 3
attr(,"levels")
[1] "a" "b" "c"
S3 objects can be passed to generics, like print
print(f)
[1] a b c
Levels: a b c
print(unclass(f))
[1] 1 2 3
attr(,"levels")
[1] "a" "b" "c"
Creating a generic is “easy”
print
function (x, ...)
UseMethod("print")
<bytecode: 0x7f961ebd6838>
<environment: namespace:base>
summary
function (object, ...)
UseMethod("summary")
<bytecode: 0x7f962197de50>
<environment: namespace:base>
Implementing it is the real work!
print.factor
function (x, quote = FALSE, max.levels = NULL, width = getOption("width"),
...)
{
ord <- is.ordered(x)
if (length(x) == 0L)
cat(if (ord)
"ordered"
else "factor", "(0)\n", sep = "")
else {
xx <- character(length(x))
xx[] <- as.character(x)
keepAttrs <- setdiff(names(attributes(x)), c("levels",
"class"))
attributes(xx)[keepAttrs] <- attributes(x)[keepAttrs]
print(xx, quote = quote, ...)
}
maxl <- if (is.null(max.levels))
TRUE
else max.levels
if (maxl) {
n <- length(lev <- encodeString(levels(x), quote = ifelse(quote,
"\"", "")))
colsep <- if (ord)
" < "
else " "
T0 <- "Levels: "
if (is.logical(maxl))
maxl <- {
width <- width - (nchar(T0, "w") + 3L + 1L +
3L)
lenl <- cumsum(nchar(lev, "w") + nchar(colsep,
"w"))
if (n <= 1L || lenl[n] <= width)
n
else max(1L, which.max(lenl > width) - 1L)
}
drop <- n > maxl
cat(if (drop)
paste(format(n), ""), T0, paste(if (drop)
c(lev[1L:max(1, maxl - 1)], "...", if (maxl > 1) lev[n])
else lev, collapse = colsep), "\n", sep = "")
}
if (!isTRUE(val <- .valid.factor(x)))
warning(val)
invisible(x)
}
<bytecode: 0x7f96212960c8>
<environment: namespace:base>
print.POSIXct
function (x, tz = "", usetz = TRUE, max = NULL, ...)
{
if (is.null(max))
max <- getOption("max.print", 9999L)
FORM <- if (missing(tz))
function(z) format(z, usetz = usetz)
else function(z) format(z, tz = tz, usetz = usetz)
if (max < length(x)) {
print(FORM(x[seq_len(max)]), max = max + 1, ...)
cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
length(x) - max, "entries ]\n")
}
else if (length(x))
print(FORM(x), max = max, ...)
else cat(class(x)[1L], "of length 0\n")
invisible(x)
}
<bytecode: 0x7f96229f0fb8>
<environment: namespace:base>
Usually the . in the funtion name means its an implemntation of a generic. Earlier we saw all the implementation of “summary”“
Sometimes the implementations are hidden and you can see them like this
sloop::s3_get_method( weighted.mean.Date)
function (x, w, ...)
structure(weighted.mean(unclass(x), w, ...), class = "Date")
<bytecode: 0x7f96226baf50>
<environment: namespace:stats>
Let's do one together!
structure(c(1,2,3), levels = c("a",'b', 'c'), class = 'factor')
[1] a b c
Levels: a b c
f = c(1,2,3) %>% as.integer() # why as integer: https://github.com/wch/r-source/blob/27da0eac8bb84677002febcf12e6d61bb7358d89/src/main/attrib.c
attr(f,'levels') = c("a",'b', 'c')
attr(f,'class' ) ='factor'
f
[1] a b c
Levels: a b c
other useful functions:
The vectrs package has some advice for doing this saafely
Constructor, validator, internal validator: new_date ( for internal use) validate_date ( for lots of places) date ( for external use)
Thats what UseMethod is
sloop::s3_dispatch(print(1))
print.double
print.numeric
=> print.default
Vector objects (e.g date, factor)
Data frame
df = data.frame(a = c(1,2), b = c(1,2))
unclass(df)
$a
[1] 1 2
$b
[1] 1 2
attr(,"row.names")
[1] 1 2
typeof(df)
[1] "list"
Scalar objects ( based on lists usually)
mdl = lm( iris$Sepal.Length ~ iris$Sepal.Width)
typeof(mdl)
[1] "list"
names(mdl)
[1] "coefficients" "residuals" "effects" "rank"
[5] "fitted.values" "assign" "qr" "df.residual"
[9] "xlevels" "call" "terms" "model"
class(mdl)
[1] "lm"
If you wanna build your own classes, check out the vctrs package
new_secret <- function(x = double()) {
stopifnot(is.double(x))
structure(x, class = "secret")
}
print.secret <- function(x, ...) {
print(strrep("x", nchar(x)))
invisible(x)
}
x <- new_secret(c(15, 1, 456))
Not like this (why not)?
`[.secret` <- function(x, i) {
new_secret(x[i])
}
Fine but makes an extra copy
`[.secret` <- function(x, i) {
k = unclass(x)
new_secret(k[i])
}
Delegates to the primitive “[”
`[.secret` <- function(x, i) {
k = unclass(x)
new_secret(NextMethod())
}
Base types - class does always determine method dispatch ( Especially for simpler types like matrix )
Internal generics- some methods (e.g. cbind, sum) are generics but their genericness is not obvious because it is implemented in C
sloop:::internal_generics()
[1] "+" "-" "*" "^"
[5] "%%" "%/%" "/" "=="
[9] ">" "<" "!=" "<="
[13] ">=" "&" "|" "abs"
[17] "sign" "sqrt" "ceiling" "floor"
[21] "trunc" "cummax" "cummin" "cumprod"
[25] "cumsum" "exp" "expm1" "log"
[29] "log10" "log2" "log1p" "cos"
[33] "cosh" "sin" "sinh" "tan"
[37] "tanh" "acos" "acosh" "asin"
[41] "asinh" "atan" "atanh" "cospi"
[45] "sinpi" "tanpi" "gamma" "lgamma"
[49] "digamma" "trigamma" "round" "signif"
[53] "max" "min" "range" "prod"
[57] "sum" "any" "all" "Arg"
[61] "Conj" "Im" "Mod" "Re"
[65] "anyNA" "as.character" "as.complex" "as.double"
[69] "as.environment" "as.integer" "as.logical" "as.call"
[73] "as.numeric" "as.raw" "c" "dim"
[77] "dim<-" "dimnames" "dimnames<-" "is.array"
[81] "is.finite" "is.infinite" "is.matrix" "is.na"
[85] "is.nan" "is.numeric" "length" "length<-"
[89] "levels<-" "names" "names<-" "rep"
[93] "seq.int" "xtfrm" "[" "[["
[97] "$" "[<-" "[[<-" "$<-"
[101] "unlist" "cbind" "rbind" "as.vector"
Group generics
sloop:::group_generics()
$Ops
[1] "+" "-" "*" "^" "%%" "%/%" "/" "==" ">" "<" "!="
[12] "<=" ">=" "&" "|"
$Math
[1] "abs" "sign" "sqrt" "ceiling" "floor" "trunc"
[7] "cummax" "cummin" "cumprod" "cumsum" "exp" "expm1"
[13] "log" "log10" "log2" "log1p" "cos" "cosh"
[19] "sin" "sinh" "tan" "tanh" "acos" "acosh"
[25] "asin" "asinh" "atan" "atanh" "cospi" "sinpi"
[31] "tanpi" "gamma" "lgamma" "digamma" "trigamma" "round"
[37] "signif"
$Summary
[1] "max" "min" "range" "prod" "sum" "any" "all"
$Complex
[1] "Arg" "Conj" "Im" "Mod" "Re"
Primitive Generics
.S3PrimitiveGenerics
[1] "anyNA" "as.character" "as.complex" "as.double"
[5] "as.environment" "as.integer" "as.logical" "as.call"
[9] "as.numeric" "as.raw" "c" "dim"
[13] "dim<-" "dimnames" "dimnames<-" "is.array"
[17] "is.finite" "is.infinite" "is.matrix" "is.na"
[21] "is.nan" "is.numeric" "length" "length<-"
[25] "levels<-" "names" "names<-" "rep"
[29] "seq.int" "xtfrm"
Internal generics do not use the implicit class
install.packages('devtools');
devtools::install_github( "djacobs7/remembr");
remembr::install_remembr()
remembr::addCardDeck("advanced-r-object-oriented-part-1")
:: base::- base::: base::! base::( base::[ base::[[ base::{ base::& base::^ base::+ base::< base::<- base::> base::~ base::$ base::abs base::all base::as.Date base::as.difftime base::as.double base::as.POSIXlt base::attr base::attributes base::c base::character base::class base::data.frame base::difftime base::double base::factor base::formals base::function base::globalenv base::if base::inherits base::integer base::invisible base::is.character base::is.double base::is.integer base::is.na base::is.numeric base::is.object base::ISOdatetime base::length base::library base::list base::log base::match base::match.arg base::matrix base::max base::mean base::nchar base::NextMethod base::nrow base::ordered base::print base::quote base::rep base::sample base::set.seed base::source base::stop base::stopifnot base::strptime base::strrep base::structure base::sum base::summary base::Sys.Date base::Sys.time base::t base::table base::typeof base::unclass base::unique base::UseMethod knitr::include_graphics sloop::ftype sloop::otype sloop::s3_class sloop::s3_dispatch sloop::s3_get_method sloop::s3_methods_class sloop::s3_methods_generic stats::ecdf stats::lm stats::rpois stats4::mle utils::str vctrs::vec_restore
lm( iris$Sepal.Length ~ iris$Sepal.Length )
remembr::flashCards()
But some things are more objecty than others. Guess the results!
is.object(1:10)
sloop::otype(1:10)
is.object(mtcars)
sloop::otype(mtcars)
x = matrix(1:4, nrow =2)
class(x)
sloop::s3_class(x)
Here are the results
is.object(1:10)
[1] FALSE
sloop::otype(1:10)
[1] "base"
is.object(mtcars)
[1] TRUE
sloop::otype(mtcars)
[1] "S3"
x = matrix(1:4, nrow =2)
class(x)
[1] "matrix"
sloop::s3_class(x)
[1] "matrix" "integer" "numeric"