本文学习R中的apply系列函数,内容来自R自带的帮助文档;主要的学习方式是借助谢老大的printr包,把R自带的帮助页面和示例的运行结果直接渲染出来,逐个阅读。
apply系列函数大致有以下几个:apply、tapply、lapply和sapply(以及相关的vapply、replicate)、mapply。
1 apply函数(对一个矩阵按行或者按列[就是margin]进行计算):
1.1 usage
apply | R Documentation |
Apply Functions Over Array Margins
Description
Returns a vector or array or list of values obtained by applying a function to margins of an array or matrix.
Usage
apply(X, MARGIN, FUN, ...)
Arguments
X
|
an array, including a matrix. |
MARGIN
|
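a vector giving the subscripts which the function will be applied over. E.g., for a matrix 1 indicates rows, 2 indicates columns, c(1, 2) indicates rows and columns. Where X has named dimnames, it can be a character vector selecting dimension names. |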
FUN
|
the function to be applied: see ‘Details’. In the case of functions like +, %*%, etc., the function name must be backquoted or quoted. |
...
|
optional arguments to FUN. |
1.2 demo
## Compute row and column sums for a matrix:
x <- cbind(x1 = 3, x2 = c(4:1, 2:5))
dimnames(x)[[1]] <- letters[1:8]
apply(x, 2, mean, trim = .2)
> x1 x2
> 3 3
col.sums <- apply(x, 2, sum)
row.sums <- apply(x, 1, sum)
rbind(cbind(x, Rtot = row.sums), Ctot = c(col.sums, sum(col.sums)))
 | x1 | x2 | Rtot |
---|---|---|---|
a | 3 | 4 | 7 |
b | 3 | 3 | 6 |
c | 3 | 2 | 5 |
d | 3 | 1 | 4 |
e | 3 | 2 | 5 |
f | 3 | 3 | 6 |
g | 3 | 4 | 7 |
h | 3 | 5 | 8 |
Ctot | 24 | 24 | 48 |
stopifnot( apply(x, 2, is.vector))
## Sort the columns of a matrix
apply(x, 2, sort)
x1 | x2 |
---|---|
3 | 1 |
3 | 2 |
3 | 2 |
3 | 3 |
3 | 3 |
3 | 4 |
3 | 4 |
3 | 5 |
## keeping named dimnames
names(dimnames(x)) <- c("row", "col")
x3 <- array(x, dim = c(dim(x),3),
dimnames = c(dimnames(x), list(C = paste0("cop.",1:3))))
identical(x, apply( x, 2, identity))
> [1] TRUE
identical(x3, apply(x3, 2:3, identity))
> [1] TRUE
##- function with extra args:
cave <- function(x, c1, c2) c(mean(x[c1]), mean(x[c2]))
apply(x, 1, cave, c1 = "x1", c2 = c("x1","x2"))
a | b | c | d | e | f | g | h |
---|---|---|---|---|---|---|---|
3.0 | 3 | 3.0 | 3 | 3.0 | 3 | 3.0 | 3 |
3.5 | 3 | 2.5 | 2 | 2.5 | 3 | 3.5 | 4 |
ma <- matrix(c(1:4, 1, 6:8), nrow = 2)
ma
1 | 3 | 1 | 7 |
2 | 4 | 6 | 8 |
apply(ma, 1, table) #--> a list of length 2
> [[1]]
>
> 1 3 7
> 2 1 1
>
> [[2]]
>
> 2 4 6 8
> 1 1 1 1
apply(ma, 1, stats::quantile) # 5 x n matrix with rownames
0% | 1 | 2.0 |
25% | 1 | 3.5 |
50% | 2 | 5.0 |
75% | 4 | 6.5 |
100% | 7 | 8.0 |
stopifnot(dim(ma) == dim(apply(ma, 1:2, sum)))
## Example with different lengths for each call
z <- array(1:24, dim = 2:4)
zseq <- apply(z, 1:2, function(x) seq_len(max(x)))
zseq ## a 2 x 3 matrix
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 |
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 |
typeof(zseq) ## list
> [1] "list"
dim(zseq) ## 2 3
> [1] 2 3
zseq[1,]
> [[1]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
>
> [[2]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
>
> [[3]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
apply(z, 3, function(x) seq_len(max(x)))
> [[1]]
> [1] 1 2 3 4 5 6
>
> [[2]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12
>
> [[3]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
>
> [[4]]
> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
> [24] 24
# a list without a dim attribute
2 函数tapply(进行分组[按照factor变量]统计):
2.1 usage
tapply | R Documentation |
Apply a Function Over a Ragged Array
Description
Apply a function to each cell of a ragged array, that is to each (non-empty) group of values given by a unique combination of the levels of certain factors.
Usage
tapply(X, INDEX, FUN = NULL, ..., simplify = TRUE)
Arguments
X
|
an atomic object, typically a vector. |
INDEX
|
list of one or more factors, each of same length as X. The elements are coerced to factors by as.factor. |
FUN
|
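the function to be applied, or NULL. In the case of functions like +, %*%, etc., the function name must be backquoted or quoted. If FUN is NULL, tapply returns a vector which can be used to subscript the multi-way array tapply normally produces. |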
...
|
optional arguments to FUN. |
simplify
|
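If FALSE, tapply always returns an array of mode "list". If TRUE (the default), then if FUN always returns a scalar, tapply returns an array with the mode of the scalar. |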
2.2 demo
require(stats)
groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
tapply(groups, groups, length) #- is almost the same as
> 8 12 14 15
> 1 1 1 2
table(groups)
8 | 12 | 14 | 15 |
---|---|---|---|
1 | 1 | 1 | 2 |
## contingency table from data.frame : array with named dimnames
tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
 | L | M | H |
---|---|---|---|
A | 401 | 216 | 221 |
B | 254 | 259 | 169 |
tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
> tension
> L M H
> 655 475 390
n <- 17; fac <- factor(rep(1:3, length = n), levels = 1:5)
table(fac)
1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|
6 | 6 | 5 | 0 | 0 |
tapply(1:n, fac, sum)
> 1 2 3 4 5
> 51 57 45 NA NA
tapply(1:n, fac, sum, simplify = FALSE)
> $`1`
> [1] 51
>
> $`2`
> [1] 57
>
> $`3`
> [1] 45
>
> $`4`
> NULL
>
> $`5`
> NULL
tapply(1:n, fac, range)
> $`1`
> [1] 1 16
>
> $`2`
> [1] 2 17
>
> $`3`
> [1] 3 15
>
> $`4`
> NULL
>
> $`5`
> NULL
tapply(1:n, fac, quantile)
> $`1`
> 0% 25% 50% 75% 100%
> 1.00 4.75 8.50 12.25 16.00
>
> $`2`
> 0% 25% 50% 75% 100%
> 2.00 5.75 9.50 13.25 17.00
>
> $`3`
> 0% 25% 50% 75% 100%
> 3 6 9 12 15
>
> $`4`
> NULL
>
> $`5`
> NULL
## example of ... argument: find quarterly means
tapply(presidents, cycle(presidents), mean, na.rm = TRUE)
> 1 2 3 4
> 58.44828 56.43333 57.22222 53.07143
ind <- list(c(1, 2, 2), c("A", "A", "B"))
table(ind)
ind.1/ind.2 | A | B |
---|---|---|
1 | 1 | 0 |
2 | 1 | 1 |
tapply(1:3, ind) #-> the split vector
> [1] 1 2 4
tapply(1:3, ind, sum)
 | A | B |
---|---|---|
1 | 1 | NA |
2 | 2 | 3 |
## Some assertions (not held by all patch proposals):
nq <- names(quantile(1:5))
stopifnot(
identical(tapply(1:3, ind), c(1L, 2L, 4L)),
identical(tapply(1:3, ind, sum),
matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))),
identical(tapply(1:n, fac, quantile)[-1],
array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
`3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
`4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
3 lapply and sapply
lapply的使用格式为:
lapply(X, FUN, ...)
lapply的返回值是一个和X长度相同的list对象,这个list对象中的每个元素是将函数FUN应用到X的对应元素上所得到的结果。其中X为list对象(该list的每个元素都是一个向量),其他类型的对象会被R通过函数as.list()自动转换为list类型。
函数sapply是对lapply的封装(反过来说,lapply可以看作sapply在参数simplify = FALSE、USE.NAMES = FALSE时的特殊情形)。sapply的使用格式为:
sapply(X, FUN,..., simplify = TRUE, USE.NAMES = TRUE)
sapply(*, simplify = FALSE, USE.NAMES = FALSE) 和lapply(*)的返回值是相同的。如果参数simplify=TRUE(默认值),则函数sapply会尽可能把结果简化为向量或矩阵;当各个元素的返回结果长度不一致、无法简化时,返回值仍然是一个list(例如下文demo中的sapply(3:9, seq))。若simplify=FALSE,则函数sapply的返回值总是一个list。
3.1 usage
lapply | R Documentation |
Apply a Function over a List or Vector
Description
lapply returns a list of the same length as X, each element of which is the result of applying FUN to the corresponding element of X.
sapply is a user-friendly version and wrapper of lapply by default returning a vector, matrix or, if simplify = "array", an array if appropriate, by applying simplify2array(). sapply(x, f, simplify = FALSE, USE.NAMES = FALSE) is the same as lapply(x, f).
vapply is similar to sapply, but has a pre-specified type of return value, so it can be safer (and sometimes faster) to use.
replicate is a wrapper for the common use of sapply for repeated evaluation of an expression (which will usually involve random number generation).
simplify2array() is the utility called from sapply() when simplify is not false and is similarly called from mapply().
Usage
lapply(X, FUN, ...)
sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
replicate(n, expr, simplify = "array")
simplify2array(x, higher = TRUE)
Arguments
X
|
a vector (atomic or list) or an expression object. Other objects (including classed objects) will be coerced by base::as.list. |
FUN
|
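the function to be applied to each element of X: see ‘Details’. In the case of functions like +, %*%, the function name must be backquoted or quoted. |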
...
|
optional arguments to FUN. |
simplify
|
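logical or character string; should the result be simplified to a vector, matrix or higher dimensional array if possible? For sapply it must be named and not abbreviated. The default value, TRUE, returns a vector or matrix if appropriate, whereas if simplify = "array" the result may be an array of “rank” (= length(dim(.))) one higher than the result of FUN(X[[i]]). |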
USE.NAMES
|
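logical; if TRUE and if X is character, use X as names for the result unless it had names already. Since this argument follows ... its name cannot be abbreviated. |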
FUN.VALUE
|
a (generalized) vector; a template for the return value from FUN. See ‘Details’. |
n
|
integer: the number of replications. |
expr
|
the expression (a language object, usually a call) to evaluate repeatedly. |
x
|
a list, typically returned from lapply(). |
higher
|
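logical; if true, simplify2array() will produce a (“higher rank”) array when appropriate, whereas higher = FALSE would return a matrix (or vector) only. These two cases correspond to sapply(*, simplify = "array") or simplify = TRUE, respectively. |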
3.2 demo
require(stats); require(graphics)
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
# compute the list mean for each list element
lapply(x, mean)
> $a
> [1] 5.5
>
> $beta
> [1] 4.535125
>
> $logic
> [1] 0.5
# median and quartiles for each list element
lapply(x, quantile, probs = 1:3/4)
> $a
> 25% 50% 75%
> 3.25 5.50 7.75
>
> $beta
> 25% 50% 75%
> 0.2516074 1.0000000 5.0536690
>
> $logic
> 25% 50% 75%
> 0.0 0.5 1.0
sapply(x, quantile)
 | a | beta | logic |
---|---|---|---|
0% | 1.00 | 0.0497871 | 0.0 |
25% | 3.25 | 0.2516074 | 0.0 |
50% | 5.50 | 1.0000000 | 0.5 |
75% | 7.75 | 5.0536690 | 1.0 |
100% | 10.00 | 20.0855369 | 1.0 |
i39 <- sapply(3:9, seq) # list of vectors
sapply(i39, fivenum)
1.0 | 1.0 | 1 | 1.0 | 1.0 | 1.0 | 1 |
1.5 | 1.5 | 2 | 2.0 | 2.5 | 2.5 | 3 |
2.0 | 2.5 | 3 | 3.5 | 4.0 | 4.5 | 5 |
2.5 | 3.5 | 4 | 5.0 | 5.5 | 6.5 | 7 |
3.0 | 4.0 | 5 | 6.0 | 7.0 | 8.0 | 9 |
vapply(i39, fivenum,
c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
Min. | 1.0 | 1.0 | 1 | 1.0 | 1.0 | 1.0 | 1 |
1st Qu. | 1.5 | 1.5 | 2 | 2.0 | 2.5 | 2.5 | 3 |
Median | 2.0 | 2.5 | 3 | 3.5 | 4.0 | 4.5 | 5 |
3rd Qu. | 2.5 | 3.5 | 4 | 5.0 | 5.5 | 6.5 | 7 |
Max. | 3.0 | 4.0 | 5 | 6.0 | 7.0 | 8.0 | 9 |
## sapply(*, "array") -- artificial example
(v <- structure(10*(5:8), names = LETTERS[1:4]))
> A B C D
> 50 60 70 80
f2 <- function(x, y) outer(rep(x, length.out = 3), y)
(a2 <- sapply(v, f2, y = 2*(1:5), simplify = "array"))
> , , A
>
> [,1] [,2] [,3] [,4] [,5]
> [1,] 100 200 300 400 500
> [2,] 100 200 300 400 500
> [3,] 100 200 300 400 500
>
> , , B
>
> [,1] [,2] [,3] [,4] [,5]
> [1,] 120 240 360 480 600
> [2,] 120 240 360 480 600
> [3,] 120 240 360 480 600
>
> , , C
>
> [,1] [,2] [,3] [,4] [,5]
> [1,] 140 280 420 560 700
> [2,] 140 280 420 560 700
> [3,] 140 280 420 560 700
>
> , , D
>
> [,1] [,2] [,3] [,4] [,5]
> [1,] 160 320 480 640 800
> [2,] 160 320 480 640 800
> [3,] 160 320 480 640 800
a.2 <- vapply(v, f2, outer(1:3, 1:5), y = 2*(1:5))
stopifnot(dim(a2) == c(3,5,4), all.equal(a2, a.2),
identical(dimnames(a2), list(NULL,NULL,LETTERS[1:4])))
hist(replicate(100, mean(rexp(10))))
## use of replicate() with parameters:
foo <- function(x = 1, y = 2) c(x, y)
# does not work: bar <- function(n, ...) replicate(n, foo(...))
bar <- function(n, x) replicate(n, foo(x = x))
bar(5, x = 3)
3 | 3 | 3 | 3 | 3 |
2 | 2 | 2 | 2 | 2 |
4 mapply
4.1 usage
mapply | R Documentation |
Apply a Function to Multiple List or Vector Arguments
Description
mapply is a multivariate version of sapply. mapply applies FUN to the first elements of each ... argument, the second elements, the third elements, and so on. Arguments are recycled if necessary.
Usage
mapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)
Arguments
FUN
|
function to apply, found via match.fun. |
…
|
arguments to vectorize over (vectors or lists of strictly positive length, or all of zero length). See also ‘Details’. |
MoreArgs
|
a list of other arguments to FUN. |
SIMPLIFY
|
logical or character string; attempt to reduce the result to a vector, matrix or higher dimensional array; see the simplify argument of sapply. |
USE.NAMES
|
logical; use names if the first … argument has names, or if it is a character vector, use that character vector as the names. |
4.2 demo
mapply(rep, 1:4, 4:1)
> [[1]]
> [1] 1 1 1 1
>
> [[2]]
> [1] 2 2 2
>
> [[3]]
> [1] 3 3
>
> [[4]]
> [1] 4
#直接使用函数rep的结果:
rep(1:4,1:4)
> [1] 1 2 2 3 3 3 4 4 4 4
mapply(rep, times = 1:4, x = 4:1)
> [[1]]
> [1] 4
>
> [[2]]
> [1] 3 3
>
> [[3]]
> [1] 2 2 2
>
> [[4]]
> [1] 1 1 1 1
mapply(rep, times = 1:4, MoreArgs = list(x = 42))
> [[1]]
> [1] 42
>
> [[2]]
> [1] 42 42
>
> [[3]]
> [1] 42 42 42
>
> [[4]]
> [1] 42 42 42 42
mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
> $a
> [1] 11
>
> $b
> [1] 1 2
>
> $c
> [1] -9 -8 -7
word <- function(C, k) paste(rep.int(C, k), collapse = "")
utils::str(mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE))
> List of 6
> $ A: chr "AAAAAA"
> $ B: chr "BBBBB"
> $ C: chr "CCCC"
> $ D: chr "DDD"
> $ E: chr "EE"
> $ F: chr "F"