R语言入门

2019-11-08 02:43:27

字体：大中小

来源：转载

供稿：网友

R语言的数据结构

对象的五种基本类型：（1）字符（character）（2）数值（numeric: real numbers）（3）整数（integer）（4）复数（complex: 1+2i）（5）逻辑（logical: TRUE / FALSE）

> x <- 1  # 赋值
> x
[1] 1
> class(x)
[1] "numeric"
> x <- 2L  # 整数后面加大写L表示为整数型
> class(x)
[1] "integer"
> y <- "hello world"
> class(y)
[1] "character"
> t <- TRUE
> x <- 1+2i
> class(x)
[1] "complex"

对象的属性：（1）名称（2）维度（3）类型（4）长度

数据结构：

（1）向量：只能包含同一种类型的对象

# vector
x <- vector("character", length = 10)  # 创建一个长度为10的字符型向量
x1 <- 1:4  # x1为1到4的一个长为4的向量
x2 <- c(1,2,3,4)  # 直接创建一个向量
x3 <- c(TRUE,10,"a")  # 系统会自动把这三个不同类型的转化为同一类型
x4 <- c("a","b","c")
as.numeric(x4)
as.logical()
as.character()  # 强制转换
class(x1)
names(x1) <- c("a","b","c","d")  # 给变量起名字
> x1
a b c d 
1 2 3 4 

（2）矩阵

——向量+维度属性（整数向量：nrow，ncol）

# Matrix
# 矩阵的创建
x <- matrix(1:6, nrow = 3, ncol = 2)
x
dim(x)
attributes(x)
y <- 1:6  # 先创建一个向量，再赋值维度属性
dim(y) <- c(2,3)
y
y2 <- matrix(1:6, nrow = 2, ncol = 3)
rbind(y,y2)  # 按行拼接
cbind(y,y2)  # 按列拼接
# 执行步骤
> x <- matrix(nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]   NA   NA
[2,]   NA   NA
[3,]   NA   NA
> x <- matrix(1:6, nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
> dim(x)
[1] 3 2
> attributes(x)
$dim
[1] 3 2
> y <- 1:6
> dim(y) <- c(2,3)
> y
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> y2 <- matrix(1:6, nrow = 2, ncol = 3)
> rbind(y,y2)
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
[3,]    1    3    5
[4,]    2    4    6
> cbind(y,y2)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    3    5    1    3    5
[2,]    2    4    6    2    4    6

（3）数组 ——与矩阵类似，但是维度可以大于2

# array
# 数组的创建
x <- array(1:24, dim = c(4,6))
x <- array(1:24, dim = c(2,3,4))
x

执行步骤：

> x <- array(1:24, dim = c(4,6))
> x
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    5    9   13   17   21
[2,]    2    6   10   14   18   22
[3,]    3    7   11   15   19   23
[4,]    4    8   12   16   20   24
> x <- array(1:24, dim = c(2,3,4))
> x
, , 1
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
, , 2
     [,1] [,2] [,3]
[1,]    7    9   11
[2,]    8   10   12
, , 3
     [,1] [,2] [,3]
[1,]   13   15   17
[2,]   14   16   18
, , 4
     [,1] [,2] [,3]
[1,]   19   21   23
[2,]   20   22   24

（4）列表

# list
# 列表的创建
l <- list("a", 2, 10L, 3+4i, TRUE)
l2 <- list(a=1, b=2, c=3)
l3 <- list(c(1,2,3), c(4,5,6,7))
x <- matrix(1:6, nrow = 2, ncol = 3)
dimnames(x) <- list(c("a","b"), c("c","d","e"))

执行步骤：

> l <- list("a", 2, 10L, 3+4i, TRUE)
> l
[[1]]
[1] "a"
[[2]]
[1] 2
[[3]]
[1] 10
[[4]]
[1] 3+4i
[[5]]
[1] TRUE
> l2 <- list(a=1, b=2, c=3)
> l2
$a
[1] 1
$b
[1] 2
$c
[1] 3
> l3 <- list(c(1,2,3), c(4,5,6,7))
> l3
[[1]]
[1] 1 2 3
[[2]]
[1] 4 5 6 7
> x <- matrix(1:6, nrow = 2, ncol = 3)
> x
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> dimnames(x) <- list(c("a","b"), c("c","d","e"))
> x
  c d e
a 1 3 5
b 2 4 6

（5）因子（factor）

——分类数据/有序 vs. 无序

——整数型向量+标签（label）（优于整数向量） Male/Female vs. 1/2 常用于 lm( ),glm( )

# 创建因子
> x <- factor(c("female","female","male","male","female"))
> x
[1] female female male   male   female
Levels: female male
> x <- factor(c("female","female","male","male","female"), levels = c("male","female"))
> x
[1] female female male   male   female
Levels: male female
# 查看因子
> table(x)
x
  male female 
     2      3 
> unclass(x)
[1] 2 2 1 1 2
attr(,"levels")
[1] "male"   "female"
> class(unclass(x))
[1] "integer"

（6）缺失值（missing value）

——NA/NaN：NaN属于NA，NA不属于NaN ——NA有类型属性：integer NA，character NA等 ——is.na()/is.nan()

(8) 数据框 ——存储表格数据 ——视为各元素长度相同的列表

每个元素代表一列数据
每个元素的长度代表行数
元素类型可以不同
# 创建一个数据框
> df <- data.frame(id = c(1,2,3,4), name = c("a","b","c","d"), gender = c(TRUE,TRUE,FALSE,FALSE))
> df
  id name gender
1  1    a   TRUE
2  2    b   TRUE
3  3    c  FALSE
4  4    d  FALSE
# 查看行列
> nrow(df)
[1] 4
> ncol(df)
[1] 3
> df2 <- data.frame(id = c(1,2,3,4), score = c(80,90,86,100))
> df2
  id score
1  1    80
2  2    90
3  3    86
4  4   100
# 转化为矩阵
> data.matrix(df2)
     id score
[1,]  1    80
[2,]  2    90
[3,]  3    86
[4,]  4   100

（9）时间和日期

——日期：Date

距离1970-01-01的天数 / date() / Sys.Date() / weekdays() / months() / quarters()
# 查询当前时间
> x <- date()
> x
[1] "Sat Feb 18 20:32:20 2017"
> class(x)
[1] "character"
# 查询当前时间（表示方式不同）
> x2 <- Sys.Date()
> x2
[1] "2017-02-18"
> class(x2)
[1] "Date"
# 创建一个时间
> x3 <- as.Date("2016-01-01")
> x3
[1] "2016-01-01"
# 查询时间的当前属性
> class(x3)
[1] "Date"
> weekdays(x3)
[1] "星期五"
> months(x3)
[1] "一月"
> quarters(x3)  # 季度
[1] "Q1"
> julian(x3)  # 距离1970年过去了多少天
[1] 16801
attr(,"origin")
[1] "1970-01-01"
# 时间运算
> x4 <- as.Date("2017-01-01")
> x4
[1] "2017-01-01"
> x4-x3
Time difference of 366 days
> as.numeric(x4-x3)
[1] 366

——时间：POSIXct / POSIXlt

距离1970-01-01的秒数 / Sys.time()
POSIXct: 整数，常用来存入数据框
POSIXlt：列表，还包含星期、年、月、日等信息
# get time
> x <- Sys.time()
> x
[1] "2017-02-18 20:47:07 CST"
> class(x)
[1] "POSIXct" "POSIXt" 
> p <- as.POSIXlt(x)
> p
[1] "2017-02-18 20:47:07 CST"
> class(p)
[1] "POSIXlt" "POSIXt" 
# 去掉类型，查看当前列表的属性
> names(unclass(p))
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"  
 [9] "isdst"  "zone"   "gmtoff"
# 用变量名$属性就能查看指定属性
> p$sec
[1] 7.927575
# 规范化时间输出格式
> x1 <- "一月 1, 2015 01:01"
> strptime(x1,"%B %d, %Y %H:%M")
[1] "2015-01-01 01:01:00 CST"