> ### 1) 팩터의 순서(Levels) 설정
> x1 <- c("Dec", "Apr", "Jan", "Mar")
> x2 <- c("Dec", "Apr", "Jam", "Mar")
> # 팩터 정렬은 알파벳 순서대로 진행 (A-D-J-M)
> factor(x1)
[1] Dec Apr Jan Mar
Levels: Apr Dec Jan Mar
> sort(x1)
[1] "Apr" "Dec" "Jan" "Mar"
> x1 <- c("Dec", "Apr", "Jan", "Mar")
> month_levels <- c(
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+ )
> # 팩터 순서를 month_levels대로 변경
> y1 <- factor(x1, levels = month_levels)
> y1
[1] Dec Apr Jan Mar
Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
> #> [1] Dec Apr Jan Mar
> #> Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
> #> (12 Levels: Jan Feb Mar Apr May Jun Jul Aug ... Dec)
> sort(y1)
[1] Jan Mar Apr Dec
Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
>
> # Jan이 아니라 Jam으로 잘못 입력된 부분은 levels에 없는 값이므로 <NA>로 처리
> y2 <- factor(x2, levels = month_levels)
> y2
[1] Dec Apr <NA> Mar
Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
>
> ### 2) t(x)를 사용하여 벡터를 데이터 프레임으로 변경
> library(UsingR)
> x <- sapply(kid.weights[, 1:3], mean); x
age weight height
47.948 38.384 36.524
> as.data.frame(x)
x
age 47.948
weight 38.384
height 36.524
> # transpose: t(x)는 벡터를 전치하여 1행짜리 행렬로 만들고, 이를 데이터 프레임으로 변환
> as.data.frame(t(x))
age weight height
1 47.948 38.384 36.524
>
> ### 3) 데이터 객체의 정보를 알 수 있는 함수
> ## : 저장 유형 mode(), 객체지향 관점 속성 class(), 객체 타입 typeof()
> ## mode(): (numeric), character, logical, (list)
> ## class(): (numeric, factor), character, logical, (data.frame)
> ## typeof(): (integer, double), character, logical, (list)
>
> # 예제1: numeric
> a <- 555
> mode(a)
[1] "numeric"
> class(a)
[1] "numeric"
> typeof(a)
[1] "double"
> typeof(1:100)
[1] "integer"
>
> # 예제2: character
> a <- "Data Science & Big Data"
> mode(a)
[1] "character"
> class(a)
[1] "character"
> typeof(a)
[1] "character"
> # 예제3: logical
> a <- (10 > 7)
> mode(a)
[1] "logical"
> class(a)
[1] "logical"
> typeof(a)
[1] "logical"
>
> # 예제4: factor
> gender <- c("M", "M", "F")
> gender <- factor(gender)
> gender
[1] M M F
Levels: F M
> mode(gender)
[1] "numeric"
> class(gender)
[1] "factor"
> typeof(gender)
[1] "integer"
> # 예제5: data.frame
> name <- c("Cha", "Park", "Jung")
> toeic <- c(900, 690, 730)
> gpa <- c(3.8, 4.5, 3.1)
> student <- data.frame(name, toeic, gpa)
> str(student)
'data.frame': 3 obs. of 3 variables:
$ name : chr "Cha" "Park" "Jung"
$ toeic: num 900 690 730
$ gpa : num 3.8 4.5 3.1
> mode(student)
[1] "list"
> class(student)
[1] "data.frame"
> typeof(student)
[1] "list"
> # 예제6: kid.weights의 네 변수
> # (sapply를 통한 벡터연산)
> library(UsingR)
> head(kid.weights)
age weight height gender
1 58 38 38 M
2 103 87 43 M
3 87 50 48 M
4 138 98 61 M
5 82 47 47 F
6 52 30 24 F
> str(kid.weights)
'data.frame': 250 obs. of 4 variables:
$ age : num 58 103 87 138 82 52 28 79 107 45 ...
$ weight: num 38 87 50 98 47 30 24 45 144 24 ...
$ height: num 38 43 48 61 47 24 29 48 59 24 ...
$ gender: Factor w/ 2 levels "F","M": 2 2 2 2 1 1 2 1 2 2 ...
> sapply(kid.weights, class)
age weight height gender
"numeric" "numeric" "numeric" "factor"
> sapply(kid.weights, mode)
age weight height gender
"numeric" "numeric" "numeric" "numeric"
> sapply(kid.weights, typeof)
age weight height gender
"double" "double" "double" "integer"
>
> # str(kid.weights)의 자료구조는 class(속성)로 표시됨
>
> ### 4) 객체의 속성에 따른 summary() 결과
> name <- c("Cha", "Park", "Jung")
> gender <- c("M", "F", "M")
> toeic <- c(900, 690, 730)
> gpa <- c(3.8, 4.5, 3.1)
> student <- data.frame(name, gender, toeic, gpa)
> str(student)
'data.frame': 3 obs. of 4 variables:
$ name : chr "Cha" "Park" "Jung"
$ gender: chr "M" "F" "M"
$ toeic : num 900 690 730
$ gpa : num 3.8 4.5 3.1
> student$gender <- factor(student$gender)
> str(student)
'data.frame': 3 obs. of 4 variables:
$ name : chr "Cha" "Park" "Jung"
$ gender: Factor w/ 2 levels "F","M": 2 1 2
$ toeic : num 900 690 730
$ gpa : num 3.8 4.5 3.1
> summary(student)
name gender toeic gpa
Length:3 F:1 Min. :690.0 Min. :3.10
Class :character M:2 1st Qu.:710.0 1st Qu.:3.45
Mode :character Median :730.0 Median :3.80
Mean :773.3 Mean :3.80
3rd Qu.:815.0 3rd Qu.:4.15
Max. :900.0 Max. :4.50
'데이터 [Data] > R' 카테고리의 다른 글
대한민국 범죄현황 분석 - 인공지능 개발자 양성과정 R 프로젝트 (0) | 2021.11.12 |
---|---|
R 데이터시각화 함수를 활용한 탐색적 자료분석 (0) | 2021.06.10 |
R plot: 이산형 분포의 근사 (0) | 2021.06.09 |
R plot: 이산형 분포의 누적분포함수 (0) | 2021.06.08 |
R plot: 이산형 분포의 확률밀도함수 (0) | 2021.06.08 |
댓글