R (4)

sink()

1
2
3
4
5
6
7
8
# Work inside the data directory so output.txt is created there.
setwd("/Users/zerohertz/RData")

# Copy the airquality data set and show its structure.
air <- airquality
str(air)

# Put the columns of `air` on the search path so Temp can be used by name.
attach(air)

# Redirect console output to an external file.
sink("output.txt")
mean(Temp)
sd(Temp)
# Calling sink() with no arguments ends the redirection.
sink()

output.txt

output.txt

  • setwd() 해두고 sink() 실행

pdf()

1
2
3
# Open a PDF graphics device so the plot is saved separately as plot.pdf.
# Temp and Wind are expected to be reachable by name (e.g. after attach(air)).
pdf(file = "plot.pdf")
plot(x = Temp, y = Wind)
# Close the device to mark the end of the output.
dev.off()

plot.pdf

plot.pdf


print(), cat()

1
2
3
4
5
6
7
8
> print(pi)
[1] 3.141593
> print('원주율은',pi,'이다')
Error in print.default("원주율은", pi, "이다") : invalid 'quote' argument
> print('dd') #하나만 됨
[1] "dd"
> cat('원주율은',pi,'이다')
원주율은 3.141593 이다
1
# Write the mean and standard deviation of x to output1.txt
# instead of printing them to the console.
cat(
  "x의 평균 : ", mean(x), "\n",
  "x의 표준편차 : ", sd(x),
  file = "output1.txt"
)

output1.txt

output1.txt


write.table()

1
2
# Show the structure of the `women` data frame.
str(women)
# Send the data held in R out to an external text file.
write.table(x = women, file = "women.txt")

women.txt

women.txt


write.xlsx()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Load the xlsx package, which provides write.xlsx().
library(xlsx)
# Export `women` to an Excel workbook (row names are written by default).
write.xlsx(women, "women.xlsx")
# Same export without the row-name column. FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
write.xlsx(women, "women1.xlsx", row.names = FALSE)

women.xlsx, women1.xlsx

women.xlsx


seq()

> 1:5
[1] 1 2 3 4 5
> -3:3
[1] -3 -2 -1 0 1 2 3
> 5:0
[1] 5 4 3 2 1 0
> seq(0,10,by=0.1)
[1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8
[30] 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7
[59] 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8.0 8.1 8.2 8.3 8.4 8.5 8.6
[88] 8.7 8.8 8.9 9.0 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10.0
> seq(0,10,by=2)
[1] 0 2 4 6 8 10
> seq(0,10,length=3)
[1] 0 5 10

rep()

1
2
3
4
5
6
> rep(1,10)
[1] 1 1 1 1 1 1 1 1 1 1
> rep(c(1,2),5)
[1] 1 2 1 2 1 2 1 2 1 2
> rep(c('M','F'),c(2,3))
[1] "M" "M" "F" "F" "F"

Etc.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
> abs(-10) #absolute
[1] 10
> sqrt(10) #square root
[1] 3.162278
> ceiling(3.475) #올림
[1] 4
> floor(3.475) #내림
[1] 3
> trunc(5.99) #버림
[1] 5
> round(3.475, 2) #반올림
[1] 3.48
> signif(0.00347, 2)
[1] 0.0035
> signif(3.12345, 3)
[1] 3.12
> signif(3.1251, 3)
[1] 3.13
> log(10)
[1] 2.302585
> exp(10)
[1] 22026.47
>
> x = c(1, 2, 3, 4, 5)
> median(x)
[1] 3
> mean(x)
[1] 3
> IQR(x) #사분위수 범위 3/4수-1/4수(75%-25%)
[1] 2
> sd(x)
[1] 1.581139
> diff(x,lag = 1) #lag-차분, 주식과 같은 시계열 Data에 사용
[1] 1 1 1 1
  • 자기상관이 있으면 시계열 Data(ex. 계절따라 가스 수입)

nchar()

1
2
3
4
> y=c('asdf','zxcv','qwer');y
[1] "asdf" "zxcv" "qwer"
> nchar(y) #각 문자의 개수
[1] 4 4 4

substr()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
> substr('응용통계학',1,3) #중요
[1] "응용통"
> time='201912311504'
> year=substr(time,1,4);year
[1] "2019"
> month=substr(time,5,6);month
[1] "12"
> day=substr(time,7,8);day
[1] "31"
> hour=substr(time,9,10);hour
[1] "15"
> minute=substr(time,11,12);minute
[1] "04"
> z=c('응용통계학','정보통계학')
> substr(z,c(1,3),c(2,5))
[1] "응용" "통계학"
> cities=c('New York,NY','Ann Arbor,MI','Chicago,IL')
> states=substr(cities,nchar(cities)-1,nchar(cities));states
[1] "NY" "MI" "IL"
> city=strsplit(cities,split=',');city
[[1]]
[1] "New York" "NY"

[[2]]
[1] "Ann Arbor" "MI"

[[3]]
[1] "Chicago" "IL"
  • slicing

산술연산자, 비교연산자, 논리연산자, 배정연산자(우선순위)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
> i=1+2&0+3;i
[1] TRUE
> i=1+2&0;i
[1] FALSE
> x=c(80,88,90,93,95,94,99,78,65)
> cat.x=(x>=100)+(x<90)+(x<80);cat.x
[1] 1 1 0 0 0 0 0 2 2
> cat.x1=factor(cat.x,labels=c('A','B','C'));cat.x1 #0~79/80~89/90~
[1] B B A A A A A C C
Levels: A B C
>
> cat.x2=(x<=100)+(x<90)+(x<80);cat.x2
[1] 2 2 1 1 1 1 1 3 3
> cat.x3=factor(cat.x2,labels=c('A','B','C'));cat.x3
[1] B B A A A A A C C
Levels: A B C
> 5%%2
[1] 1
  • 산술연산자 : (), ^, **, %%, %/%, %*%, *, /, +, -
  • 비교연산자 : <, <=, >, >=, ==, !=
  • 논리연산자 : !, &, &&, |, ||
  • 배정(할당)연산자 : <-, ->, =

cut()

1
2
3
> cat.x4=cut(x,breaks=c(0,80,90,100),include.lowest=T,right=F,labels=c('C','B','A'));cat.x4
[1] B B A A A A A C C
Levels: C B A

na.rm

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
> air = airquality
> str(air)
'data.frame': 153 obs. of 6 variables:
$ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
$ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
$ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
$ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
$ Month : int 5 5 5 5 5 5 5 5 5 5 ...
$ Day : int 1 2 3 4 5 6 7 8 9 10 ...
> attach(air)
> mean(Ozone)
[1] NA
> is.na(Ozone)
[1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[17] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE
[33] TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE FALSE
[49] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
[65] TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[81] FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[97] FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[113] FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[129] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
> summary(Ozone)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.00 18.00 31.50 42.13 63.25 168.00 37
> mean(Ozone,na.rm=T)
[1] 42.12931
> sd(Ozone,na.rm=T)
[1] 32.98788

$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
> x=data.frame(x1=c(1,2,3,4),x2=c(5,6,7,8));x
x1 x2
1 1 5
2 2 6
3 3 7
4 4 8
> attach(x)
> sum_x=x1+x2;sum_x
[1] 6 8 10 12
> x$sum_x=x1+x2;x
x1 x2 sum_x
1 1 5 6
2 2 6 8
3 3 7 10
4 4 8 12
> air$cat.Ozone=cut(Ozone,breaks=c(0,35,100000),include.lowest=T,right=F,labels=c('Low','High'));air
Ozone Solar.R Wind Temp Month Day cat.Ozone
1 41 190 7.4 67 5 1 High
2 36 118 8.0 72 5 2 High
3 12 149 12.6 74 5 3 Low
...
151 14 191 14.3 75 9 28 Low
152 18 131 8.0 76 9 29 Low
153 20 223 11.5 68 9 30 Low
> air$Ozone_sqrt=sqrt(Ozone);air
Ozone Solar.R Wind Temp Month Day cat.Ozone Ozone_sqrt
1 41 190 7.4 67 5 1 High 6.403124
2 36 118 8.0 72 5 2 High 6.000000
3 12 149 12.6 74 5 3 Low 3.464102
...
151 14 191 14.3 75 9 28 Low 3.741657
152 18 131 8.0 76 9 29 Low 4.242641
153 20 223 11.5 68 9 30 Low 4.472136
  • $를 이용하면 계산 결과가 데이터 프레임의 새 열로 바로 추가됨

sort()

1
2
3
4
5
6
7
8
9
10
11
12
> sort(air$Temp)
[1] 56 57 57 57 58 58 59 59 61 61 61 62 62 63 64 64 65 65 66 66 66 67 67 67 67 68 68 68 68 69 69 69
[33] 70 71 71 71 72 72 72 73 73 73 73 73 74 74 74 74 75 75 75 75 76 76 76 76 76 76 76 76 76 77 77 77
[65] 77 77 77 77 78 78 78 78 78 78 79 79 79 79 79 79 80 80 80 80 80 81 81 81 81 81 81 81 81 81 81 81
[97] 82 82 82 82 82 82 82 82 82 83 83 83 83 84 84 84 84 84 85 85 85 85 85 86 86 86 86 86 86 86 87 87
[129] 87 87 87 88 88 88 89 89 90 90 90 91 91 92 92 92 92 92 93 93 93 94 94 96 97
> sort(air$Temp,decreasing=T) #내림차순 설정안하면 오름차순
[1] 97 96 94 94 93 93 93 92 92 92 92 92 91 91 90 90 90 89 89 88 88 88 87 87 87 87 87 86 86 86 86 86
[33] 86 86 85 85 85 85 85 84 84 84 84 84 83 83 83 83 82 82 82 82 82 82 82 82 82 81 81 81 81 81 81 81
[65] 81 81 81 81 80 80 80 80 80 79 79 79 79 79 79 78 78 78 78 78 78 77 77 77 77 77 77 77 76 76 76 76
[97] 76 76 76 76 76 75 75 75 75 74 74 74 74 73 73 73 73 73 72 72 72 71 71 71 70 69 69 69 68 68 68 68
[129] 67 67 67 67 66 66 66 65 65 64 64 63 62 62 61 61 61 59 59 58 58 57 57 57 56

order()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
> air$Temp
[1] 67 72 74 62 56 66 65 59 61 69 74 69 66 68 58 64 66 57 68 62 59 73 61 61 57 58 57 67 81 79 76 78
[33] 74 67 84 85 79 82 87 90 87 93 92 82 80 79 77 72 65 73 76 77 76 76 76 75 78 73 80 77 83 84 85 81
[65] 84 83 83 88 92 92 89 82 73 81 91 80 81 82 84 87 85 74 81 82 86 85 82 86 88 86 83 81 81 81 82 86
[97] 85 87 89 90 90 92 86 86 82 80 79 77 79 76 78 78 77 72 75 79 81 86 88 97 94 96 94 91 92 93 93 87
[129] 84 80 78 75 73 81 76 77 71 71 78 67 76 68 82 64 71 81 69 63 70 77 75 76 68
> sort(air$Temp)
[1] 56 57 57 57 58 58 59 59 61 61 61 62 62 63 64 64 65 65 66 66 66 67 67 67 67 68 68 68 68 69 69 69
[33] 70 71 71 71 72 72 72 73 73 73 73 73 74 74 74 74 75 75 75 75 76 76 76 76 76 76 76 76 76 77 77 77
[65] 77 77 77 77 78 78 78 78 78 78 79 79 79 79 79 79 80 80 80 80 80 81 81 81 81 81 81 81 81 81 81 81
[97] 82 82 82 82 82 82 82 82 82 83 83 83 83 84 84 84 84 84 85 85 85 85 85 86 86 86 86 86 86 86 87 87
[129] 87 87 87 88 88 88 89 89 90 90 90 91 91 92 92 92 92 92 93 93 93 94 94 96 97
> order(air$Temp)
[1] 5 18 25 27 15 26 8 21 9 23 24 4 20 148 16 144 7 49 6 13 17 1 28 34
[25] 140 14 19 142 153 10 12 147 149 137 138 145 2 48 114 22 50 58 73 133 3 11 33 82
[49] 56 115 132 151 31 51 53 54 55 110 135 141 152 47 52 60 108 113 136 150 32 57 111 112
[73] 131 139 30 37 46 107 109 116 45 59 76 106 130 29 64 74 77 83 92 93 94 117 134 146
[97] 38 44 72 78 84 87 95 105 143 61 66 67 91 35 62 65 79 129 36 63 81 86 97 85
[121] 88 90 96 103 104 118 39 41 80 98 128 68 89 119 71 99 40 100 101 75 124 43 69 70
[145] 102 125 42 126 127 121 123 122 120
> air[order(air$Temp),]
Ozone Solar.R Wind Temp Month Day
5 NA NA 14.3 56 5 5
18 6 78 18.4 57 5 18
25 NA 66 16.6 57 5 25
27 NA NA 8.0 57 5 27
15 18 65 13.2 58 5 15
26 NA 266 14.9 58 5 26
8 19 99 13.8 59 5 8
21 1 8 9.7 59 5 21
9 8 19 20.1 61 5 9
23 4 25 9.7 61 5 23
24 32 92 12.0 61 5 24
4 18 313 11.5 62 5 4
20 11 44 9.7 62 5 20
148 14 20 16.6 63 9 25
16 14 334 11.5 64 5 16
144 13 238 12.6 64 9 21
7 23 299 8.6 65 5 7
49 20 37 9.2 65 6 18
6 28 NA 14.9 66 5 6
13 11 290 9.2 66 5 13
17 34 307 12.0 66 5 17
1 41 190 7.4 67 5 1
28 23 13 12.0 67 5 28
34 NA 242 16.1 67 6 3
...
  • order()를 통해 작은 순서대로의 Index 값을 구할 수 있음