R语言实战 - 基本数据管理(1)

1. 一个示例

> manager <- c(1,2,3,4,5)
> date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
> country <- c("US", "US", "UK", "UK", "UK")
> gender <- c("M", "F", "F", "M", "F")
> age <- c(32, 45, 25, 39, 99)
> q1 <- c(5, 3, 3, 3, 2)
> q2 <- c(4, 5, 5, 3, 2)
> q3 <- c(5, 2, 5, 4, 1)
> q4 <- c(5, 5, 5, NA, 2)
> q5 <- c(5, 5, 2, NA, 1)
> leadership <- data.frame(manager, date, country, gender, age, q1, q2, 
+ q3, q4, q5, stringAsFactors=FALSE)
> leadership
  manager     date country gender age q1 q2 q3 q4 q5 stringAsFactors
1       1 10/24/08      US      M  32  5  4  5  5  5           FALSE
2       2 10/28/08      US      F  45  3  5  2  5  5           FALSE
3       3  10/1/08      UK      F  25  3  5  5  5  2           FALSE
4       4 10/12/08      UK      M  39  3  3  4 NA NA           FALSE
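5       5   5/1/09      UK      F  99  2  2  1  2  1           FALSE

注意：上面的参数名 stringAsFactors 拼写有误，正确的参数名是 stringsAsFactors（多一个 s）。
由于拼写错误，data.frame() 把它当成了一个普通的新列，所以输出中多出了一列全为 FALSE 的
stringAsFactors，而字符型向量是否转换为因子仍由 R 的默认设置决定（本文使用的 R 3.4 默认会转换为因子，
R 4.0.0 起默认不再转换）。正确的写法是：
leadership <- data.frame(manager, date, country, gender, age, q1, q2, q3, q4, q5, stringsAsFactors=FALSE)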

2. 创建新变量

> mydata <- data.frame(x1=c(2,2,6,4), x2=c(3,4,2,8))
> mydata
  x1 x2
1  2  3
2  2  4
3  6  2
4  4  8
> mydata$sumx <- mydata$x1 + mydata$x2
> mydata$sumx
[1]  5  6  8 12
> mydata$mean <- (mydata$x1 + mydata$x2)/2
> mydata$mean
[1] 2.5 3.0 4.0 6.0
> attach(mydata)
> mydata$sumx <- x1 + x2
> mydata$sumx
[1]  5  6  8 12
> mydata$mean <- (x1 + x2)/2
> mydata$mean
[1] 2.5 3.0 4.0 6.0
> detach(mydata)
> mydata
  x1 x2 sumx mean
1  2  3    5  2.5
2  2  4    6  3.0
3  6  2    8  4.0
4  4  8   12  6.0
> mydata <- transform(mydata, sumx = x1+x2, meanx = (x1+x2)/2)
> mydata
  x1 x2 sumx mean meanx
1  2  3    5  2.5   2.5
2  2  4    6  3.0   3.0
3  6  2    8  4.0   4.0
4  4  8   12  6.0   6.0

3. 变量的重编码

> leadership$age[leadership$age == 99] <- NA
> leadership$agecat[leadership$age > 75] <- "Elder"
> leadership$agecat[leadership$age>=55 $
+ leadership$age<=75] <- "Middle Aged"
Error: unexpected '<=' in:
"leadership$agecat[leadership$age>=55 $
leadership$age<="
（注：上面的报错是因为把逻辑与运算符 & 误输成了 $；下一行是更正后的写法。）
> leadership$agecat[leadership$age >= 55 & leadership$age <=75] <- "Middle Aged"
> leadership$agecat[leadership$age < 55] <- "Young"
> 
> leadership <- within(leadership, {
+                      agecat <- NA
+                      agecat[age > 75]          <- "Elder"
+                      agecat[age >= 55 & age <= 75]          <- "Middle Aged"
+                      agecat[age < 55]                       <- "Young"})
> leadership
  manager     date country gender age q1 q2 q3 q4 q5 stringAsFactors agecat
1       1 10/24/08      US      M  32  5  4  5  5  5           FALSE  Young
2       2 10/28/08      US      F  45  3  5  2  5  5           FALSE  Young
3       3  10/1/08      UK      F  25  3  5  5  5  2           FALSE  Young
4       4 10/12/08      UK      M  39  3  3  4 NA NA           FALSE  Young
5       5   5/1/09      UK      F  NA  2  2  1  2  1           FALSE   <NA>

4. 变量的重命名

> fix(leadership)
> 
> library(reshape)
Error in library(reshape) : there is no package called ‘reshape’
> install.packages("reshape")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
also installing the dependencies ‘Rcpp’, ‘plyr’

trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/Rcpp_0.12.12.zip'
Content type 'application/zip' length 3319142 bytes (3.2 MB)
downloaded 3.2 MB

trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB

trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/reshape_0.8.7.zip'
Content type 'application/zip' length 128278 bytes (125 KB)
downloaded 125 KB

package ‘Rcpp’ successfully unpacked and MD5 sums checked
package ‘plyr’ successfully unpacked and MD5 sums checked
Warning: unable to move temporary installation ‘C:\Users\WZhong\Documents\R\win-library\3.4\file2cd073ad6c49\plyr’ to ‘C:\Users\WZhong\Documents\R\win-library\3.4\plyr’
package ‘reshape’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
        C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(reshape)
Error: package or namespace load failed for ‘reshape’ in loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]):
 there is no package called ‘plyr’
> install.packages(plyr)
Error in install.packages(plyr) : object 'plyr' not found
> install.packages("plyr")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB

package ‘plyr’ successfully unpacked and MD5 sums checked
Warning: unable to move temporary installation ‘C:\Users\WZhong\Documents\R\win-library\3.4\file2cd057b1e2f\plyr’ to ‘C:\Users\WZhong\Documents\R\win-library\3.4\plyr’

The downloaded binary packages are in
        C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(plyr)
Error in library(plyr) : there is no package called ‘plyr’
> library(reshape)
Error: package or namespace load failed for ‘reshape’ in loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]):
 there is no package called ‘plyr’
> plyr
Error: object 'plyr' not found
> library(plyr
+ )
Error in library(plyr) : there is no package called ‘plyr’
> install.packages("plyr")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB

package ‘plyr’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
        C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(plyr)
> library(reshape)

Attaching package: ‘reshape’

The following objects are masked from ‘package:plyr’:

    rename, round_any

> leadership <- rename(leadership, c(manager="managerID", date="testDate"))
> leadreship
Error: object 'leadreship' not found
> leadership
  managerID testDate country gender age q1 q2 q3 q4 q5 stringAsFactors agecat
1         1 10/24/08      US      M  32  5  4  5  5  5           FALSE  Young
2         2 10/28/08      US      F  45  3  5  2  5  5           FALSE  Young
3         3  10/1/08      UK      F  25  3  5  5  5  2           FALSE  Young
4         4 10/12/08      UK      M  39  3  3  4 NA NA           FALSE  Young
5         5   5/1/09      UK      F  NA  2  2  1  2  1           FALSE   <NA>
> names(leadership)[2] <- "testDate"
> names(leadership)
 [1] "managerID"       "testDate"        "country"         "gender"         
 [5] "age"             "q1"              "q2"              "q3"             
 [9] "q4"              "q5"              "stringAsFactors" "agecat"         
> names(leadership)[1] <- "manager"
> names(leadership)
 [1] "manager"         "testDate"        "country"         "gender"         
 [5] "age"             "q1"              "q2"              "q3"             
 [9] "q4"              "q5"              "stringAsFactors" "agecat"         
> names(leadership)[6:10] <- c("item1", "item2", "item3", "item4", "item5")
> names(leadership)
 [1] "manager"         "testDate"        "country"         "gender"         
 [5] "age"             "item1"           "item2"           "item3"          
 [9] "item4"           "item5"           "stringAsFactors" "agecat"         
> 

  

posted on 2017-09-05 23:35  你的踏板车要滑向哪里  阅读(330)  评论(0)  编辑  收藏  举报

导航