R 语言入门(Ubuntu)

安装,启动,退出:

# Install R and the development files needed to build packages.
# Run in a shell; `sudo` must be on the same line as the command it elevates.
sudo apt-get install r-base r-base-dev
# Start an interactive R session (run in a shell)
R
# Quit the R session (typed at the R prompt)
q()

 安装package:

# Install a single package from CRAN.
install.packages("randomForest")
# install.views() is provided by the ctv package, not by base R, so ctv has
# to be installed first. The call then installs the packages listed in the
# "MachineLearning" CRAN task view.
install.packages("ctv")
ctv::install.views("MachineLearning")
# Search the R site for functions related to "confusion".
RSiteSearch("confusion", restrict = "functions")

 加载package:

# Attach an installed package so that its functions can be called directly
library(randomForest)
# Show information about the current session, including the loaded packages
sessionInfo()

创建对象:

# Create objects by assigning values to names with `<-`:
# a numeric value and a character string
pages <- 97
town <- "Richmond"

基本数据类型和结构:

# Logical values. The condition of `if` must be wrapped in parentheses;
# this prints "greater".
if (3 > 2) print("greater") else print("less")
isGreater <- 3 > 2
# [1] TRUE
isGreater
# [1] TRUE
is.logical(isGreater)

# Numeric values. A literal such as 3.6 is stored as a double, so it is
# numeric but not an integer.
x <- 3.6
# [1] TRUE
is.numeric(x)
# [1] FALSE
is.integer(x)
# [1] TRUE
is.double(x)
# [1] "double"
typeof(x)

# Character strings
y <- "your ad here"
# [1] "character"
typeof(y)
# Number of characters in y: [1] 12
nchar(y)
# Determine if a substring exists in the character string: grep() returns
# the indices of the matching elements, or integer(0) when nothing matches
# [1] 1
grep("ad", y)
# integer(0)
grep("my", y)

# Numeric vector; c() ("combine") joins its arguments into a single vector
weights <- c(90, 150, 111, 123)
# [1] TRUE
is.vector(weights)
# [1] "double"
typeof(weights)
# [1] 4
length(weights)
# Arithmetic is applied to every element; the next expression prints
# [1]  90.25 150.25 111.25 123.25
weights + .25
# [1] 118.5
mean(weights)

# Character (string) vector
colors <- c("green", "red", "blue", "red", "white")
# Indices of the elements matching the pattern "red": [1] 2 4
grep("red", colors)
# Number of characters in each element: [1] 5 3 4 3 5
nchar(colors)

# Select elements by position (indices start at 1): the 1st and 4th elements
weights[c(1, 4)]

# A vector of logical values can be used as an index as well, but there
# should be as many logical values as elements; the elements at the TRUE
# positions are returned
weights[c(TRUE, TRUE, FALSE, TRUE)]

# Missing values are encoded as NA
probabilities <- c(.05, .67, NA, .32, .90)
# Flag the missing elements: [1] FALSE FALSE  TRUE FALSE FALSE
is.na(probabilities)
mean(probabilities) # [1] NA -- an NA in the input propagates to the result
mean(probabilities, na.rm = TRUE) # [1] 0.485 -- NA values are dropped first
 

数据集操作,matrix, data frames:

# Build a 3 x 4 matrix from the values 1..12 (filled column by column, as the
# printout below shows) and give its rows and columns names
mat <- matrix(1:12, nrow = 3)
rownames(mat) <- c("row 1", "row 2", "row 3")
colnames(mat) <- c("col1", "col2", "col3", "col4")
# Contents of mat:
#       col1 col2 col3 col4
# row 1    1    4    7   10
# row 2    2    5    8   11
# row 3    3    6    9   12

# Row 1, columns 2 to 3
mat[1, 2:3]
# col2 col3
#    4    7

# Elements can also be selected by row and column name
mat["row 1", "col3"]
# Selecting a single row drops the matrix structure and returns a vector
mat[1,]
# col1 col2 col3 col4
#    1    4    7   10
is.matrix(mat[1,])
# [1] FALSE
is.vector(mat[1,])
# [1] TRUE

# drop = FALSE keeps the result as a (1 x 4) matrix
mat[1,,drop = FALSE]
#       col1 col2 col3 col4
# row 1    1    4    7   10

is.matrix(mat[1,,drop = FALSE])
# [1] TRUE

is.vector(mat[1,,drop = FALSE])
# [1] FALSE
# Build a data frame with a factor column and an integer column.
# `colors2` is the factor version of the `colors` character vector created
# earlier; storing it as a factor is what produces the "Levels:" line
# printed by df$colors below.
colors2 <- factor(colors)
df <- data.frame(colors = colors2, time = 1:5)
# Contents of df:
#   colors time
# 1  green    1
# 2    red    2
# 3   blue    3
# 4    red    4
# 5  white    5

# Number of rows and columns
dim(df)
# [1] 5 2

colnames(df)
# [1] "colors" "time"
rownames(df)
# [1] "1" "2" "3" "4" "5"

# Extract a single column with `$`
df$colors
# [1] green red   blue  red   white
# Levels: blue green red white

# Keep only the rows that satisfy a logical condition
subset(df, colors %in% c("red", "green") & time <= 2)
#   colors time
# 1  green    1
# 2    red    2

# Work on a copy so that df itself is left unchanged
df2 <- df
# Add missing values to the data frame: row 1 / column 1 and row 5 / column 2
df2[1, 1] <- NA
df2[5, 2] <- NA
# Contents of df2 after the two assignments:
#   colors time
# 1   <NA>    1
# 2    red    2
# 3   blue    3
# 4    red    4
# 5  white   NA

# TRUE for every row that contains no missing value
complete.cases(df2)
# [1] FALSE  TRUE  TRUE  TRUE FALSE

读取csv文件:

# Read a CSV file into a data frame with the default settings
read.csv("data.csv")
# Arguments can be passed by name in any order; header = FALSE tells
# read.csv() that the first line holds data rather than column names
read.csv(header = FALSE, file = "data.csv")
# Treat the string "?" in the file as a missing value (NA)
read.csv("data.csv", na.strings = "?")

 

posted @ 2017-06-24 11:11  0xAC  阅读(356)  评论(0)  编辑  收藏  举报