Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
2 views

R Programming Tutorial for Beginners (1)

This document is a comprehensive tutorial on R programming for beginners, covering data manipulation, graphics creation, data analysis, and advanced techniques. It includes practical examples and code snippets for creating vectors, data frames, and visualizations using both base R and the ggplot2 package. Additionally, it demonstrates statistical analysis methods such as linear regression and descriptive statistics.

Uploaded by

HARRISON SAEZ
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

R Programming Tutorial for Beginners (1)

This document is a comprehensive tutorial on R programming for beginners, covering data manipulation, graphics creation, data analysis, and advanced techniques. It includes practical examples and code snippets for creating vectors, data frames, and visualizations using both base R and the ggplot2 package. Additionally, it demonstrates statistical analysis methods such as linear regression and descriptive statistics.

Uploaded by

HARRISON SAEZ
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

R Programming Tutorial for Beginners

###################################################
# #
# R Programming Tutorial for Beginners #
# Joaquim Schork #
# #
###################################################

## Taken from: https://statisticsglobe.com/wp-content/uploads/2022/01/Introduction-to-the-R-Programming-Language-by-Statistics-Globe.txt

## Set the working directory for this session.
## NOTE(review): the path is machine-specific; setwd() stops the script
## when "C:/datos" does not exist.
work_dir <- "C:/datos"
setwd(work_dir)

## Remove every visible object from the workspace before starting
## (this also drops the temporary `work_dir` helper defined above).
## NOTE(review): this wipes whatever the caller had in the session.
rm(list = ls())

##### Data Manipulation in R #####

# Numeric vector that later examples reuse
vec_1 <- c(1, 1, 5, 3, 1, 5)
vec_1

# Data frame with three columns: x1 (numbers), x2 (text labels) and
# x3 (the integer sequence 11 to 16)
data_1 <- data.frame(
  x1 = c(7, 2, 8, 3, 3, 7),
  x2 = c("x", "y", "x", "x", "x", "y"),
  x3 = 11:16
)
data_1

# Unnamed list bundling an integer sequence, the vector and the data frame
list_1 <- list(1:5, vec_1, data_1)
list_1

# Inspect the class of the vector
class(vec_1)

# Character vector: create, print, check class
vec_2 <- c("a", "b", "a", "c")
vec_2
class(vec_2)

# Factor vector: create, print, check class
vec_3 <- factor(c("gr1", "gr1", "gr2", "gr3", "gr2"))
vec_3
class(vec_3)

# Convert the factor back to plain character strings, print, check class
vec_4 <- as.character(vec_3)
vec_4
class(vec_4)

# Copy data_1 and append vec_1 as a new column x4
data_2 <- data_1
data_2$x4 <- vec_1
data_2

# Remove column x2 from the data frame by keeping every column whose
# name differs from "x2"
data_3 <- data_2[, colnames(data_2) != "x2"]
data_3

# Copy of data_3 with its three columns renamed
data_4 <- setNames(data_3, c("col_A", "col_B", "col_C"))
data_4

# Append one extra row (values 101, 102, 103) to the bottom of data_3
data_5 <- rbind(data_3, 101:103)
data_5

# Keep only the rows whose x1 value is greater than 3
data_6 <- data_5[data_5$x1 > 3, ]
data_6

# First data frame: IDs 101 to 106 with two letter columns
data_7 <- data.frame(
  ID = 101:106,
  x1 = letters[1:6],
  x2 = rev(letters[1:6])
)
data_7

# Second data frame: IDs 104 to 108 with three numeric columns
# (the single value given for y3 is recycled to every row)
data_8 <- data.frame(
  ID = 104:108,
  y1 = 1:5,
  y2 = rev(1:5),
  y3 = 5
)
data_8

# Join both frames on ID; all = TRUE keeps IDs found in only one of them
data_9 <- merge(data_7, data_8, by = "ID", all = TRUE)
data_9

# Copy of vec_1 in which every 1 is replaced by 99
vec_5 <- replace(vec_1, vec_1 == 1, 99)
vec_5

# Copy of data_1 in which every "y" in column x2 is replaced by "new"
data_10 <- data_1
data_10$x2 <- replace(data_10$x2, data_10$x2 == "y", "new")
data_10

# Show the working directory, switch to the data folder, show it again
getwd()
setwd("C:/datos")
getwd()

# Write data_10 to a CSV file in the working directory, without row names
write.csv(x = data_10, file = "data_10.csv", row.names = FALSE)

# Read the CSV file back in and print the imported data frame
data_11 <- read.csv(file = "data_10.csv")
data_11

##### Creating Graphics in R #####

# The calls below are left exactly as written: base graphics build default
# titles and axis labels from the argument expressions.

data(iris) # Load iris data set


head(iris) # Print head of iris data set

# Sepal width against sepal length, point colour taken from the Species column
plot(x = iris$Sepal.Length, # Draw Base R scatterplot


y = iris$Sepal.Width,
col = iris$Species)

# Density estimate of sepal length
plot(density(x = iris$Sepal.Length)) # Draw Base R density plot

# Histogram of sepal length
hist(x = iris$Sepal.Length) # Draw Base R histogram

# One box of sepal length per species (formula interface)
boxplot(iris$Sepal.Length ~ iris$Species) # Draw Base R boxplot

# Install ggplot2 only when it is not available yet, so that re-running the
# script does not download and reinstall the package every time
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Attach ggplot2 for the plots that follow
library("ggplot2")

# Scatterplot of sepal width against sepal length, coloured by species
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()

# Density of sepal length
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_density()

# One density outline per species
ggplot(data = iris, mapping = aes(x = Sepal.Length, colour = Species)) +
  geom_density()

# Densities per species with the area under each curve filled
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, colour = Species, fill = Species)) +
  geom_density()

# Same filled densities with alpha = 0.3 applied to the fill
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3)

# Histogram of sepal length
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram()

# Boxplot of sepal length for each species
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot()

# Same boxplot with each box filled according to its species
ggplot(data = iris,
       mapping = aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot()

# Work on a copy of iris so the original data set stays untouched
iris_groups <- iris

# Assign the subgroup labels "a", "b", "c" cyclically to the rows
iris_groups$Sub <- rep(letters[1:3], length.out = nrow(iris_groups))

# Mean sepal length for every Species / Sub combination.
# The formula is passed by position: the first argument of the formula
# method is not called `formula` in every R version, so naming it fails
# on some installations.
iris_groups <- aggregate(Sepal.Length ~ Species + Sub,
                         data = iris_groups,
                         FUN = mean)
iris_groups

# Barplot of the aggregated sepal lengths per species
# (geom_col() draws bars from the y values as given)
ggplot(data = iris_groups, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_col()

# Stacked barplot: one bar per species, split by subgroup
ggplot(data = iris_groups,
       mapping = aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_col()

# Grouped barplot: subgroup bars placed side by side
ggplot(data = iris_groups,
       mapping = aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_col(position = "dodge")

##### Data Analysis & Descriptive Statistics in R #####

# Mean and median
mean(x = vec_1)
median(x = vec_1)

# Minimum and maximum
min(vec_1)
max(vec_1)

# Sum of all elements
sum(vec_1)

# Variance and standard deviation
var(x = vec_1)
sd(x = vec_1)

# Several descriptive statistics at once
summary(object = vec_1)

# Frequency table of the vector values
table(vec_1)

# Contingency table of columns x1 and x2
table(data_1[ , c("x1", "x2")])

# Estimate a linear regression model of sepal width on sepal length
mod_1 <- lm(formula = Sepal.Width ~ Sepal.Length,
            data = iris)

# Summary statistics of the model
summary(mod_1)

# Scatterplot of sepal width against sepal length with a fitted
# linear-model smoother drawn on top
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_smooth(method = "lm")

# Model with multiple predictors: sepal width on sepal length and species
mod_2 <- lm(formula = Sepal.Width ~ Sepal.Length + Species,
            data = iris)

# Summary statistics of the model
summary(mod_2)

# Scatterplot coloured by species; because colour is mapped to Species,
# the linear-model smoother is drawn once per species
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  geom_smooth(method = "lm")

##### Advanced Techniques in R #####

# Create and print an empty numeric vector that the loop fills
vec_6 <- numeric()
vec_6

# Add the position index to each element of vec_1.
# seq_along() gives zero iterations for an empty vector, whereas
# 1:length(vec_1) would run over c(1, 0).
for (i in seq_along(vec_1)) {
  vec_6[i] <- vec_1[i] + i
}

# Print the input and the result for comparison
vec_1
vec_6

# Copy data_1 and add a column x4 that initially holds only NA
data_12 <- data_1
data_12$x4 <- NA
data_12

# Fill x4 row by row with x1 + (row index * x3).
# The loop runs over the rows of data_12, the frame it writes to, and
# seq_len() gives zero iterations when that frame has no rows.
for (i in seq_len(nrow(data_12))) {
  data_12$x4[i] <- data_12$x1[i] + i * data_12$x3[i]
}

data_12

# Create and print an empty character vector that the loop fills
vec_7 <- character()
vec_7

# Label each element of vec_1 as "high" (greater than 3) or "low".
# seq_along() gives zero iterations for an empty vector, whereas
# 1:length(vec_1) would run over c(1, 0).
for (i in seq_along(vec_1)) {
  if (vec_1[i] > 3) {
    vec_7[i] <- "high"
  } else {
    vec_7[i] <- "low"
  }
}

vec_7

# Vectorised labelling: "high" where vec_1 exceeds 3, "low" elsewhere
vec_8 <- ifelse(vec_1 > 3, "high", "low")

vec_8

# Simple user-defined function.
#
# x: numeric value or vector.
# Returns x squared plus five times x, computed element by element.
fun_1 <- function(x) {
  x^2 + 5 * x
}

fun_1(x = vec_1) # Apply the simple user-defined function to vec_1

# User-defined function with a branch.
#
# x: numeric value or vector.
# y: single number used in the `if` condition.
# Returns (x^2 + 5 * x) divided by y when y is greater than 3, and
# divided by 10 * y otherwise.
fun_2 <- function(x, y) {
  divisor <- if (y > 3) y else 10 * y
  (x^2 + 5 * x) / divisor
}

# Call fun_2 with x = 5 and y = 1, ..., 5 and print one message per call
for (i in seq_len(5)) {
  print(paste0("This is the result of iteration ", i, ": ", fun_2(x = 5, y = i)))
}

You might also like