R Lab File Deepak
R Lab File Deepak
R Lab File Deepak
R programming is used as a leading tool for machine learning, statistics, and data
analysis. Objects, functions, and packages can easily be created in R.
The R programming language is not only a statistical package; it also allows us to
integrate with other languages (C, C++). Thus, you can easily interact with many
data sources and statistical packages.
R is currently one of the most requested programming languages in the Data Science
job market, which makes it one of the hottest trends nowadays.
1. STATISTICAL FEATURES OF R:
Basic Statistics: The most common basic statistics terms are the mean, mode,
and median. These are all known as “Measures of Central Tendency.” So using
the R language we can measure central tendency very easily.
Static graphics: R is rich with facilities for creating and developing interesting
static graphics. R contains functionality for many plot types including graphic
maps, mosaic plots, biplots, and the list goes on.
Probability distributions: Probability distributions play a vital role in statistics
and by using R we can easily handle various types of probability distribution
such as Binomial Distribution, Normal Distribution, Chi-squared Distribution
and many more.
Data analysis: R provides a large, coherent and integrated collection of tools for
data analysis.
2. PROGRAMMING FEATURES OF R
# Two operands used to demonstrate R's arithmetic operators.
a <- 7.5
b <- 2

print(a + b)    # addition
print(a - b)    # subtraction
print(a * b)    # multiplication
print(a / b)    # division
print(a %% b)   # remainder (modulo)
print(a %/% b)  # quotient (integer division)
print(a^b)      # power (a raised to b)
OUTPUT-
EXPERIMENT-2
TO PERFORM THE DATA FRAMING.
# Build a data frame of five friends: an integer id column and a name column.
# stringsAsFactors = FALSE keeps the names as plain character strings.
friend.data <- data.frame(
  friend_id = 1:5,
  friend_name = c("Sachin", "Sourav", "Dravid", "Sehwag", "Dhoni"),
  stringsAsFactors = FALSE
)

# Display the data frame.
print(friend.data)
OUTPUT-
EXPERIMENT-3
TO PRINT MAXIMUM AND MINIMUM VALUE USING DATA
FRAMING.
# Create a data frame with two numeric columns and one character column.
# stringsAsFactors = FALSE keeps column2 as character on every R version;
# as a factor (the default before R 4.0) min()/max() on it would raise an error.
data <- data.frame(
  column1 = c(23, 4, 56, 21),
  column2 = c("sai", "deepu", "ram", "govind"),
  column3 = c(1.3, 4.6, 7.8, 6.3),
  stringsAsFactors = FALSE
)

# Minimum value of each column. For the character column the strings are
# compared in sort order, so the "minimum" is the first name alphabetically.
print(min(data$column1))
print(min(data$column2))
print(min(data$column3))

# Maximum value of each column (last name alphabetically for column2).
print(max(data$column1))
print(max(data$column2))
print(max(data$column3))
OUTPUT-
EXPERIMENT-4
TO GET THE INPUT FROM THE USER AND PERFORM
NUMERICAL OPERATIONS (MAX, MIN, AVG, SUM, SORT,
ROUND) IN R
# Read numbers typed by the user; scan() stops at a blank line or end of input.
print("Enter the numbers: ")
x <- scan()
print(x)

# Every result is printed explicitly so that each label is always followed by
# its value, including when the script is run with source(), where bare
# expressions are not auto-printed.
print("Max value is: ")
print(max(x))
print("Min value is: ")
print(min(x))
print("Average is: ")
print(mean(x))
print("Sum is: ")
print(sum(x))
print("Sorted array is: ")
print(sort(x))

# Rounding demo on a fixed vector: round each value to one decimal place.
data <- c(.3, 1.03, 2.67, 5, 8.91)
print(round(data, digits = 1))
OUTPUT-
EXPERIMENT-5
TO PERFORM DATA IMPORT/ EXPORT (.CSV, .XLS, .TXT)
OPERATIONS USING DATA FRAMES IN R.
# Read a CSV file into a data frame and display it.
code <- read.csv("c:\\Users\\deepak\\Desktop\\TEST.csv")
code

# Read an Excel (.xls) file with the readxl package and display it as well,
# so the imported sheet can be checked just like the CSV data.
library(readxl)
mydatasheet <- read_excel("c:\\Users\\deepak\\Desktop\\TEST2.xls")
mydatasheet
OUTPUT-
EXPERIMENT-6
TO GET THE INPUT MATRIX FROM THE USER AND PERFORM
MATRIX ADDITION, SUBTRACTION, MULTIPLICATION, INVERSE,
TRANSPOSE, AND DIVISION OPERATIONS USING VECTOR
CONCEPT IN R.
# Matrix a: read the elements from the user and arrange them row by row
# into a 3 x 3 matrix.
data.a <- scan()
matrix.a <- matrix(data.a, nrow = 3, ncol = 3, byrow = TRUE)
matrix.a

# Matrix b: read and arrange in the same way.
data.b <- scan()
matrix.b <- matrix(data.b, nrow = 3, ncol = 3, byrow = TRUE)
matrix.b
# The results are stored directly in matrix.* variables; temporaries named
# sum or diff are avoided because they would mask the base functions of the
# same name.

# addition (element-wise): shown on the input vectors, then as a 3 x 3 matrix
data.a + data.b
matrix.sum <- matrix(data.a + data.b, nrow = 3, ncol = 3, byrow = TRUE)
matrix.sum

# subtraction (element-wise)
data.a - data.b
matrix.diff <- matrix(data.a - data.b, nrow = 3, ncol = 3, byrow = TRUE)
matrix.diff

# multiplication (element-wise): each element of a times the matching element of b
data.a * data.b
matrix.mul <- matrix(data.a * data.b, nrow = 3, ncol = 3, byrow = TRUE)
matrix.mul

# matrix multiplication (rows of a times columns of b) uses the %*% operator
matrix.prod <- matrix.a %*% matrix.b
matrix.prod

# division (element-wise)
data.a / data.b
matrix.div <- matrix(data.a / data.b, nrow = 3, ncol = 3, byrow = TRUE)
matrix.div
# transpose of matrix a
# t() has to be applied to the matrix itself: transposing the flat input
# vector and refilling it row by row only rebuilds matrix a unchanged.
matrix.t <- t(matrix.a)
matrix.t

# determinant of matrix a
det(matrix.a)

# inverse of matrix a (solve() stops with an error if the matrix is singular)
solve(matrix.a)
OUTPUT-
EXPERIMENT-7
TO PERFORM STATISTICAL OPERATIONS (MEAN, MEDIAN,
MODE AND STANDARD DEVIATION) USING R.
# Sample observations used for the mean and the median.
x <- c(12, 7, 3, 4.2, 18, 2, 54, -21, 8, -5)

# Arithmetic mean of x.
result.mean <- mean(x)
print(result.mean)

# Median of x (middle value of the sorted data).
median.result <- median(x)
print(median.result)
# getmode(): return the most frequent value of a vector.
#   v - a vector of values.
# Returns the element of v that occurs most often; when several values tie,
# the one that appears first in v is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Count how many times each distinct value occurs in v.
  counts <- tabulate(match(v, distinct_values))
  distinct_values[which.max(counts)]
}

# Sample vector with repeated values.
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3)

# Mode of the sample, computed with the user-defined function.
result <- getmode(v)
print(result)
# Standard deviation of a sample vector, computed with sd().
v <- c(12, 24, 74, 32, 14, 29, 84, 56, 67, 41)
s <- sd(v)
print(s)
OUTPUT-
EXPERIMENT-8
TO PERFORM DATA PREPROCESSING OPERATIONS
i) HANDLING MISSING DATA
ii) MIN-MAX NORMALIZATION
# Load caret, which supplies preProcess() for scaling the data.
# install.packages("caret")
library(caret)

# Sample data set with three numeric variables.
data <- data.frame(
  var1 = c(120, 345, 145, 122, 596, 285, 211),
  var2 = c(10, 15, 45, 22, 53, 28, 12),
  var3 = c(-34, 0.05, 0.15, 0.12, -6, 0.85, 0.11)
)
data

# Summary statistics before normalization.
summary(data)

# Estimate the scaling parameters; method "range" is caret's min-max scaling.
preproc <- preProcess(data, method = "range")

# Apply the scaling to the data and show the first rows.
norm <- predict(preproc, data)
head(norm)

# Summary statistics after normalization.
summary(norm)
OUTPUT-
EXPERIMENT-9
TO PERFORM DIMENSIONALITY REDUCTION OPERATION USING
PCA FOR HOUSES DATA SET
# dplyr is the only add-on package needed here; install it only when it is
# missing instead of on every run. stats ships with R itself, so it is not
# installed from a repository.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

# importing the libraries
library(stats)
library(dplyr)

# Iris data set: open the data viewer only in an interactive session, since a
# batch run may not be able to open a viewer window.
if (interactive()) {
  View(iris)
}

# unsupervised learning - hence converting iris data to an unlabelled data set
# by keeping only the first four columns
mydata <- select(iris, 1:4)

# PCA eligibility: inspect the correlations between the variables
cor(mydata)
mean(cor(mydata))

# PRINCIPAL COMPONENT ANALYSIS
PCA <- princomp(mydata)

# evaluate the PCA
PCA$loadings
EXPERIMENT-10
TO PERFORM SIMPLE LINEAR REGRESSION WITH R
# import the file to apply linear regression
ads <- read.csv("d:/TEST1.csv")
colnames(ads)

# Predictor (TV) and response (Sales).
# NOTE(review): the column names "TV" and "sales" are used as written in the
# script; confirm them against the colnames(ads) output above.
TV <- ads$TV
Sales <- ads$sales

# Scatter plot of the response against the predictor.
plot(TV, Sales, pch = 16, cex = 1, col = "blue",
     main = "TV vs Sales", xlab = "TV", ylab = "Sales")

# Fit the simple linear regression of Sales on TV. The model must be created
# before summary() and abline() can use it.
model <- lm(Sales ~ TV)
summary(model)

# Add the fitted regression line to the scatter plot.
abline(model)
OUTPUT-
EXPERIMENT-11
TO PERFORM K-MEANS CLUSTERING OPERATIONS AND
VISUALIZE FOR IRIS DATA SET.
# install the required add-on packages only when they are missing
# (stats ships with R itself, so it is not installed from a repository)
for (pkg in c("dplyr", "ggplot2", "ggfortify")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# load the packages used below
library(dplyr)      # select()
library(ggplot2)
library(ggfortify)  # autoplot() support for kmeans results

# unsupervised learning - hence converting the iris data set into an
# unlabelled data set by keeping only the first four columns
if (interactive()) {
  View(iris)  # the data viewer is only opened in an interactive session
}
mydata <- select(iris, 1:4)

# k-means clustering: the model must be fitted before it can be plotted.
# set.seed() makes the random starting centres reproducible.
# NOTE(review): 3 clusters is an assumption (iris contains three species);
# adjust centers if a different number of clusters is required.
set.seed(123)
KM <- kmeans(mydata, centers = 3)

# cluster plot
autoplot(KM, mydata, frame = TRUE)

# cluster centers
KM$centers
OUTPUT-
EXPERIMENT-12
LEARN HOW TO COLLECT DATA VIA WEB SCRAPING, APIs
AND DATA CONNECTORS FROM SUITABLE SOURCES AS
SPECIFIED BY THE INSTRUCTOR.
#install the packages
#install.packages("dplyr")
#install.packages("rvest")
OUTPUT-