Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
20 views

Final Data Lab

Lab Manual

Uploaded by

pvarshinibca
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
20 views

Final Data Lab

Lab Manual

Uploaded by

pvarshinibca
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 21

DATA SCIENCE & MACHINE LEARNING

P23CAP26 LABORATORY

EX . NO: 1 - R AS CALCULATOR APPLICATION.

PROGRAM:

# Return the sum of two numbers (vectorized, like `+` itself).
add <- function(x, y) {
  x + y
}

# Return the difference x - y.
subtract <- function(x, y) {
  x - y
}

# Return the product x * y.
multiply <- function(x, y) {
  x * y
}

# Return the quotient x / y. Division by zero yields Inf/-Inf/NaN,
# exactly as base R's `/` does.
divide <- function(x, y) {
  x / y
}
# Menu-driven calculator: read an operation and two numbers from the
# console, then print the formatted result.
print("Select operation.")
print("1.Add")
print("2.Subtract")
print("3.Multiply")
print("4.Divide")

# Use <- for assignment (not =) and validate the menu choice instead of
# letting an out-of-range value fall through switch() as a silent NULL.
choice <- as.integer(readline(prompt="Enter choice[1/2/3/4]: "))
if (is.na(choice) || choice < 1 || choice > 4) {
  stop("Invalid choice: please enter 1, 2, 3 or 4.", call. = FALSE)
}

# as.numeric (not as.integer) so decimal inputs such as 2.5 are not
# silently truncated.
num1 <- as.numeric(readline(prompt="Enter first number: "))
num2 <- as.numeric(readline(prompt="Enter second number: "))

# Map the validated choice to its display symbol and its operation.
operator <- switch(choice, "+", "-", "*", "/")
result <- switch(choice,
                 add(num1, num2),
                 subtract(num1, num2),
                 multiply(num1, num2),
                 divide(num1, num2))
print(paste(num1, operator, num2, "=", result))

OUTPUT
EX.NO: 2 – DESCRIPTIVE STATISTICS ON R.

#MEAN#

# Average a vector of daily ratings two ways: manually (sum / count),
# then with the built-in mean() to confirm both give the same value.
how_was_the_day <- c(2, 3, 4, 3, 5, 4, 2, 1, 3)

tot_day <- sum(how_was_the_day)     # total of all ratings
cnt_day <- length(how_was_the_day)  # number of observations

avg_for_day <- tot_day / cnt_day    # manual arithmetic mean
print(avg_for_day)

mean(how_was_the_day)               # built-in equivalent

#MEDIAN#

# Median of three sample vectors: two even-length cases (median is the
# mean of the two middle values) and one odd-length case.
a <- c(200, 400, 1000, 450, 234, 700)
median(a)  # (400 + 450) / 2 = 425

b <- c(7000, 4000, 2000, 1000, 5000, 5000)
median(b)  # (4000 + 5000) / 2 = 4500

# Renamed from `c`: binding a variable to the name of base::c() shadows
# a core function's name and is a classic source of confusion.
vals <- c(20, 30, 40, 50, 60)
median(vals)  # middle value = 40

#MODE#

# Statistical mode: the most frequent value in `v`.
# Ties resolve to whichever candidate appears first in unique(v).
getmode <- function(v) {
  distinct_vals <- unique(v)
  freq <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(freq)]
}

# All sale values are distinct, so the mode is the first element.
sale <- c(10, 45, 67, 30, 60, 34, 59, 90)
result <- getmode(sale)
print(result)

OUTPUT
EX . NO: 3 - READING AND WRITING DIFFERENT TYPES OF DATASETS

PROGRAM:
# Read and write CSV, Excel and tab-delimited files with the tidyverse
# I/O packages. Install each package only when it is missing, so the
# script does not hit the network (or fail offline) on every run.
for (pkg in c("readr", "readxl", "writexl", "haven")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load the packages
library(readr)
library(readxl)
library(writexl)
library(haven)

# CSV Files: read, then round-trip to a new file.
df_csv <- read_csv("file.csv")
write_csv(df_csv, "output.csv")

# Excel Files (XLSX)
df_excel <- read_excel("file.xlsx")
write_xlsx(df_excel, "output.xlsx")

# Text Files (Tab-Delimited)
df_tsv <- read_tsv("file.tsv")
write_tsv(df_tsv, "output.tsv")
OUTPUT:
EX.NO: 4. VISUALIZATION.

PROGRAM:

# Four one-variable lattice plots of mpg from the mtcars dataset.
library(lattice)
library(datasets)

mtcars       # print the full data set
str(mtcars)  # structure of the data frame

# The original prefixed each plot with a bare numeric literal
# (e.g. `1.#HISTOGRAM#`), which evaluates and prints a stray `[1] 1`;
# the section numbers belong inside comments instead.

# 1. HISTOGRAM
histogram(~mpg, data = mtcars)
# 2. BOX PLOT
bwplot(~mpg, data = mtcars)
# 3. DENSITY PLOT
densityplot(~mpg, data = mtcars)
# 4. BAR GRAPH
barchart(~mpg, data = mtcars)
OUTPUT:

HISTOGRAM

BOXPLOT

DENSITY PLOT
BAR GRAPH
EX.NO:5 CORRELATION AND COVARIANCE

PROGRAM:
# Covariance and Pearson correlation of two small numeric vectors.
x <- c(2, 4, 6, 8, 10)
y <- c(1, 3, 2, 5, 7)

#COVARIANCE#
# Sign indicates whether x and y tend to move together.
cov_xy <- cov(x, y)
cat("Covariance between x and y:", cov_xy, "\n")

#CORRELATION#
# Covariance rescaled by the standard deviations, so it lies in [-1, 1].
cor_xy <- cor(x, y)
cat("Correlation between x and y:", cor_xy, "\n")

OUTPUT:
6.REGRESSION MODEL

# Simulate data from y = 3 + 2x + noise, fit a simple linear model,
# and draw the fitted line over the scatter of points.

set.seed(42)  # reproducible random draws

# Predictor and response; true intercept is 3, true slope is 2.
x <- rnorm(100, mean = 5, sd = 2)
y <- 3 + 2 * x + rnorm(100)

# Ordinary least squares fit of y on x.
model <- lm(y ~ x)

# Coefficient estimates, residual summary and R-squared.
print(summary(model))

# Scatter plot with the estimated regression line overlaid in blue.
plot(x, y, main = "Scatter Plot with Regression Line", xlab = "x", ylab = "y")
abline(model, col = "blue")

OUTPUT:
7.Multiple regression

# Multiple linear regression: model mpg as a function of horsepower,
# weight and cylinder count, then score two new car configurations.
# Base R is sufficient here -- the original installed and loaded the
# entire tidyverse only to build one small data frame with tibble();
# data.frame() produces identical predictions without the dependency.

data(mtcars)
head(mtcars)

# Fit mpg on hp, wt and cyl.
model <- lm(mpg ~ hp + wt + cyl, data = mtcars)
summary(model)

# Two hypothetical cars to score with the fitted model.
new_data <- data.frame(hp = c(150, 200), wt = c(2.5, 3.0), cyl = c(4, 6))
predictions <- predict(model, newdata = new_data)
print(predictions)

# Standard 2x2 panel of lm diagnostic plots.
par(mfrow = c(2, 2))
plot(model)

Output:
print(predictions)
1 2
24.36217 19.99355

Plot:
EX.NO: 8. Regression model for prediction
# Fit mpg ~ wt + hp on mtcars, predict for three new cars, and compare
# actual vs. predicted mpg against the identity line (needs ggplot2).

library(ggplot2) # For visualization

# mtcars ships with R's datasets package.
data(mtcars)

# Quick look at the data before modelling.
head(mtcars)
summary(mtcars)

# Linear model: fuel economy as a function of weight and horsepower.
model <- lm(mpg ~ wt + hp, data = mtcars)

# Coefficients, residuals and fit statistics.
summary(model)

# Three hypothetical cars to score with the fitted model.
new_data <- data.frame(wt = c(2.5, 3.0, 3.5), hp = c(100, 120, 150))
predictions <- predict(model, new_data)

cat("Predictions for new data:\n")
print(predictions)

# Keep in-sample predictions next to the observations for plotting.
mtcars$predicted_mpg <- predict(model, mtcars)

# Points lying on the red identity line are predicted perfectly.
ggplot(mtcars, aes(x = mpg, y = predicted_mpg)) +
  geom_point(color = "blue") +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(title = "Actual vs. Predicted MPG",
       x = "Actual MPG",
       y = "Predicted MPG") +
  theme_minimal()

OUTPUT :
EX.NO : 9 – CLASSIFICATION MODEL.

PROGRAM:
# Random-forest classification of iris species: 70/30 stratified split,
# model training via caret, evaluation with a confusion matrix.

install.packages("caret")
install.packages("randomForest")
library(caret)
library(randomForest)

data(iris)

# Reproducible 70% training partition, stratified by Species.
set.seed(123)
trainIndex <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]

# Fit a random forest ("rf") on all predictors of the training rows.
model <- train(Species ~ ., data = trainData, method = "rf")

# Score the held-out 30%.
predictions <- predict(model, newdata = testData)

# Accuracy, kappa and per-class sensitivity/specificity.
confMatrix <- confusionMatrix(predictions, testData$Species)
print(confMatrix)

OUTPUT :

Confusion Matrix and Statistics

Reference
Prediction setosa versicolor virginica
setosa 15 0 0
versicolor 0 14 2
virginica 0 1 13

Overall Statistics

Accuracy : 0.9333
95% CI : (0.8173, 0.986)
No Information Rate : 0.3333
P-Value [Acc > NIR] : < 2.2e-16

Kappa : 0.9
Mcnemar's Test P-Value : NA

Statistics by Class:

Class: setosa Class: versicolor Class: virginica


Sensitivity 1.0000 0.9333 0.8667
Specificity 1.0000 0.9333 0.9667
Pos Pred Value 1.0000 0.8750 0.9286
Neg Pred Value 1.0000 0.9655 0.9355
Prevalence 0.3333 0.3333 0.3333
Detection Rate 0.3333 0.3111 0.2889
Detection Prevalence 0.3333 0.3556 0.3111
Balanced Accuracy 1.0000 0.9333 0.9167

EX.NO : 10. CLUSTERING MODEL

PROGRAM:
# K-means clustering of the iris measurements (species label removed),
# attaching the resulting cluster back onto the data for later plotting.

set.seed(123) # For reproducibility

data(iris)

# Drop column 5 (Species): clustering uses only the numeric features.
iris_data <- iris[, -5]

# k = 3 clusters; 20 random starts guard against poor local optima.
kmeans_result <- kmeans(iris_data, centers = 3, nstart = 20)

# One cluster label (1-3) per flower.
print(kmeans_result$cluster)

# Store the assignment as a factor column on the original data.
iris$Cluster <- as.factor(kmeans_result$cluster)

# Visualize the clusters on the two petal dimensions; each point is
# coloured by the Cluster factor computed above (requires ggplot2).
library(ggplot2)

ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Cluster)) +
  geom_point(size = 3) +
  labs(title = "K-means Clustering of Iris Dataset") +
  theme_minimal()
OUTPUT:

You might also like