Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

Regression

Download as pdf or txt
Download as pdf or txt
You are on page 1of 2

# Install (only if missing) and load 'datarium', which provides the marketing
# dataset used below.

# Skip the download when the package is already available, so re-running the
# script neither reinstalls it nor needs network access.
if (!requireNamespace("datarium", quietly = TRUE)) {
  install.packages("datarium")
}

library(datarium) # Attach the datarium package

# Explore the marketing data shipped with datarium

data(package = "datarium")   # Show every dataset the package provides
data("marketing")            # Bring the marketing dataset into the workspace
help("marketing")            # Open the dataset's help page
View(marketing)              # Browse the data in a spreadsheet-style viewer
str(marketing)               # Structure: variables and their data types
summary(marketing)           # Summary statistics (mean, min, max, etc.)
plot(marketing, col = "red") # Plot the data in red to eyeball relationships

# Load the ggplot2 library for advanced visualizations
library(ggplot2)

# Install (only if missing) and load caTools, used to split the dataset into
# training and test sets. The guard avoids reinstalling on every run.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools)

# Split the dataset into training (80%) and testing (20%) subsets
set.seed(101) # Fix the random seed so the random split is reproducible

# Build the split mask: one logical entry per row of `marketing`
# (TRUE = training row, FALSE = testing row).
# NOTE: the variable shares its name with base::sample(); the name is kept so
# that any later code referring to `sample` keeps working.
sample <- sample.split(seq_len(nrow(marketing)), SplitRatio = 0.8)
sample # Print the split mask to inspect the selection

# Create the training and testing datasets based on the split mask
train <- subset(marketing, sample)  # Training set (80% of data)
test <- subset(marketing, !sample)  # Testing set (20% of data)

# Check the dimensions (rows, columns) of the training and testing sets
dim(train)
dim(test)

# Build and evaluate a Simple Linear Regression (LR) model:
# predict 'sales' based on 'youtube' spend

# Fit a simple linear regression model using 'youtube' as the only predictor
model1 <- lm(sales ~ youtube, data = train)

# Display a summary of the model (coefficients, R-squared, etc.)
summary(model1)

# Predict sales for the test data using the trained model
pred1 <- predict(model1, newdata = test)

# Build and evaluate a Multiple Linear Regression (MLR) model:
# predict 'sales' based on 'youtube', 'facebook', and 'newspaper'

# Fit a multiple linear regression model using all three predictors
model <- lm(sales ~ youtube + facebook + newspaper, data = train)

# Display a summary of the multiple linear regression model
summary(model)

# Predict sales for the test data using the multiple regression model
pred <- predict(model, newdata = test)

# Prepare the x-axis for plotting: the row positions of the test data.
# seq_len() is used so an empty test set gives an empty sequence rather than
# the c(1, 0) that seq(0) would produce.
x_axis <- seq_len(nrow(test))

# Combine x-axis, predicted sales, and actual sales into one data frame for
# easy comparison. The actual-sales column is named explicitly; `test.sales`
# is the same name data.frame() derives from an unnamed `test$sales` argument.
df <- data.frame(x_axis, pred, test.sales = test$sales)
View(df) # View the resulting data frame with predictions and actual values

# Create a line plot comparing the predicted sales with the actual sales.
# Both series are read from columns of `df` (`pred` and `test.sales`) rather
# than reaching outside the plot data with `test$sales` inside aes().
# The x aesthetic is set once in ggplot() and inherited by every layer.
g <- ggplot(df, aes(x = x_axis)) +
  geom_line(aes(y = pred, colour = "Predicted")) +     # Predicted sales, line
  geom_point(aes(y = pred, colour = "Predicted")) +    # Predicted sales, points
  geom_line(aes(y = test.sales, colour = "Actual")) +  # Actual sales, line
  geom_point(aes(y = test.sales, colour = "Actual")) + # Actual sales, points
  # Customize the colors of the lines and points (legend title left empty)
  scale_colour_manual("", values = c(Predicted = "red", Actual = "blue"))

You might also like