# Regression ----
# Set up caTools, which supplies sample.split() for the train/test split.
# Install it only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools) # Attach caTools so sample.split() can be called directly
# Split the dataset into training and testing subsets (80% training,
# 20% testing).
# NOTE(review): `marketing` is not created in this section; it must already
# be loaded when this code runs -- confirm where it comes from.
set.seed(101) # Fix the RNG seed so the random split is reproducible

# Logical vector with one entry per row of `marketing`: TRUE marks a training
# row, FALSE a testing row. seq_len() is used instead of 1:nrow() so an empty
# data frame does not produce the sequence c(1, 0).
# The variable name `sample` shadows the name of base::sample(); it is kept
# unchanged so that any code relying on it keeps working.
sample <- sample.split(seq_len(nrow(marketing)), SplitRatio = 0.8)
sample # Print the split to inspect the selection

# Create the training and testing datasets from the split
train <- marketing[sample, , drop = FALSE] # Training set (rows flagged TRUE)
test <- marketing[!sample, , drop = FALSE] # Testing set (rows flagged FALSE)
# Simple Linear Regression (LR): predict 'sales' from 'youtube' spend.
# The model is fitted on the training set only.
model1 <- lm(sales ~ youtube, data = train)

# Display the model summary (coefficients, R-squared, etc.)
summary(model1)

# Predict sales for the test data using the fitted model
pred1 <- predict(model1, newdata = test)
# Multiple Linear Regression (MLR): predict 'sales' from 'youtube',
# 'facebook' and 'newspaper'. The model is fitted on the training set only.
model <- lm(sales ~ youtube + facebook + newspaper, data = train)

# Display the model summary for the multiple regression
summary(model)

# Predict sales for the test data using the multiple regression model
pred <- predict(model, newdata = test)
# x-axis for plotting: one index per row of the test data.
# seq_len() yields an empty vector when `test` has no rows, whereas
# seq(0) would give c(1, 0).
x_axis <- seq_len(nrow(test))

# Combine the row index, the predicted sales and the actual sales into one
# data frame for easy comparison. The column names are spelled out and are the
# same ones data.frame() derives for the unnamed form: x_axis, pred,
# test.sales.
df <- data.frame(x_axis = x_axis, pred = pred, test.sales = test$sales)

# Open the data viewer on the result; this is only done in an interactive
# session, where a viewer is available.
if (interactive()) {
  View(df)
}
# Line plot comparing predicted sales with actual sales over the test rows.
library(ggplot2) # Provides ggplot(), aes(), geom_*() and scale_colour_manual()

# Columns are referenced by name from `df` (`test.sales` holds the actual
# sales) rather than reaching into `test$sales` from inside aes().
# The strings given to `colour` inside aes() act as legend labels; the real
# colours are assigned to those labels by scale_colour_manual() below.
g <- ggplot(df, aes(x = x_axis)) +
  geom_line(aes(y = pred, colour = "Predicted")) +      # predicted sales, line
  geom_point(aes(y = pred, colour = "Predicted")) +     # predicted sales, points
  geom_line(aes(y = test.sales, colour = "Actual")) +   # actual sales, line
  geom_point(aes(y = test.sales, colour = "Actual")) +  # actual sales, points
  scale_colour_manual(
    name = "", # Empty legend title
    values = c(Predicted = "red", Actual = "blue")
  )
# NOTE: the plot is only stored in `g` here; evaluate `g` or call print(g)
# to draw it.