# Project - Retail Analysis With Walmart Data
# Project - Retail Analysis With Walmart Data
# Project - Retail Analysis With Walmart Data
# ---- Setup ----
# NOTE: the former `rm(list = ls())` was removed. It wiped the caller's
# workspace, including any `walmart` data frame loaded before this point.
#
# plyr is attached BEFORE dplyr on purpose: when plyr is attached last it masks
# dplyr's verbs (summarise, mutate, arrange, desc, ...), and plyr's summarise
# ignores dplyr groups, so grouped summaries collapse to a single row.
library(plyr)
library(dplyr)
library(lubridate)
library(tibble)

##### Data Preparation - Converting 'Date' column into Date Variable #####
# `walmart` is not created in this part of the script; fail early with a clear
# message if it has not been loaded yet.
if (!exists("walmart")) {
  stop(
    "`walmart` is not loaded; read the Walmart sales data before running this script.",
    call. = FALSE
  )
}

# Dates are parsed as day-month-year ("%d-%m-%Y").
walmart$Date <- as.Date(walmart$Date, format = "%d-%m-%Y")

# Quick inspection of the prepared data
View(walmart)
head(walmart)
str(walmart)
# Find the store with the maximum sales in `store_sales`.
# NOTE(review): `store_sales` is defined outside this excerpt; it is assumed to
# hold `Store` and `Weekly_Sales` columns -- confirm against its definition.

# Method 1
# Group by Store only. Grouping by Weekly_Sales as well (as before) puts every
# distinct value in its own group, so max() just echoes each value back.
# dplyr verbs are namespaced so the result does not depend on whether plyr
# masks them. Stores are listed with the largest maximum first.
store_sales %>%
  dplyr::group_by(Store) %>%
  dplyr::summarise(max = max(Weekly_Sales, na.rm = TRUE)) %>%
  dplyr::arrange(dplyr::desc(max))

# Method 2
# Sort all rows by sales, highest first; the first row is the top store.
store_sales_sorted <- dplyr::arrange(store_sales, dplyr::desc(Weekly_Sales))
store_sales_sorted
store_sales_sorted[1, ]
# Which store has the maximum standard deviation, i.e. the most variable sales?
# Also, find out the coefficient of mean to standard deviation.

# Store-wise mean of Weekly_Sales; the value column is then renamed Mean_Sales.
store_mean <- aggregate(Weekly_Sales ~ Store, data = walmart, FUN = mean)
head(store_mean, n = 10)
library(plyr)
store_mean <- plyr::rename(store_mean, replace = c(Weekly_Sales = "Mean_Sales"))
head(store_mean)

# Store-wise standard deviation of Weekly_Sales, renamed Std_Sales.
store_std <- aggregate(Weekly_Sales ~ Store, data = walmart, FUN = sd)
head(store_std, n = 10)
store_std <- plyr::rename(store_std, replace = c(Weekly_Sales = "Std_Sales"))
head(store_std)

# Row with the largest standard deviation
store_std %>%
  arrange(desc(Std_Sales)) %>%
  .[1, ]

# Row of `Cov` with the largest `coeff`
# (`Cov` and its `coeff` column are built outside this excerpt.)
Cov %>%
  arrange(desc(coeff)) %>%
  .[1, ]
# Growth rate from Q2 2012 to Q3 2012, stored in column GR: (Q3 - Q2) / Q2.
# `Stores_q_Flag_sum_r` is built outside this excerpt.
Stores_q_Flag_sum_r_GR <- Stores_q_Flag_sum_r %>%
  mutate(
    GR = ((Weekly_Sales.Q3_2012 - Weekly_Sales.Q2_2012) / Weekly_Sales.Q2_2012)
  )
head(Stores_q_Flag_sum_r_GR)

# Row with the highest growth rate
Stores_q_Flag_sum_r_GR %>%
  arrange(desc(GR)) %>%
  .[1, ]
# Some holidays have a negative impact on sales. Find out holidays which have
# higher sales than the mean sales in non-holiday season for all stores together?

# Mean of Weekly_Sales over the rows of `non_holiday_sales`
# (`non_holiday_sales` is built outside this excerpt).
Average_non_holiday_sales <- mean(non_holiday_sales[["Weekly_Sales"]])
Average_non_holiday_sales # recorded result: 1041256
## Answer :: Mean sales in non-holiday season for all stores together is 1041256
############################### ----- 5 ----- ##########################################################
# Provide a monthly and semester view of sales in units and give insights?

# Total Weekly_Sales for every Month_Sale / Year_Sale combination
# (`walmart_s_month_year` is built outside this excerpt).
Summarized_View <- aggregate(
  Weekly_Sales ~ Month_Sale + Year_Sale,
  data = walmart_s_month_year,
  FUN = sum
)
View(Summarized_View)

# Same totals ordered from the highest to the lowest
Insight_data <- Summarized_View %>%
  arrange(desc(Weekly_Sales))
View(Insight_data)
## Insights - Walmart recorded the highest sales in Dec 2010 and Dec 2011 and the lowest sales in Jan 2011 and Jan 2012.
## December is the month of highest sales and is followed by the lowest sales in the month of January. Walmart can plan its
## inventory accordingly.
############################### ***LINEAR MODEL***##########################################
library(corrplot)

# Correlation matrix of every column of `walmart` except the first two,
# computed once and reused for both plots.
cor_data <- cor(walmart[-c(1, 2)])

# Default correlation plot
corrplot(cor_data)

# Lower triangle only, with the coefficients printed as numbers
corrplot(cor_data, method = "number", type = "lower")
## Now, dropping insignificant variables - which are temperature and fuel price
# ---- Add a `Days` column derived from `Date` ----
library(lubridate)
library(tibble)

# Exploratory checks of the date arithmetic on the first five rows
walmart$Date[1:5]
yday(walmart$Date - 1)[1:5]
(walmart$Date - yday(walmart$Date))[1:5]
yday(walmart$Date - yday(walmart$Date))[1:5]
(walmart$Date - yday(walmart$Date)[1])[1:5]
yday(walmart$Date - yday(walmart$Date)[1])[1:5]

# Insert `Days` right after the second column.
# The positional argument of tibble::add_column() is `.after`; the previous
# `after = 2` was taken as a new column named `after` (filled with 2), and
# `Days` was appended at the end instead.
#
# NOTE(review): `Date - yday(Date)` is the last day of the previous year, so
# this expression yields that year's length (365 or 366) on every row.
# TODO confirm the intended encoding of `Days`; the expression is left as is.
walmart <- add_column(
  walmart,
  Days = yday(walmart$Date - yday(walmart$Date)),
  .after = 2
)
head(walmart)
summary(walmart)
# ******************