Python vs SQL
Python vs SQL
*PYTHON *SQL
Q1: Find the top 10 products by
sales for each year and location.
Parameters:
Product_id
Location
Sales
Year
Month
Q1
python
# Order the rows by year and location, highest sales first within each pair.
ordered = top_products.sort_values(
    by=["Year", "Location", "Sales"],
    ascending=[True, True, False],
)
# Keep the first ten rows of every (Year, Location) group.
top_products = ordered.groupby(["Year", "Location"]).head(10)
print(top_products)
sql
-- Top 10 products by total sales for each year and location.
-- Sales are summed per product within a (Year, Location) pair. Month is left
-- out of the grouping so the ranking compares yearly totals rather than
-- individual product-months.
WITH RankedProducts AS (
    SELECT
        Product_id,
        Location,
        Year,
        SUM(Sales) AS total_sales,
        RANK() OVER (
            PARTITION BY Year, Location
            ORDER BY SUM(Sales) DESC
        ) AS sales_rank  -- "rank" is a reserved word in some engines
    FROM sales_data
    GROUP BY Product_id, Location, Year
)
SELECT
    Product_id,
    Location,
    Year,
    total_sales,
    sales_rank
FROM RankedProducts
WHERE sales_rank <= 10  -- RANK() keeps ties, so a group can return more than 10 rows
ORDER BY Year, Location, sales_rank;
Parameters:
Category
Product_id
Sales
Month
Year
Q2
python
# Month-over-month growth rate per product, averaged by category and month.
# NOTE(review): shift(1) takes the row that precedes the current one within
# each product, so df is assumed to be in chronological order already — confirm.
df["Previous_Month_Sales"] = df.groupby("Product_id")["Sales"].shift(1)
# Percentage change versus the previous row. The first row of every product
# has no predecessor, so its growth rate is NaN.
df["Growth_Rate"] = (
    (df["Sales"] - df["Previous_Month_Sales"])
    / df["Previous_Month_Sales"]
    * 100
)
# mean() skips NaN, so rows without a previous value do not affect the average.
growth_rate = (
    df.groupby(["Category", "Year", "Month"])["Growth_Rate"]
    .mean()
    .reset_index()
)
print(growth_rate)
sql
-- Average month-over-month sales growth (in percent) per category and month.
WITH SalesGrowth AS (
    -- Attach to every row the Sales value of the preceding row for the same
    -- product, ordered by Year then Month.
    SELECT
        Category,
        Product_id,
        Year,
        Month,
        Sales,
        LAG(Sales) OVER (
            PARTITION BY Product_id
            ORDER BY Year, Month
        ) AS Previous_Month_Sales
    FROM sales_data
)
SELECT
    Category,
    Year,
    Month,
    -- 100.0 avoids integer division; NULLIF turns a zero denominator into
    -- NULL, which AVG ignores, instead of raising a divide-by-zero error.
    AVG(
        (Sales - Previous_Month_Sales) * 100.0
        / NULLIF(Previous_Month_Sales, 0)
    ) AS Growth_Rate
FROM SalesGrowth
WHERE Previous_Month_Sales IS NOT NULL
GROUP BY Category, Year, Month
ORDER BY Category, Year, Month;
Q3: Find the top 5 products with
the highest sales growth over the
past year.
Parameters:
Product_id
Sales
Year
Month
Category
Q3
python
# Year-over-year growth per product, then the five products with the highest
# average growth.
# Assuming monthly data: the value 12 rows earlier within a product is taken
# as the same month of the previous year.
df["Previous_Year_Sales"] = df.groupby("Product_id")["Sales"].shift(12)
# Percentage change versus the value 12 rows back; NaN where no such row exists.
df["YoY_Growth"] = (
    (df["Sales"] - df["Previous_Year_Sales"])
    / df["Previous_Year_Sales"]
    * 100
)
top_growth_products = (
    df.groupby("Product_id")["YoY_Growth"]
    .mean()
    .nlargest(5)
    .reset_index()
)
print(top_growth_products)
sql
-- Top 5 products by average year-over-year sales growth (in percent).
WITH YearlyGrowth AS (
    -- LAG(Sales, 12) looks 12 rows back within a product; this equals the same
    -- month of the previous year only if every product has one row per month.
    SELECT
        Product_id,
        Year,
        Month,
        Sales,
        LAG(Sales, 12) OVER (
            PARTITION BY Product_id
            ORDER BY Year, Month
        ) AS Previous_Year_Sales
    FROM sales_data
)
SELECT
    Product_id,
    -- 100.0 avoids integer division; NULLIF turns a zero denominator into
    -- NULL, which AVG ignores, instead of raising a divide-by-zero error.
    AVG(
        (Sales - Previous_Year_Sales) * 100.0
        / NULLIF(Previous_Year_Sales, 0)
    ) AS YoY_Growth
FROM YearlyGrowth
WHERE Previous_Year_Sales IS NOT NULL
GROUP BY Product_id
ORDER BY YoY_Growth DESC
LIMIT 5;
Q4: Find the average sales per
customer by region for each
quarter.
Parameters:
Customer_id
Sales
Region
Year
Quarter
Q4
python
# Mean of Sales for every (Region, Year, Quarter) combination.
# NOTE(review): avg_sales_per_customer must already exist before this line;
# the step that builds it is not part of the visible snippet — confirm.
avg_sales_per_customer = (
    avg_sales_per_customer.groupby(["Region", "Year", "Quarter"])["Sales"]
    .mean()
    .reset_index()
)
print(avg_sales_per_customer)
sql
Parameters:
Location
Sales
Month
Year
Product_id
Q5
python
# Location whose mean "Sales" value in sales_variance is the largest.
# NOTE(review): sales_variance is built outside the visible snippet; presumably
# its "Sales" column already holds a variance figure — confirm.
top_variance_location = (
    sales_variance.groupby("Location")["Sales"].mean().idxmax()
)
print(top_variance_location)
sql
Parameters:
Payment_method
Order_id
Sales
Location
Customer_id
Q6
python
# Average order value per payment method and location.
# transform('sum') writes each order's total onto every row of that order.
df['Order_Value'] = df.groupby('Order_id')['Sales'].transform('sum')
# NOTE(review): an order with several rows contributes its total once per row
# to the mean below — confirm that this weighting is intended.
avg_order_value = (
    df.groupby(['Payment_method', 'Location'])['Order_Value']
    .mean()
    .reset_index()
)
print(avg_order_value)
sql
Parameters:
Customer_id
Sales
Region
Year
Month
Q7
python
# For every region, find the index label of the row with the largest Sales,
# then print those rows.
# NOTE(review): total_purchases is built outside the visible snippet;
# presumably it holds one row per (Customer_id, Region) — confirm.
highest_purchases = total_purchases.groupby("Region")["Sales"].idxmax()
print(total_purchases.loc[highest_purchases])
sql
-- Customer(s) with the highest total purchases in each region.
WITH TotalPurchases AS (
    SELECT
        Customer_id,
        Region,
        SUM(Sales) AS total_sales
    FROM sales_data
    GROUP BY Customer_id, Region
)
SELECT
    Customer_id,
    Region,
    total_sales
FROM TotalPurchases
-- Match on (Region, total_sales): the subquery is grouped by Region, so Region
-- is the only key it can return alongside the maximum.
WHERE (Region, total_sales) IN (
    SELECT
        Region,
        MAX(total_sales)
    FROM TotalPurchases
    GROUP BY Region
);
Q8: Find the products that have
contributed the most to sales in
each category.
Parameters:
Product_id
Sales
Category
Year
Month
Q8
python
# Total sales of every product within its category.
total_sales_category = (
    df.groupby(["Category", "Product_id"])["Sales"].sum().reset_index()
)
# idxmax() gives, per category, the index label of the row with the largest
# total; .loc then pulls those rows out.
max_sales_product = total_sales_category.loc[
    total_sales_category.groupby("Category")["Sales"].idxmax()
]
print(max_sales_product)
sql
-- Product(s) with the largest total sales in every category; products tied
-- for the top spot are all returned.
WITH CategorySales AS (
    -- Total sales per product within each category.
    SELECT
        Category,
        Product_id,
        SUM(Sales) AS total_sales
    FROM sales_data
    GROUP BY Category, Product_id
),
CategoryMax AS (
    -- Largest per-product total found in each category.
    SELECT
        Category,
        MAX(total_sales) AS max_total_sales
    FROM CategorySales
    GROUP BY Category
)
SELECT
    cs.Category,
    cs.Product_id,
    cs.total_sales
FROM CategorySales AS cs
INNER JOIN CategoryMax AS cm
    ON cm.Category = cs.Category
    AND cm.max_total_sales = cs.total_sales;
Q9: Calculate the monthly sales
distribution for each region.
Parameters:
Sales
Region
Year
Month
Product_id
Q9
python
# NOTE(review): monthly_sales is not defined in the visible snippet — presumably
# it is produced by an aggregation step that is missing here; confirm.
print(monthly_sales)
sql
Parameters:
Product_id
Sales
Region
Year
Month
Q10
python
# Keep, for every region, the row with the largest Sales value.
# NOTE(review): best_selling_product must already exist before this line; the
# step that builds it is not part of the visible snippet — confirm.
best_selling_product = best_selling_product.loc[
    best_selling_product.groupby("Region")["Sales"].idxmax()
]
print(best_selling_product)
sql
Follow us on LinkedIn:
Aditya Chandak
Free SQL Interview Preparation:
https://topmate.io/nitya_cloudtech/1403841