diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..8e05635
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,7 @@
+*.sql linguist-detectable=true
+*.sql linguist-language=SQL
+
+
+*.md linguist-vendored
+*.json linguist-vendored
+*.py linguist-vendored
diff --git a/.github/workflows/followup.yml b/.github/workflows/followup.yml
new file mode 100644
index 0000000..40c44fb
--- /dev/null
+++ b/.github/workflows/followup.yml
@@ -0,0 +1,56 @@
+name: Generate Followup Metrics
+
+on:
+  schedule:
+    - cron: "0 0 * * *"   # 5:30 AM IST
+    - cron: "30 5 * * *"  # 11:00 AM IST
+    - cron: "30 11 * * *" # 5:00 PM IST
+  workflow_dispatch:
+
+jobs:
+  generate-metrics:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Install Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get remove -y containerd
+          sudo apt-get install -y docker.io jq containerd.io
+
+      - name: Generate Metrics SVG
+        uses: lowlighter/metrics@latest
+        with:
+          filename: SVG/followup.svg
+          token: ${{ secrets.GH_METRICS_TOKEN }}
+          user: iamAntimPal
+          base: ""
+          template: classic
+          config_timezone: Asia/Kolkata
+          plugin_followup: yes
+          plugin_followup_sections: repositories, user
+          plugin_followup_indepth: yes
+          plugin_followup_archived: no
+
+      - name: Commit and Push Changes
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git checkout -b update-followup-metrics
+          git add SVG/followup.svg
+          git commit -m "Update followup.svg [skip ci]"
+          git push origin update-followup-metrics
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.GH_METRICS_TOKEN }}
+          branch: update-followup-metrics
+          title: "chore: update followup metrics"
+          body: "Auto-generated followup metrics SVG"
diff --git a/.github/workflows/update-date.yml b/.github/workflows/update-date.yml
new file mode 100644
index 0000000..309720d
--- /dev/null
+++ b/.github/workflows/update-date.yml
@@ -0,0 +1,29 @@
+name: Update Date in README
+
+on:
+  schedule:
+    - cron: "0 0 * * *" # Runs daily at midnight UTC
+  workflow_dispatch:    # Allows manual triggering
+
+jobs:
+  update-date:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Update date in README.md
+        run: |
+          # Get current date (customize format below)
+          CURRENT_DATE=$(date +'%Y-%m-%d')
+
+          # Replace placeholder with the current date
+          sed -i "s//${CURRENT_DATE}/g" README.md
+
+      - name: Commit and push changes
+        run: |
+          git config user.name "github-actions"
+          git config user.email "actions@github.com"
+          git add README.md
+          git commit -m "Update date in README" || echo "No changes to commit"
+          git push
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b4c093a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+profile.md
\ No newline at end of file
diff --git a/176. Second heighest salary.sql b/176. Second heighest salary.sql
deleted file mode 100644
index 3d17f1d..0000000
--- a/176. Second heighest salary.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# Write your MSSQL query statement below
-select max(salary) as SecondHighestSalary from employee
-where salary not in (select max(salary) from employee)
diff --git a/177. Nth Highest Salary.sql b/177. Nth Highest Salary.sql
deleted file mode 100644
index a79d1d5..0000000
--- a/177.
Nth Highest Salary.sql +++ /dev/null @@ -1,28 +0,0 @@ -/* -* Order By Clause - * ORDER BY order_by_expression - [ COLLATE collation_name ] - [ ASC | DESC ] - [ ,...n ] -[ ] - - ::= -{ - OFFSET { integer_constant | offset_row_count_expression } { ROW | ROWS } - [ - FETCH { FIRST | NEXT } {integer_constant | fetch_row_count_expression } { ROW | ROWS } ONLY - ] -} -*/ - -Create FUNCTION getNthHighestSalary(@N INT) returns INT as -BEGIN - Return( - Select Salary - From Employee - Gourp By Salary - Order By Salary DESC - Offset @N-1 rows - Fetch First 1 Rows Only - ); - End diff --git a/CONTTRIBUTE.MD b/CONTTRIBUTE.MD new file mode 100644 index 0000000..7a1832c --- /dev/null +++ b/CONTTRIBUTE.MD @@ -0,0 +1,168 @@ +# Contributing to LeetCode_Daily_Task + +Thank you for your interest in contributing to **LeetCode_Daily_Task**! + +--- + +## Table of Contents +- [Contributing to LeetCode\_Daily\_Task](#contributing-to-leetcode_daily_task) + - [Table of Contents](#table-of-contents) + - [How to Contribute](#how-to-contribute) + - [Impressive Contributions \& Highlights](#impressive-contributions--highlights) + - [Contributor Recognition Process](#contributor-recognition-process) + - [Share Your Thoughts](#share-your-thoughts) + - [Issue Reporting](#issue-reporting) + - [Pull Request Process](#pull-request-process) + - [Coding Guidelines](#coding-guidelines) + - [Commit Message Guidelines](#commit-message-guidelines) + - [Acknowledgments](#acknowledgments) + +--- + +## How to Contribute + +1. **Fork the Repository:** + Click the **Fork** button on the repository page to create your own copy. + +2. **Clone Your Fork:** + ```bash + git clone https://github.com/your-username/LeetCode_Daily_Task.git + ``` + +3. **Create a Branch:** + Create a new branch for your feature, solution, or bug fix. + ```bash + git checkout -b feature/your-feature-name + ``` + +4. **Implement Your Changes:** + - Add your solution or enhancements. + - Include any discussion points or explanations in your code comments or PR description. + - Follow the coding guidelines (see below). + - Update or add test cases as needed. + +5. **Commit Your Changes:** + Use clear and descriptive commit messages (see Commit Message Guidelines). + +6. **Open a Pull Request:** + Submit a pull request (PR) with a clear title and description outlining your changes, and reference any related issues. + +--- + +## Impressive Contributions & Highlights + +We love to see innovative and creative solutions! This section is dedicated to showcasing impressive contributions that go beyond the daily task: + +- **What to Share:** + - **Enhanced Solutions:** Share optimized, creative, or particularly elegant approaches to LeetCode problems. + - **Detailed Explanations:** Provide in-depth explanations and insights that help others understand your reasoning. + - **Additional Context:** Include performance comparisons, alternative methods, or advanced techniques. + +- **How to Get Recognized:** + The first contributor to submit an impressive solution for a given challenge will have their name highlighted in this section. For example: + ```md + **Impressive Contribution by:** @username + ``` + To be recognized, please send an email with your GitHub username and Gmail address to **optimismeducator@gmail.com**. This helps us track and celebrate innovative contributions. + +- **Discussion:** + Engage with others by discussing alternative approaches or asking questions in the issue tracker or PR comments. 
Your input can help refine the solution and inspire further improvements. + + +--- + +## Contributor Recognition Process + +- **How It Works:** + Exceptional contributions are recognized to encourage innovative thinking. + +- **Steps to Get Recognized:** + 1. Submit your solution via a PR. + 2. Once your PR is merged, send an email with your GitHub username and Gmail address to **optimismeducator@gmail.com**. + 3. Your details will be added to the "Impressive Contributions & Highlights" section for that challenge. + + +--- + +## Share Your Thoughts + +We welcome your feedback and ideas! +- **Questions & Suggestions:** Open an issue or start a discussion if you have ideas for improvements, new challenges, or if you need help. +- **Solution Explanations:** Share your approach and insights to help others understand the problem better. +- **Improvements:** If you see a more efficient or elegant solution, feel free to contribute your version. + +--- + +## Issue Reporting + +- **Found a Bug or Have a Suggestion?** + Open a new issue on GitHub with a descriptive title and details on how to reproduce the problem or suggest improvements. +- **Include the Following Information:** + - A clear explanation of the issue. + - Steps to reproduce the problem. + - Environment details if applicable (e.g., OS, specific LeetCode problem IDs). + +--- + +## Pull Request Process + +1. **Ensure Tests Pass:** + Verify that your changes pass all existing tests. If you’re adding new functionality, please include corresponding tests. + +2. **Provide a Descriptive Title:** + Your PR title should succinctly describe your changes (e.g., "Add optimized solution for Two Sum challenge"). + +3. **Detailed Description:** + In your PR, include: + - The purpose of your changes. + - Instructions on how to test your changes. + - Additional context, screenshots, or references to related issues. + - If you're submitting an impressive solution, include a note so we can update the recognition section. + +4. **Review & Feedback:** + Your PR will be reviewed by the maintainers. Please address any feedback or requested changes promptly. + +--- + +## Coding Guidelines + +- **Consistent Formatting:** + Follow the repository’s established style for code formatting, naming conventions, and folder structure. + +- **Comment Your Code:** + Provide clear comments explaining your logic, especially for complex solutions. + +- **Efficiency and Readability:** + Ensure your solutions are optimized for performance and are easy to understand. + +- **Documentation:** + Update or add documentation if your changes affect existing functionality. + +--- + +## Commit Message Guidelines + +- **Short & Descriptive:** + The first line of your commit message should be a brief summary (50 characters or less). + +- **Imperative Mood:** + Use the imperative mood (e.g., "Add", "Fix", "Update"). + +- **Detailed Description:** + If necessary, include a detailed description after the first line, separated by a blank line. + +Example: +```txt +Add solution for Two Sum problem + +This commit introduces an optimized solution for the Two Sum problem, along with test cases and documentation. +``` + +--- + +## Acknowledgments + +Thank you for contributing to **LeetCode_Daily_Task**! +Your help and commitment make this project a valuable resource for the community. If you have any questions or need further assistance, feel free to open an issue or contact the maintainers. + +> ## Happy coding and happy LeetCoding! πŸš€ diff --git a/LeetCode SQL 50 Solution/1045. 
Customers Who Bought All Products/1045. Customers Who Bought All Products.py b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/1045. Customers Who Bought All Products.py new file mode 100644 index 0000000..5db1189 --- /dev/null +++ b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/1045. Customers Who Bought All Products.py @@ -0,0 +1,21 @@ +import pandas as pd + +# Sample data +customer_data = {'customer_id': [1, 2, 3, 3, 1], + 'product_key': [5, 6, 5, 6, 6]} +product_data = {'product_key': [5, 6]} + +# Create DataFrames +customer_df = pd.DataFrame(customer_data) +product_df = pd.DataFrame(product_data) + +# Get the total number of products +total_products = product_df['product_key'].nunique() + +# Count distinct products per customer +customer_purchase = customer_df.groupby('customer_id')['product_key'].nunique() + +# Filter customers who bought all products +result = customer_purchase[customer_purchase == total_products].reset_index() + +print(result) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/1045. Customers Who Bought All Products.sql b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/1045. Customers Who Bought All Products.sql new file mode 100644 index 0000000..15c730d --- /dev/null +++ b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/1045. Customers Who Bought All Products.sql @@ -0,0 +1,68 @@ +1045. Customers Who Bought All Products + +Table: Customer + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| customer_id | int | +| product_key | int | ++-------------+---------+ +This table may contain duplicates rows. +customer_id is not NULL. +product_key is a foreign key (reference column) to Product table. + + +Table: Product + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| product_key | int | ++-------------+---------+ +product_key is the primary key (column with unique values) for this table. + + +Write a solution to report the customer ids from the Customer table that bought all the products in the Product table. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Customer table: ++-------------+-------------+ +| customer_id | product_key | ++-------------+-------------+ +| 1 | 5 | +| 2 | 6 | +| 3 | 5 | +| 3 | 6 | +| 1 | 6 | ++-------------+-------------+ +Product table: ++-------------+ +| product_key | ++-------------+ +| 5 | +| 6 | ++-------------+ +Output: ++-------------+ +| customer_id | ++-------------+ +| 1 | +| 3 | ++-------------+ +Explanation: +The customers who bought all the products (5 and 6) are customers with IDs 1 and 3. + + + +# Write your MySQL query statement below +SELECT customer_id FROM Customer GROUP BY customer_id +HAVING COUNT(distinct product_key) = (SELECT COUNT(product_key) FROM Product); \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/readme.md b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/readme.md new file mode 100644 index 0000000..cedd5e5 --- /dev/null +++ b/LeetCode SQL 50 Solution/1045. Customers Who Bought All Products/readme.md @@ -0,0 +1,178 @@ + +# **1045. 
Customers Who Bought All Products**
+
+## **Problem Statement**
+You are given two tables:
+- `Customer` (contains `customer_id` and `product_key`)
+- `Product` (contains all available `product_key`s)
+
+Each `product_key` in `Customer` is a **foreign key** referring to the `Product` table.
+
+### **Customer Table**
+```
++-------------+-------------+
+| Column Name | Type        |
++-------------+-------------+
+| customer_id | int         |
+| product_key | int         |
++-------------+-------------+
+```
+- The table may contain **duplicate rows**.
+- `customer_id` is **not NULL**.
+- `product_key` refers to the `Product` table.
+
+### **Product Table**
+```
++-------------+
+| product_key |
++-------------+
+| int         |
++-------------+
+```
+- `product_key` is the **primary key** (unique values) of this table.
+
+### **Task:**
+Find **all customer IDs** who bought **every product** listed in the `Product` table.
+
+---
+
+## **Example 1:**
+
+### **Input:**
+**Customer Table**
+```
++-------------+-------------+
+| customer_id | product_key |
++-------------+-------------+
+| 1           | 5           |
+| 2           | 6           |
+| 3           | 5           |
+| 3           | 6           |
+| 1           | 6           |
++-------------+-------------+
+```
+
+**Product Table**
+```
++-------------+
+| product_key |
++-------------+
+| 5           |
+| 6           |
++-------------+
+```
+
+### **Output:**
+```
++-------------+
+| customer_id |
++-------------+
+| 1           |
+| 3           |
++-------------+
+```
+
+### **Explanation:**
+- There are **two products** (5 and 6).
+- Customers who bought **both** products:
+  - **Customer 1**: Bought `5, 6` βœ…
+  - **Customer 2**: Bought `6` ❌ (missing `5`)
+  - **Customer 3**: Bought `5, 6` βœ…
+- So, **customers 1 and 3** are returned.
+
+---
+
+## **SQL Solutions**
+
+### **1️⃣ Standard MySQL Solution**
+```sql
+SELECT customer_id
+FROM Customer
+GROUP BY customer_id
+HAVING COUNT(DISTINCT product_key) = (SELECT COUNT(product_key) FROM Product);
+```
+#### **Explanation:**
+1. **GROUP BY `customer_id`** β†’ Group purchases per customer.
+2. **COUNT(DISTINCT product_key)** β†’ Count unique products each customer bought.
+3. **Compare with total products:**
+   - `(SELECT COUNT(product_key) FROM Product)` counts all available products.
+   - Only customers with `COUNT(DISTINCT product_key) = total products` are included.
+4. **HAVING** ensures we return only those who bought **all products**.
+
+---
+
+### **2️⃣ CTE-Based Solution**
+```sql
+WITH product_count AS (
+    SELECT COUNT(*) AS total_products FROM Product
+),
+customer_purchase AS (
+    SELECT customer_id, COUNT(DISTINCT product_key) AS purchased_count
+    FROM Customer
+    GROUP BY customer_id
+)
+SELECT customer_id
+FROM customer_purchase, product_count
+WHERE customer_purchase.purchased_count = product_count.total_products;
+```
+#### **Explanation:**
+1. **CTE `product_count`** β†’ Stores total number of products in `Product` table.
+2. **CTE `customer_purchase`** β†’ Groups purchases per customer and counts distinct products.
+3. **Final SELECT query** β†’ Compares each customer's purchase count with `total_products` and returns only those who match (a quick way to run this query locally is sketched below).
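+
+If you want to sanity-check the CTE query against the example data outside of LeetCode, one option is Python's built-in `sqlite3` module. This is only a minimal local-testing sketch and assumes SQLite's dialect (which accepts the same `WITH` and `COUNT(DISTINCT ...)` constructs used above), not the MySQL engine LeetCode runs:
+
+```python
+import sqlite3
+
+# In-memory database seeded with the example tables.
+conn = sqlite3.connect(":memory:")
+conn.executescript("""
+CREATE TABLE Customer (customer_id INT, product_key INT);
+CREATE TABLE Product  (product_key INT);
+INSERT INTO Customer VALUES (1, 5), (2, 6), (3, 5), (3, 6), (1, 6);
+INSERT INTO Product  VALUES (5), (6);
+""")
+
+query = """
+WITH product_count AS (
+    SELECT COUNT(*) AS total_products FROM Product
+),
+customer_purchase AS (
+    SELECT customer_id, COUNT(DISTINCT product_key) AS purchased_count
+    FROM Customer
+    GROUP BY customer_id
+)
+SELECT customer_id
+FROM customer_purchase, product_count
+WHERE customer_purchase.purchased_count = product_count.total_products;
+"""
+
+# Expect customer_id 1 and 3, in any order.
+print(conn.execute(query).fetchall())
+```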
+
+---
+
+## **Pandas Solution (Python)**
+```python
+import pandas as pd
+
+# Sample data
+customer_data = {'customer_id': [1, 2, 3, 3, 1],
+                 'product_key': [5, 6, 5, 6, 6]}
+product_data = {'product_key': [5, 6]}
+
+# Create DataFrames
+customer_df = pd.DataFrame(customer_data)
+product_df = pd.DataFrame(product_data)
+
+# Get the total number of products
+total_products = product_df['product_key'].nunique()
+
+# Count distinct products per customer
+customer_purchase = customer_df.groupby('customer_id')['product_key'].nunique()
+
+# Filter customers who bought all products
+result = customer_purchase[customer_purchase == total_products].reset_index()
+
+print(result)
+```
+
+### **Explanation:**
+1. **Create DataFrames** β†’ Convert customer and product tables into Pandas DataFrames.
+2. **Get total unique products** β†’ `product_df['product_key'].nunique()`
+3. **Count distinct products per customer** β†’ `.groupby('customer_id')['product_key'].nunique()`
+4. **Filter customers who bought all products** β†’ Keep only customers whose distinct-product count equals `total_products`.
+5. **Return final result**.
+
+---
+
+## **File Structure**
+```
+LeetCode1045/
+β”œβ”€β”€ problem_statement.md   # Contains the problem description and constraints.
+β”œβ”€β”€ sql_solution.sql       # Contains the SQL solutions (Standard + CTE-based).
+β”œβ”€β”€ pandas_solution.py     # Contains the Pandas solution.
+β”œβ”€β”€ README.md              # Overview of the problem and available solutions.
+```
+
+---
+
+## **Useful Links**
+- [LeetCode Problem 1045](https://leetcode.com/problems/customers-who-bought-all-products/)
+- [SQL GROUP BY Documentation](https://www.w3schools.com/sql/sql_groupby.asp)
+- [SQL HAVING Clause](https://www.w3schools.com/sql/sql_having.asp)
+- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html)
+
+---
+
+πŸš€ **Now it's a complete guide!** πŸš€
diff --git a/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/1068. Product Sales Analysis I.py b/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/1068. Product Sales Analysis I.py
new file mode 100644
index 0000000..a23b6c2
--- /dev/null
+++ b/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/1068. Product Sales Analysis I.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+# Sample Data
+sales_data = {'sale_id': [1, 2, 7],
+              'product_id': [100, 100, 200],
+              'year': [2008, 2009, 2011],
+              'quantity': [10, 12, 15],
+              'price': [5000, 5000, 9000]}
+
+product_data = {'product_id': [100, 200, 300],
+                'product_name': ['Nokia', 'Apple', 'Samsung']}
+
+# Create DataFrames
+sales_df = pd.DataFrame(sales_data)
+product_df = pd.DataFrame(product_data)
+
+# Perform Join
+result = sales_df.merge(product_df, on='product_id')[['product_name', 'year', 'price']]
+
+print(result)
\ No newline at end of file
diff --git a/1068. Product Sales Analysis I.sql b/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/1068. Product Sales Analysis I.sql
similarity index 100%
rename from 1068. Product Sales Analysis I.sql
rename to LeetCode SQL 50 Solution/1068. Product Sales Analysis I/1068. Product Sales Analysis I.sql
diff --git a/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/readme.md b/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/readme.md
new file mode 100644
index 0000000..94b47f5
--- /dev/null
+++ b/LeetCode SQL 50 Solution/1068. Product Sales Analysis I/readme.md
@@ -0,0 +1,162 @@
+
+
+# **1068.
Product Sales Analysis I** + +## **Problem Statement** +You are given two tables: + +- `Sales` (contains sales data including `product_id`, `year`, `quantity`, and `price`). +- `Product` (contains `product_id` and `product_name`). + +Each `product_id` in `Sales` is a **foreign key** referring to the `Product` table. + +### **Sales Table** +``` ++---------+------------+------+----------+-------+ +| sale_id | product_id | year | quantity | price | ++---------+------------+------+----------+-------+ +| int | int | int | int | int | ++---------+------------+------+----------+-------+ +``` +- `(sale_id, year)` is the **primary key** (unique values). +- `product_id` refers to the `Product` table. +- `price` represents the **per unit price** of the product in that year. + +### **Product Table** +``` ++------------+--------------+ +| product_id | product_name | ++------------+--------------+ +| int | varchar | ++------------+--------------+ +``` +- `product_id` is the **primary key** of this table. + +### **Task:** +Find the `product_name`, `year`, and `price` for each sale in the `Sales` table. + +--- + +## **Example 1:** + +### **Input:** +**Sales Table** +``` ++---------+------------+------+----------+-------+ +| sale_id | product_id | year | quantity | price | ++---------+------------+------+----------+-------+ +| 1 | 100 | 2008 | 10 | 5000 | +| 2 | 100 | 2009 | 12 | 5000 | +| 7 | 200 | 2011 | 15 | 9000 | ++---------+------------+------+----------+-------+ +``` + +**Product Table** +``` ++------------+--------------+ +| product_id | product_name | ++------------+--------------+ +| 100 | Nokia | +| 200 | Apple | +| 300 | Samsung | ++------------+--------------+ +``` + +### **Output:** +``` ++--------------+-------+-------+ +| product_name | year | price | ++--------------+-------+-------+ +| Nokia | 2008 | 5000 | +| Nokia | 2009 | 5000 | +| Apple | 2011 | 9000 | ++--------------+-------+-------+ +``` + +### **Explanation:** +- **Sale ID 1:** `Nokia` was sold in **2008** for **5000**. +- **Sale ID 2:** `Nokia` was sold in **2009** for **5000**. +- **Sale ID 7:** `Apple` was sold in **2011** for **9000**. + +--- + +## **SQL Solutions** + +### **1️⃣ Standard MySQL Solution** +```sql +SELECT p.product_name, s.year, s.price +FROM Sales s +JOIN Product p ON s.product_id = p.product_id; +``` +#### **Explanation:** +1. **JOIN** the `Sales` table with the `Product` table using `product_id`. +2. **Select `product_name`, `year`, and `price`** from the joined result. + +--- + +### **2️⃣ Window Function (SQL) Solution** +```sql +WITH SalesData AS ( + SELECT s.product_id, s.year, s.price, p.product_name + FROM Sales s + JOIN Product p ON s.product_id = p.product_id +) +SELECT product_name, year, price +FROM SalesData; +``` +#### **Explanation:** +1. **CTE `SalesData`** β†’ Stores the joined data from `Sales` and `Product`. +2. **Final SELECT** β†’ Retrieves `product_name`, `year`, and `price`. + +--- + +## **Pandas Solution (Python)** +```python +import pandas as pd + +# Sample Data +sales_data = {'sale_id': [1, 2, 7], + 'product_id': [100, 100, 200], + 'year': [2008, 2009, 2011], + 'quantity': [10, 12, 15], + 'price': [5000, 5000, 9000]} + +product_data = {'product_id': [100, 200, 300], + 'product_name': ['Nokia', 'Apple', 'Samsung']} + +# Create DataFrames +sales_df = pd.DataFrame(sales_data) +product_df = pd.DataFrame(product_data) + +# Perform Join +result = sales_df.merge(product_df, on='product_id')[['product_name', 'year', 'price']] + +print(result) +``` + +### **Explanation:** +1. 
**Create DataFrames** β†’ Convert `Sales` and `Product` tables into Pandas DataFrames. +2. **Perform `merge()` on `product_id`** β†’ Equivalent to SQL `JOIN`. +3. **Select required columns (`product_name`, `year`, `price`)**. + +--- + +## **File Structure** +``` +LeetCode1068/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solutions (Standard + Window Functions). +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 1068](https://leetcode.com/problems/product-sales-analysis-i/) +- [SQL JOIN Documentation](https://www.w3schools.com/sql/sql_join.asp) +- [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) + + + +## πŸš€ **Now it's a complete guide!** πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.py b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.py new file mode 100644 index 0000000..310d14c --- /dev/null +++ b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.py @@ -0,0 +1,19 @@ +import pandas as pd + +# Sample Data +sales_data = {'sale_id': [1, 2, 7], + 'product_id': [100, 100, 200], + 'year': [2008, 2009, 2011], + 'quantity': [10, 12, 15], + 'price': [5000, 5000, 9000]} + +# Create DataFrame +sales_df = pd.DataFrame(sales_data) + +# Find the first sale per product +first_sales = sales_df.loc[sales_df.groupby('product_id')['year'].idxmin(), ['product_id', 'year', 'quantity', 'price']] + +# Rename columns +first_sales.rename(columns={'year': 'first_year'}, inplace=True) + +print(first_sales) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.sql b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.sql new file mode 100644 index 0000000..f284841 --- /dev/null +++ b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/1070. Product Sales Analysis III.sql @@ -0,0 +1,85 @@ +1070. Product Sales Analysis III + + ++-------------+-------+ +| Column Name | Type | ++-------------+-------+ +| sale_id | int | +| product_id | int | +| year | int | +| quantity | int | +| price | int | ++-------------+-------+ +(sale_id, year) is the primary key (combination of columns with unique values) of this table. +product_id is a foreign key (reference column) to Product table. +Each row of this table shows a sale on the product product_id in a certain year. +Note that the price is per unit. + + +Table: Product + ++--------------+---------+ +| Column Name | Type | ++--------------+---------+ +| product_id | int | +| product_name | varchar | ++--------------+---------+ +product_id is the primary key (column with unique values) of this table. +Each row of this table indicates the product name of each product. + + +Write a solution to select the product id, year, quantity, and price for the first year of every product sold. + +Return the resulting table in any order. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Sales table: ++---------+------------+------+----------+-------+ +| sale_id | product_id | year | quantity | price | ++---------+------------+------+----------+-------+ +| 1 | 100 | 2008 | 10 | 5000 | +| 2 | 100 | 2009 | 12 | 5000 | +| 7 | 200 | 2011 | 15 | 9000 | ++---------+------------+------+----------+-------+ +Product table: ++------------+--------------+ +| product_id | product_name | ++------------+--------------+ +| 100 | Nokia | +| 200 | Apple | +| 300 | Samsung | ++------------+--------------+ +Output: ++------------+------------+----------+-------+ +| product_id | first_year | quantity | price | ++------------+------------+----------+-------+ +| 100 | 2008 | 10 | 5000 | +| 200 | 2011 | 15 | 9000 | ++------------+------------+----------+-------+ + + + +# Write your MySQL query statement below +SELECT + product_id, + year AS first_year, + quantity, + price +FROM + Sales +WHERE + (product_id, year) IN ( + SELECT + product_id, + MIN(year) AS year + FROM + Sales + GROUP BY + product_id + ); \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/readme.md b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/readme.md new file mode 100644 index 0000000..b8bff49 --- /dev/null +++ b/LeetCode SQL 50 Solution/1070. Product Sales Analysis III/readme.md @@ -0,0 +1,179 @@ + +# **1070. Product Sales Analysis III** + +## **Problem Statement** +You are given two tables: + +- `Sales` (contains sales data including `product_id`, `year`, `quantity`, and `price`). +- `Product` (contains `product_id` and `product_name`). + +Each `product_id` in `Sales` is a **foreign key** referring to the `Product` table. + +### **Sales Table** +``` ++---------+------------+------+----------+-------+ +| sale_id | product_id | year | quantity | price | ++---------+------------+------+----------+-------+ +| int | int | int | int | int | ++---------+------------+------+----------+-------+ +``` +- `(sale_id, year)` is the **primary key** (unique values). +- `product_id` refers to the `Product` table. +- `price` represents the **per unit price** of the product in that year. + +### **Product Table** +``` ++------------+--------------+ +| product_id | product_name | ++------------+--------------+ +| int | varchar | ++------------+--------------+ +``` +- `product_id` is the **primary key** of this table. + +### **Task:** +Find the `product_id`, `first_year`, `quantity`, and `price` for **the first year a product was sold**. + +--- + +## **Example 1:** + +### **Input:** +**Sales Table** +``` ++---------+------------+------+----------+-------+ +| sale_id | product_id | year | quantity | price | ++---------+------------+------+----------+-------+ +| 1 | 100 | 2008 | 10 | 5000 | +| 2 | 100 | 2009 | 12 | 5000 | +| 7 | 200 | 2011 | 15 | 9000 | ++---------+------------+------+----------+-------+ +``` + +**Product Table** +``` ++------------+--------------+ +| product_id | product_name | ++------------+--------------+ +| 100 | Nokia | +| 200 | Apple | +| 300 | Samsung | ++------------+--------------+ +``` + +### **Output:** +``` ++------------+------------+----------+-------+ +| product_id | first_year | quantity | price | ++------------+------------+----------+-------+ +| 100 | 2008 | 10 | 5000 | +| 200 | 2011 | 15 | 9000 | ++------------+------------+----------+-------+ +``` + +### **Explanation:** +- **Product 100 (Nokia):** First sold in **2008** with **10 units** at **5000** price. 
+- **Product 200 (Apple):** First sold in **2011** with **15 units** at **9000** price. + +--- + +## **SQL Solutions** + +### **1️⃣ Standard MySQL Solution** +```sql +SELECT + product_id, + year AS first_year, + quantity, + price +FROM + Sales +WHERE + (product_id, year) IN ( + SELECT + product_id, + MIN(year) AS year + FROM + Sales + GROUP BY + product_id + ); +``` +#### **Explanation:** +1. **Subquery (`MIN(year)`)** β†’ Finds the **first year** (`MIN(year)`) each `product_id` was sold. +2. **Filter the main table** β†’ Selects rows matching the **earliest year** for each product. + +--- + +### **2️⃣ Window Function (SQL) Solution** +```sql +WITH RankedSales AS ( + SELECT + product_id, + year AS first_year, + quantity, + price, + RANK() OVER (PARTITION BY product_id ORDER BY year ASC) AS rnk + FROM Sales +) +SELECT product_id, first_year, quantity, price +FROM RankedSales +WHERE rnk = 1; +``` +#### **Explanation:** +1. **`RANK() OVER (PARTITION BY product_id ORDER BY year ASC)`** + - Assigns **rank 1** to the first sale per `product_id`. +2. **Filter (`WHERE rnk = 1`)** + - Retrieves **only the first sale per product**. + +--- + +## **Pandas Solution (Python)** +```python +import pandas as pd + +# Sample Data +sales_data = {'sale_id': [1, 2, 7], + 'product_id': [100, 100, 200], + 'year': [2008, 2009, 2011], + 'quantity': [10, 12, 15], + 'price': [5000, 5000, 9000]} + +# Create DataFrame +sales_df = pd.DataFrame(sales_data) + +# Find the first sale per product +first_sales = sales_df.loc[sales_df.groupby('product_id')['year'].idxmin(), ['product_id', 'year', 'quantity', 'price']] + +# Rename columns +first_sales.rename(columns={'year': 'first_year'}, inplace=True) + +print(first_sales) +``` + +### **Explanation:** +1. **Create DataFrame** β†’ Convert `Sales` table into Pandas DataFrame. +2. **Group by `product_id` and get the `idxmin()` of `year`** β†’ Finds the first sale per product. +3. **Retrieve `product_id`, `year`, `quantity`, and `price`**. +4. **Rename `year` to `first_year`**. + +--- + +## **File Structure** +``` +LeetCode1070/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solutions (Standard + Window Functions). +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 1070](https://leetcode.com/problems/product-sales-analysis-iii/) +- [SQL JOIN Documentation](https://www.w3schools.com/sql/sql_join.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) + + +## πŸš€ **Now it's a complete guide!** πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1075. Project Employees I/1075. Project Employees I.py b/LeetCode SQL 50 Solution/1075. Project Employees I/1075. Project Employees I.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1075. Project Employees I/1075. Project Employees I.sql b/LeetCode SQL 50 Solution/1075. Project Employees I/1075. Project Employees I.sql new file mode 100644 index 0000000..385a71d --- /dev/null +++ b/LeetCode SQL 50 Solution/1075. Project Employees I/1075. Project Employees I.sql @@ -0,0 +1,82 @@ +1075. 
Project Employees I +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Project + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| project_id | int | +| employee_id | int | ++-------------+---------+ +(project_id, employee_id) is the primary key of this table. +employee_id is a foreign key to Employee table. +Each row of this table indicates that the employee with employee_id is working on the project with project_id. + + +Table: Employee + ++------------------+---------+ +| Column Name | Type | ++------------------+---------+ +| employee_id | int | +| name | varchar | +| experience_years | int | ++------------------+---------+ +employee_id is the primary key of this table. It's guaranteed that experience_years is not NULL. +Each row of this table contains information about one employee. + + +Write an SQL query that reports the average experience years of all the employees for each project, rounded to 2 digits. + +Return the result table in any order. + +The query result format is in the following example. + + + +Example 1: + +Input: +Project table: ++-------------+-------------+ +| project_id | employee_id | ++-------------+-------------+ +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 4 | ++-------------+-------------+ +Employee table: ++-------------+--------+------------------+ +| employee_id | name | experience_years | ++-------------+--------+------------------+ +| 1 | Khaled | 3 | +| 2 | Ali | 2 | +| 3 | John | 1 | +| 4 | Doe | 2 | ++-------------+--------+------------------+ +Output: ++-------------+---------------+ +| project_id | average_years | ++-------------+---------------+ +| 1 | 2.00 | +| 2 | 2.50 | ++-------------+---------------+ +Explanation: The average experience years for the first project is (3 + 2 + 1) / 3 = 2.00 and for the second project is (3 + 2) / 2 = 2.50 + +'''SQL + +# Write your MySQL query statement below +select project_id , round(avg(experience_years), 2) as average_years +from project as p +left join employee as e +on p.employee_id = e.employee_id +group by project_id; + +''' \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1075. Project Employees I/readme.md b/LeetCode SQL 50 Solution/1075. Project Employees I/readme.md new file mode 100644 index 0000000..4c1d8da --- /dev/null +++ b/LeetCode SQL 50 Solution/1075. Project Employees I/readme.md @@ -0,0 +1,131 @@ + +# πŸ† Project Employees I - LeetCode 1075 + +## πŸ“Œ Problem Statement +You are given two tables: **Project** and **Employee**. + +### Project Table +| Column Name | Type | +| ----------- | ---- | +| project_id | int | +| employee_id | int | + +- `(project_id, employee_id)` is the primary key of this table. +- `employee_id` is a foreign key referencing the `Employee` table. + +### Employee Table +| Column Name | Type | +| ---------------- | ------- | +| employee_id | int | +| name | varchar | +| experience_years | int | + +- `employee_id` is the primary key. +- `experience_years` is guaranteed to be **NOT NULL**. + +The task is to **return the average experience years of all employees for each project, rounded to 2 decimal places**. 
+ +--- + +## πŸ“Š Example 1: +### Input: +**Project Table** +| project_id | employee_id | +| ---------- | ----------- | +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 4 | + +**Employee Table** +| employee_id | name | experience_years | +| ----------- | ------ | ---------------- | +| 1 | Khaled | 3 | +| 2 | Ali | 2 | +| 3 | John | 1 | +| 4 | Doe | 2 | + +### Output: +| project_id | average_years | +| ---------- | ------------- | +| 1 | 2.00 | +| 2 | 2.50 | + +### Explanation: +- **Project 1:** `(3 + 2 + 1) / 3 = 2.00` +- **Project 2:** `(3 + 2) / 2 = 2.50` + +--- + +## πŸ–₯ SQL Solutions + +### 1️⃣ Standard MySQL Solution +#### Explanation: +- We **JOIN** the `Project` and `Employee` tables using `employee_id`. +- We **calculate the average** of `experience_years` for each `project_id`. +- We **round** the result to **two decimal places**. + +```sql +SELECT project_id, ROUND(AVG(experience_years), 2) AS average_years +FROM project AS p +LEFT JOIN employee AS e +ON p.employee_id = e.employee_id +GROUP BY project_id; +``` + +--- + +### 2️⃣ Window Function (SQL) Solution +#### Explanation: +- Using **window functions**, we calculate the `AVG(experience_years)` over a **partitioned** dataset. + +```sql +SELECT DISTINCT project_id, + ROUND(AVG(experience_years) OVER (PARTITION BY project_id), 2) AS average_years +FROM project AS p +JOIN employee AS e +ON p.employee_id = e.employee_id; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- We read both tables into Pandas **DataFrames**. +- We merge the tables on `employee_id`. +- We group by `project_id` and compute the mean. +- We round the output to 2 decimal places. + +```python +import pandas as pd + +def project_average_experience(project: pd.DataFrame, employee: pd.DataFrame) -> pd.DataFrame: + df = project.merge(employee, on="employee_id") + result = df.groupby("project_id")["experience_years"].mean().round(2).reset_index() + result.columns = ["project_id", "average_years"] + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Project-Employees-I +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_window.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/project-employees-i/) +- πŸ“š [SQL Joins Explanation](https://www.w3schools.com/sql/sql_join.asp) +- 🐍 [Pandas Documentation](https://pandas.pydata.org/docs/) + +--- + +## Let me know if you need any modifications! πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/1141. User Activity for the Past 30 Days I.py b/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/1141. User Activity for the Past 30 Days I.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/1141. User Activity for the Past 30 Days I.sql b/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/1141. User Activity for the Past 30 Days I.sql new file mode 100644 index 0000000..e3a7bda --- /dev/null +++ b/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/1141. User Activity for the Past 30 Days I.sql @@ -0,0 +1,67 @@ +1141. 
User Activity for the Past 30 Days I + +Table: Activity + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| user_id | int | +| session_id | int | +| activity_date | date | +| activity_type | enum | ++---------------+---------+ +This table may have duplicate rows. +The activity_type column is an ENUM (category) of type ('open_session', 'end_session', 'scroll_down', 'send_message'). +The table shows the user activities for a social media website. +Note that each session belongs to exactly one user. + + +Write a solution to find the daily active user count for a period of 30 days ending 2019-07-27 inclusively. A user was active on someday if they made at least one activity on that day. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Activity table: ++---------+------------+---------------+---------------+ +| user_id | session_id | activity_date | activity_type | ++---------+------------+---------------+---------------+ +| 1 | 1 | 2019-07-20 | open_session | +| 1 | 1 | 2019-07-20 | scroll_down | +| 1 | 1 | 2019-07-20 | end_session | +| 2 | 4 | 2019-07-20 | open_session | +| 2 | 4 | 2019-07-21 | send_message | +| 2 | 4 | 2019-07-21 | end_session | +| 3 | 2 | 2019-07-21 | open_session | +| 3 | 2 | 2019-07-21 | send_message | +| 3 | 2 | 2019-07-21 | end_session | +| 4 | 3 | 2019-06-25 | open_session | +| 4 | 3 | 2019-06-25 | end_session | ++---------+------------+---------------+---------------+ +Output: ++------------+--------------+ +| day | active_users | ++------------+--------------+ +| 2019-07-20 | 2 | +| 2019-07-21 | 2 | ++------------+--------------+ +Explanation: Note that we do not care about days with zero active users. + +# 1141. User Activity for the Past 30 Days I + +# Find the daily active user count for a period of 30 days ending 2019-07-27 inclusively. A user was active on some day if he/she made at least one activity on that day. +# Note that we do not care about days with zero active users + +SELECT + activity_date AS day, + COUNT(DISTINCT user_id) AS active_users +FROM + Activity +WHERE + DATEDIFF('2019-07-27', activity_date) < 30 AND DATEDIFF('2019-07-27', activity_date)>=0 +GROUP BY 1 \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/readme.md b/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/readme.md new file mode 100644 index 0000000..fd44f51 --- /dev/null +++ b/LeetCode SQL 50 Solution/1141. User Activity for the Past 30 Days I/readme.md @@ -0,0 +1,135 @@ + +# πŸ“Š User Activity for the Past 30 Days I - LeetCode 1141 + +## πŸ“Œ Problem Statement +You are given the **Activity** table that records user activities on a social media website. + +### Activity Table +| Column Name | Type | +| ------------- | ---- | +| user_id | int | +| session_id | int | +| activity_date | date | +| activity_type | enum | + +- The `activity_type` column is an ENUM of **('open_session', 'end_session', 'scroll_down', 'send_message')**. +- Each session belongs to exactly **one user**. +- The table **may have duplicate rows**. + +### Task: +Find the **daily active user count** for a period of **30 days ending 2019-07-27 inclusively**. +- A user is considered **active on a given day** if they made at least **one activity**. +- Ignore days with **zero active users**. 
+ +--- + +## πŸ“Š Example 1: +### Input: +**Activity Table** +| user_id | session_id | activity_date | activity_type | +| ------- | ---------- | ------------- | ------------- | +| 1 | 1 | 2019-07-20 | open_session | +| 1 | 1 | 2019-07-20 | scroll_down | +| 1 | 1 | 2019-07-20 | end_session | +| 2 | 4 | 2019-07-20 | open_session | +| 2 | 4 | 2019-07-21 | send_message | +| 2 | 4 | 2019-07-21 | end_session | +| 3 | 2 | 2019-07-21 | open_session | +| 3 | 2 | 2019-07-21 | send_message | +| 3 | 2 | 2019-07-21 | end_session | +| 4 | 3 | 2019-06-25 | open_session | +| 4 | 3 | 2019-06-25 | end_session | + +### Output: +| day | active_users | +| ---------- | ------------ | +| 2019-07-20 | 2 | +| 2019-07-21 | 2 | + +### Explanation: +- **2019-07-20**: Users **1 and 2** were active. +- **2019-07-21**: Users **2 and 3** were active. +- **Days with zero active users are ignored**. + +--- + +## πŸ–₯ SQL Solutions + +### 1️⃣ Standard MySQL Solution +#### Explanation: +- **Filter records** for the last **30 days** (ending on `2019-07-27`). +- Use `COUNT(DISTINCT user_id)` to count **unique active users per day**. +- Ignore **days with zero active users**. + +```sql +SELECT + activity_date AS day, + COUNT(DISTINCT user_id) AS active_users +FROM + Activity +WHERE + DATEDIFF('2019-07-27', activity_date) < 30 + AND DATEDIFF('2019-07-27', activity_date) >= 0 +GROUP BY activity_date; +``` + +--- + +### 2️⃣ Alternative Solution Using `BETWEEN` +#### Explanation: +- This solution filters the date range using `BETWEEN` instead of `DATEDIFF`. + +```sql +SELECT + activity_date AS day, + COUNT(DISTINCT user_id) AS active_users +FROM + Activity +WHERE + activity_date BETWEEN DATE_SUB('2019-07-27', INTERVAL 29 DAY) AND '2019-07-27' +GROUP BY activity_date; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- Filter activity records for the **last 30 days**. +- **Group by `activity_date`** and count **unique `user_id`s**. +- **Ignore days with zero active users**. + +```python +import pandas as pd + +def daily_active_users(activity: pd.DataFrame) -> pd.DataFrame: + # Filter data within the last 30 days (ending on '2019-07-27') + filtered = activity[(activity["activity_date"] >= "2019-06-28") & (activity["activity_date"] <= "2019-07-27")] + + # Group by day and count unique users + result = filtered.groupby("activity_date")["user_id"].nunique().reset_index() + + # Rename columns + result.columns = ["day", "active_users"] + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ User-Activity-Past-30-Days +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_between.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/user-activity-for-the-past-30-days-i/) +- πŸ“š [SQL Date Functions](https://www.w3schools.com/sql/sql_dates.asp) +- 🐍 [Pandas Documentation](https://pandas.pydata.org/docs/) + +## Let me know if you need any changes! πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1148. Article Views I/1148. Article Views I.py b/LeetCode SQL 50 Solution/1148. Article Views I/1148. Article Views I.py new file mode 100644 index 0000000..e69de29 diff --git a/1148. Article Views I.sql b/LeetCode SQL 50 Solution/1148. Article Views I/1148. Article Views I.sql similarity index 100% rename from 1148. Article Views I.sql rename to LeetCode SQL 50 Solution/1148. Article Views I/1148. Article Views I.sql diff --git a/LeetCode SQL 50 Solution/1148. 
Article Views I/readme.md b/LeetCode SQL 50 Solution/1148. Article Views I/readme.md new file mode 100644 index 0000000..9e1f1db --- /dev/null +++ b/LeetCode SQL 50 Solution/1148. Article Views I/readme.md @@ -0,0 +1,119 @@ +# πŸ“° Article Views I - LeetCode 1148 + +## πŸ“Œ Problem Statement +You are given the **Views** table that records article views. + +### Views Table +| Column Name | Type | +| ----------- | ---- | +| article_id | int | +| author_id | int | +| viewer_id | int | +| view_date | date | + +- The table **may contain duplicate rows**. +- Each row indicates that **some viewer viewed an article** written by some author on a specific date. +- If `author_id = viewer_id`, it means **the author viewed their own article**. + +### Task: +Find **all authors** who have viewed **at least one of their own articles**. +- **Return the result sorted by `id` in ascending order**. + +--- + +## πŸ“Š Example 1: +### Input: +**Views Table** +| article_id | author_id | viewer_id | view_date | +| ---------- | --------- | --------- | ---------- | +| 1 | 3 | 5 | 2019-08-01 | +| 1 | 3 | 6 | 2019-08-02 | +| 2 | 7 | 7 | 2019-08-01 | +| 2 | 7 | 6 | 2019-08-02 | +| 4 | 7 | 1 | 2019-07-22 | +| 3 | 4 | 4 | 2019-07-21 | +| 3 | 4 | 4 | 2019-07-21 | + +### Output: +| id | +| --- | +| 4 | +| 7 | + +### Explanation: +- **Author 4** viewed their own article (`viewer_id = author_id`). +- **Author 7** viewed their own article (`viewer_id = author_id`). +- The result is sorted in **ascending order**. + +--- + +## πŸ–₯ SQL Solutions + +### 1️⃣ Standard MySQL Solution +#### Explanation: +- **Filter rows** where `author_id = viewer_id`. +- Use `DISTINCT` to **remove duplicates**. +- **Sort the result** in ascending order. + +```sql +SELECT DISTINCT author_id AS id +FROM Views +WHERE author_id = viewer_id +ORDER BY id ASC; +``` + +--- + +### 2️⃣ Alternative Solution Using `GROUP BY` +#### Explanation: +- **Group by** `author_id` and **filter authors** who have viewed at least one of their own articles. + +```sql +SELECT author_id AS id +FROM Views +WHERE author_id = viewer_id +GROUP BY author_id +ORDER BY id ASC; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- **Filter rows** where `author_id == viewer_id`. +- **Select distinct author IDs**. +- **Sort the result** in ascending order. + +```python +import pandas as pd + +def authors_who_viewed_own_articles(views: pd.DataFrame) -> pd.DataFrame: + # Filter rows where author_id == viewer_id + filtered = views[views["author_id"] == views["viewer_id"]] + + # Select unique author IDs and sort + result = pd.DataFrame({"id": sorted(filtered["author_id"].unique())}) + + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Article-Views-I +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_group_by.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/article-views-i/) +- πŸ“š [SQL DISTINCT vs GROUP BY](https://www.w3schools.com/sql/sql_distinct.asp) +- 🐍 [Pandas Unique Function](https://pandas.pydata.org/docs/reference/api/pandas.Series.unique.html) + +## Let me know if you need any changes! πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/1164. Product Price at a Given Date.py b/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/1164. Product Price at a Given Date.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1164. 
Product Price at a Given Date/1164. Product Price at a Given Date.sql b/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/1164. Product Price at a Given Date.sql new file mode 100644 index 0000000..6fa8bd6 --- /dev/null +++ b/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/1164. Product Price at a Given Date.sql @@ -0,0 +1,46 @@ + +1164. Product Price at a Given Date + +Table: Products + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| product_id | int | +| new_price | int | +| change_date | date | ++---------------+---------+ +(product_id, change_date) is the primary key (combination of columns with unique values) of this table. +Each row of this table indicates that the price of some product was changed to a new price at some date. + + +Write a solution to find the prices of all products on 2019-08-16. Assume the price of all products before any change is 10. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Products table: ++------------+-----------+-------------+ +| product_id | new_price | change_date | ++------------+-----------+-------------+ +| 1 | 20 | 2019-08-14 | +| 2 | 50 | 2019-08-14 | +| 1 | 30 | 2019-08-15 | +| 1 | 35 | 2019-08-16 | +| 2 | 65 | 2019-08-17 | +| 3 | 20 | 2019-08-18 | ++------------+-----------+-------------+ +Output: ++------------+-------+ +| product_id | price | ++------------+-------+ +| 2 | 50 | +| 1 | 35 | +| 3 | 10 | ++------------+-------+ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/readme.md b/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/readme.md new file mode 100644 index 0000000..e4aaf10 --- /dev/null +++ b/LeetCode SQL 50 Solution/1164. Product Price at a Given Date/readme.md @@ -0,0 +1,179 @@ + + +# **1164. Product Price at a Given Date** + +## **Problem Statement** +You are given the **Products** table, which keeps track of price changes. + +### **Products Table** +``` ++------------+-----------+-------------+ +| product_id | new_price | change_date | ++------------+-----------+-------------+ +| int | int | date | ++------------+-----------+-------------+ +``` +- `(product_id, change_date)` is the **primary key**. +- Each row represents a price update for a product on a specific date. + +### **Task:** +Find the price of all products on **2019-08-16**. +Assume the **initial price of all products is 10** before any change occurs. 
+
+---
+
+## **Example 1:**
+
+### **Input:**
+**Products Table**
+```
++------------+-----------+-------------+
+| product_id | new_price | change_date |
++------------+-----------+-------------+
+| 1          | 20        | 2019-08-14  |
+| 2          | 50        | 2019-08-14  |
+| 1          | 30        | 2019-08-15  |
+| 1          | 35        | 2019-08-16  |
+| 2          | 65        | 2019-08-17  |
+| 3          | 20        | 2019-08-18  |
++------------+-----------+-------------+
+```
+
+### **Output:**
+```
++------------+-------+
+| product_id | price |
++------------+-------+
+| 2          | 50    |
+| 1          | 35    |
+| 3          | 10    |
++------------+-------+
+```
+
+### **Explanation:**
+- **Product 1:** Last change on or before `2019-08-16` β†’ **35**
+- **Product 2:** Last change on or before `2019-08-16` β†’ **50**
+- **Product 3:** **No price change on or before 2019-08-16**, so default price is **10**
+
+---
+
+## **SQL Solutions**
+
+### **1️⃣ Standard MySQL Solution**
+```sql
+SELECT
+    p.product_id,
+    COALESCE((
+        SELECT new_price
+        FROM Products
+        WHERE product_id = p.product_id
+          AND change_date <= '2019-08-16'
+        ORDER BY change_date DESC
+        LIMIT 1
+    ), 10) AS price
+FROM
+    (SELECT DISTINCT product_id FROM Products) p;
+```
+#### **Explanation:**
+1. **Find the last price before or on `2019-08-16`**
+   - `ORDER BY change_date DESC LIMIT 1` β†’ Gets the most recent price on or before `2019-08-16`.
+2. **Use `COALESCE()`**
+   - If no price exists, set default price **10**.
+3. **Use `DISTINCT product_id`**
+   - Ensures all unique products are checked.
+
+---
+
+### **2️⃣ CTE + `LEFT JOIN` Solution**
+```sql
+# Write your MySQL query statement below
+WITH
+    T AS (SELECT DISTINCT product_id FROM Products),
+    P AS (
+        SELECT product_id, new_price AS price
+        FROM Products
+        WHERE
+            (product_id, change_date) IN (
+                SELECT product_id, MAX(change_date) AS change_date
+                FROM Products
+                WHERE change_date <= '2019-08-16'
+                GROUP BY 1
+            )
+    )
+SELECT product_id, IFNULL(price, 10) AS price
+FROM
+    T
+    LEFT JOIN P USING (product_id);
+```
+#### **Explanation:**
+1. **CTE `T`** β†’ Collects every distinct `product_id`.
+2. **CTE `P`** β†’ Keeps, for each product, the `new_price` at its latest `change_date` on or before `2019-08-16` (found via the `MAX(change_date)` subquery).
+3. **`LEFT JOIN` + `IFNULL(price, 10)`** β†’ Products with no qualifying price change stay in the result and fall back to the default price **10**.
+
+---
+
+## **Pandas Solution (Python)**
+```python
+import pandas as pd
+
+# Sample Data
+products_data = {
+    'product_id': [1, 2, 1, 1, 2, 3],
+    'new_price': [20, 50, 30, 35, 65, 20],
+    'change_date': ['2019-08-14', '2019-08-14', '2019-08-15', '2019-08-16', '2019-08-17', '2019-08-18']
+}
+
+# Create DataFrame
+products_df = pd.DataFrame(products_data)
+products_df['change_date'] = pd.to_datetime(products_df['change_date'])  # Convert to datetime
+
+# Filter for changes before or on '2019-08-16'
+valid_prices = products_df[products_df['change_date'] <= '2019-08-16']
+
+# Get the latest price for each product before '2019-08-16'
+latest_prices = valid_prices.sort_values(by=['product_id', 'change_date']).groupby('product_id').last().reset_index()
+
+# Rename column
+latest_prices = latest_prices[['product_id', 'new_price']].rename(columns={'new_price': 'price'})
+
+# Get all unique products
+all_products = products_df[['product_id']].drop_duplicates()
+
+# Merge with latest prices and fill missing values with 10
+final_prices = all_products.merge(latest_prices, on='product_id', how='left').fillna({'price': 10})
+
+print(final_prices)
+```
+
+### **Explanation:**
+1. **Convert `change_date` to datetime**
+   - Ensures proper date comparison.
+2. **Filter for prices before `2019-08-16`** + - Excludes future price changes. +3. **Get the latest price per product (`groupby().last()`)** + - Retrieves the most recent price change. +4. **Merge with all products and set missing prices to `10`** + - Ensures all products are included. + +--- + +## **File Structure** +``` +LeetCode1164/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solutions (Standard + Window Functions). +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 1164](https://leetcode.com/problems/product-price-at-a-given-date/) +- [SQL COALESCE Documentation](https://www.w3schools.com/sql/sql_coalesce.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) + diff --git a/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/1174. Immediate Food Delivery II.py b/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/1174. Immediate Food Delivery II.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/1174. Immediate Food Delivery II.sql b/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/1174. Immediate Food Delivery II.sql new file mode 100644 index 0000000..f7c9563 --- /dev/null +++ b/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/1174. Immediate Food Delivery II.sql @@ -0,0 +1,72 @@ +1174. Immediate Food Delivery II +Medium +Topics +Companies +SQL Schema +Pandas Schema +Table: Delivery + ++-----------------------------+---------+ +| Column Name | Type | ++-----------------------------+---------+ +| delivery_id | int | +| customer_id | int | +| order_date | date | +| customer_pref_delivery_date | date | ++-----------------------------+---------+ +delivery_id is the column of unique values of this table. +The table holds information about food delivery to customers that make orders at some date and specify a preferred delivery date (on the same order date or after it). + + +If the customer's preferred delivery date is the same as the order date, then the order is called immediate; otherwise, it is called scheduled. + +The first order of a customer is the order with the earliest order date that the customer made. It is guaranteed that a customer has precisely one first order. + +Write a solution to find the percentage of immediate orders in the first orders of all customers, rounded to 2 decimal places. + +The result format is in the following example. + + + +Example 1: + +Input: +Delivery table: ++-------------+-------------+------------+-----------------------------+ +| delivery_id | customer_id | order_date | customer_pref_delivery_date | ++-------------+-------------+------------+-----------------------------+ +| 1 | 1 | 2019-08-01 | 2019-08-02 | +| 2 | 2 | 2019-08-02 | 2019-08-02 | +| 3 | 1 | 2019-08-11 | 2019-08-12 | +| 4 | 3 | 2019-08-24 | 2019-08-24 | +| 5 | 3 | 2019-08-21 | 2019-08-22 | +| 6 | 2 | 2019-08-11 | 2019-08-13 | +| 7 | 4 | 2019-08-09 | 2019-08-09 | ++-------------+-------------+------------+-----------------------------+ +Output: ++----------------------+ +| immediate_percentage | ++----------------------+ +| 50.00 | ++----------------------+ +Explanation: +The customer id 1 has a first order with delivery id 1 and it is scheduled. 
+The customer id 2 has a first order with delivery id 2 and it is immediate. +The customer id 3 has a first order with delivery id 5 and it is scheduled. +The customer id 4 has a first order with delivery id 7 and it is immediate. +Hence, half the customers have immediate first orders. + + + +# Write your MySQL query statement below +''' + + +select +round(100*sum(case when b.min_order_date = b.min_delivery_date then 1 else 0 end)/count(*), 2) +as immediate_percentage +from ( + select min(order_date) as min_order_date, min(customer_pref_delivery_date) as min_delivery_date + from delivery + group by customer_id +) b; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/readme.md b/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/readme.md new file mode 100644 index 0000000..ad1e800 --- /dev/null +++ b/LeetCode SQL 50 Solution/1174. Immediate Food Delivery II/readme.md @@ -0,0 +1,186 @@ + + +# **1174. Immediate Food Delivery II** + +## **Problem Statement** +You are given a table `Delivery` that records food deliveries made to customers. Each row represents an order with the date it was placed and the customer’s preferred delivery date. + +--- + +## **Delivery Table** +``` ++-------------+-------------+------------+-----------------------------+ +| Column Name | Type | Description | ++-------------+-------------+----------------------------------------------+ +| delivery_id | int | Unique identifier for the delivery | +| customer_id | int | Identifier for the customer | +| order_date | date | Date when the order was placed | +| customer_pref_delivery_date | date | Customer’s preferred delivery date | ++-------------+-------------+----------------------------------------------+ +``` +- `delivery_id` is the **primary key**. +- Each customer specifies a preferred delivery date, which can be the same as or after the order date. + +--- + +## **Task:** +Calculate the **percentage** of customers whose **first order** is **immediate** (i.e., the order date is the same as the customer’s preferred delivery date). +- A customer’s **first order** is defined as the order with the **earliest order_date** for that customer. +- The result should be **rounded to 2 decimal places**. +- Return the percentage as `immediate_percentage`. + +--- + +## **Example 1:** + +### **Input:** +**Delivery Table** +``` ++-------------+-------------+------------+-----------------------------+ +| delivery_id | customer_id | order_date | customer_pref_delivery_date | ++-------------+-------------+------------+-----------------------------+ +| 1 | 1 | 2019-08-01 | 2019-08-02 | +| 2 | 2 | 2019-08-02 | 2019-08-02 | +| 3 | 1 | 2019-08-11 | 2019-08-12 | +| 4 | 3 | 2019-08-24 | 2019-08-24 | +| 5 | 3 | 2019-08-21 | 2019-08-22 | +| 6 | 2 | 2019-08-11 | 2019-08-13 | +| 7 | 4 | 2019-08-09 | 2019-08-09 | ++-------------+-------------+------------+-----------------------------+ +``` + +### **Output:** +``` ++----------------------+ +| immediate_percentage | ++----------------------+ +| 50.00 | ++----------------------+ +``` + +### **Explanation:** +- **Customer 1:** First order is on **2019-08-01** (preferred: 2019-08-02) β†’ **Scheduled** +- **Customer 2:** First order is on **2019-08-02** (preferred: 2019-08-02) β†’ **Immediate** +- **Customer 3:** First order is on **2019-08-21** (preferred: 2019-08-22) β†’ **Scheduled** +- **Customer 4:** First order is on **2019-08-09** (preferred: 2019-08-09) β†’ **Immediate** + +Out of 4 customers, 2 have immediate first orders. 
+Percentage = (2 / 4) * 100 = **50.00** + +--- + +## **SQL Solutions** + +### **1️⃣ Standard MySQL Solution** +```sql +SELECT + ROUND(100 * SUM(CASE + WHEN first_orders.order_date = first_orders.customer_pref_delivery_date THEN 1 + ELSE 0 + END) / COUNT(*), 2) AS immediate_percentage +FROM ( + -- Get the first order (earliest order_date) for each customer + SELECT customer_id, order_date, customer_pref_delivery_date + FROM Delivery + WHERE (customer_id, order_date) IN ( + SELECT customer_id, MIN(order_date) + FROM Delivery + GROUP BY customer_id + ) +) AS first_orders; +``` + +#### **Explanation:** +- **Subquery:** Retrieves the first order for each customer by selecting the minimum `order_date`. +- **Outer Query:** + - Uses a `CASE` statement to check if the `order_date` equals `customer_pref_delivery_date` (i.e., immediate order). + - Calculates the percentage of immediate first orders. + - Rounds the result to 2 decimal places. + +--- + +### **2️⃣ Window Function (SQL) Solution** +```sql +WITH RankedOrders AS ( + SELECT + customer_id, + order_date, + customer_pref_delivery_date, + ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date) AS rn + FROM Delivery +) +SELECT + ROUND(100 * SUM(CASE WHEN order_date = customer_pref_delivery_date THEN 1 ELSE 0 END) / COUNT(*), 2) AS immediate_percentage +FROM RankedOrders +WHERE rn = 1; +``` + +#### **Explanation:** +- **CTE `RankedOrders`:** + - Uses `ROW_NUMBER()` to rank orders for each customer by `order_date`. + - Filters for the first order of each customer (`rn = 1`). +- **Final SELECT:** + - Computes the percentage of first orders that are immediate. + - Rounds the result to 2 decimal places. + +--- + +## **Pandas Solution (Python)** +```python +import pandas as pd + +def immediate_food_delivery_percentage(delivery: pd.DataFrame) -> pd.DataFrame: + # Ensure order_date and customer_pref_delivery_date are in datetime format + delivery['order_date'] = pd.to_datetime(delivery['order_date']) + delivery['customer_pref_delivery_date'] = pd.to_datetime(delivery['customer_pref_delivery_date']) + + # Get the first order date for each customer + first_order = delivery.groupby('customer_id')['order_date'].min().reset_index() + first_order = first_order.rename(columns={'order_date': 'first_order_date'}) + + # Merge to get the corresponding preferred delivery date for the first order + merged = pd.merge(delivery, first_order, on='customer_id', how='inner') + first_orders = merged[merged['order_date'] == merged['first_order_date']] + + # Calculate immediate orders + immediate_count = (first_orders['order_date'] == first_orders['customer_pref_delivery_date']).sum() + total_customers = first_orders['customer_id'].nunique() + immediate_percentage = round(100 * immediate_count / total_customers, 2) + + return pd.DataFrame({'immediate_percentage': [immediate_percentage]}) + +# Example usage: +# df = pd.read_csv('delivery.csv') +# print(immediate_food_delivery_percentage(df)) +``` + +#### **Explanation:** +- **Convert Dates:** + - Convert `order_date` and `customer_pref_delivery_date` to datetime for accurate comparison. +- **Determine First Order:** + - Group by `customer_id` to find the minimum `order_date` as the first order. + - Merge with the original DataFrame to obtain details of the first order. +- **Calculate Percentage:** + - Count how many first orders are immediate (where `order_date` equals `customer_pref_delivery_date`). + - Compute the percentage and round to 2 decimal places. 
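+
+#### **Usage Example (optional sanity check):**
+A quick way to verify the function against Example 1. The `delivery` DataFrame below is reconstructed from the example table above; the construction itself is illustrative and not part of the original solution.
+
+```python
+import pandas as pd
+
+# Rebuild the Example 1 Delivery table (illustrative data only)
+delivery = pd.DataFrame({
+    "delivery_id": [1, 2, 3, 4, 5, 6, 7],
+    "customer_id": [1, 2, 1, 3, 3, 2, 4],
+    "order_date": ["2019-08-01", "2019-08-02", "2019-08-11", "2019-08-24",
+                   "2019-08-21", "2019-08-11", "2019-08-09"],
+    "customer_pref_delivery_date": ["2019-08-02", "2019-08-02", "2019-08-12", "2019-08-24",
+                                    "2019-08-22", "2019-08-13", "2019-08-09"],
+})
+
+# Uses the function defined in the block above
+print(immediate_food_delivery_percentage(delivery))
+# Expected: immediate_percentage = 50.0 (i.e., 50.00%)
+```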
+ +--- + +## **File Structure** +``` +LeetCode1174/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_standard_solution.sql # Contains the Standard MySQL solution. +β”œβ”€β”€ sql_window_solution.sql # Contains the Window Function solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 1174](https://leetcode.com/problems/immediate-food-delivery-ii/) +- [SQL GROUP BY Documentation](https://www.w3schools.com/sql/sql_groupby.asp) +- [SQL Window Functions](https://www.w3schools.com/sql/sql_window.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) +- [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) diff --git a/LeetCode SQL 50 Solution/1193. Monthly Transactions I/1193. Monthly Transactions I.py b/LeetCode SQL 50 Solution/1193. Monthly Transactions I/1193. Monthly Transactions I.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1193. Monthly Transactions I/1193. Monthly Transactions I.sql b/LeetCode SQL 50 Solution/1193. Monthly Transactions I/1193. Monthly Transactions I.sql new file mode 100644 index 0000000..293451f --- /dev/null +++ b/LeetCode SQL 50 Solution/1193. Monthly Transactions I/1193. Monthly Transactions I.sql @@ -0,0 +1,49 @@ +1193. Monthly Transactions I + +Table: Transactions + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| id | int | +| country | varchar | +| state | enum | +| amount | int | +| trans_date | date | ++---------------+---------+ +id is the primary key of this table. +The table has information about incoming transactions. +The state column is an enum of type ["approved", "declined"]. + + +Write an SQL query to find for each month and country, the number of transactions and their total amount, the number of approved transactions and their total amount. + +Return the result table in any order. + +The query result format is in the following example. + + + +Example 1: + +Input: +Transactions table: ++------+---------+----------+--------+------------+ +| id | country | state | amount | trans_date | ++------+---------+----------+--------+------------+ +| 121 | US | approved | 1000 | 2018-12-18 | +| 122 | US | declined | 2000 | 2018-12-19 | +| 123 | US | approved | 2000 | 2019-01-01 | +| 124 | DE | approved | 2000 | 2019-01-07 | ++------+---------+----------+--------+------------+ +Output: ++----------+---------+-------------+----------------+--------------------+-----------------------+ +| month | country | trans_count | approved_count | trans_total_amount | approved_total_amount | ++----------+---------+-------------+----------------+--------------------+-----------------------+ +| 2018-12 | US | 2 | 1 | 3000 | 1000 | +| 2019-01 | US | 1 | 1 | 2000 | 2000 | +| 2019-01 | DE | 1 | 1 | 2000 | 2000 | ++----------+---------+-------------+----------------+--------------------+-----------------------+ + + +# Write your MySQL query statement below \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1193. Monthly Transactions I/readme.md b/LeetCode SQL 50 Solution/1193. Monthly Transactions I/readme.md new file mode 100644 index 0000000..c4de0bd --- /dev/null +++ b/LeetCode SQL 50 Solution/1193. 
Monthly Transactions I/readme.md @@ -0,0 +1,126 @@ +# 🏦 Monthly Transactions I - LeetCode 1193 + +## πŸ“Œ Problem Statement +You are given the **Transactions** table that records financial transactions. + +### Transactions Table +| Column Name | Type | +| ----------- | ------- | +| id | int | +| country | varchar | +| state | enum | +| amount | int | +| trans_date | date | + +- **id** is the **primary key**. +- The **state** column is an `ENUM` type with values **"approved"** and **"declined"**. +- Each row **records a transaction** with an amount and a transaction date. + +### Task: +Find **monthly statistics** for each country: +- Total **number of transactions**. +- Total **amount of transactions**. +- Total **number of approved transactions**. +- Total **amount of approved transactions**. + +The **month format should be `YYYY-MM`**. + +--- + +## πŸ“Š Example 1: +### Input: +**Transactions Table** +| id | country | state | amount | trans_date | +| --- | ------- | -------- | ------ | ---------- | +| 121 | US | approved | 1000 | 2018-12-18 | +| 122 | US | declined | 2000 | 2018-12-19 | +| 123 | US | approved | 2000 | 2019-01-01 | +| 124 | DE | approved | 2000 | 2019-01-07 | + +### Output: +| month | country | trans_count | approved_count | trans_total_amount | approved_total_amount | +| ------- | ------- | ----------- | -------------- | ------------------ | --------------------- | +| 2018-12 | US | 2 | 1 | 3000 | 1000 | +| 2019-01 | US | 1 | 1 | 2000 | 2000 | +| 2019-01 | DE | 1 | 1 | 2000 | 2000 | + +### Explanation: +- **December 2018 (US)**: + - **2 transactions** (1000 + 2000). + - **1 approved transaction** (1000). +- **January 2019 (US)**: + - **1 transaction** (2000). + - **1 approved transaction** (2000). +- **January 2019 (DE)**: + - **1 transaction** (2000). + - **1 approved transaction** (2000). + +--- + +## πŸ–₯ SQL Solution + +### 1️⃣ Standard MySQL Solution +#### Explanation: +- **Extract the month** from `trans_date` using `DATE_FORMAT()`. +- **Count transactions** for each `month` and `country`. +- **Sum transaction amounts**. +- **Filter only approved transactions** separately using `CASE WHEN`. + +```sql +SELECT + DATE_FORMAT(trans_date, '%Y-%m') AS month, + country, + COUNT(id) AS trans_count, + SUM(CASE WHEN state = 'approved' THEN 1 ELSE 0 END) AS approved_count, + SUM(amount) AS trans_total_amount, + SUM(CASE WHEN state = 'approved' THEN amount ELSE 0 END) AS approved_total_amount +FROM Transactions +GROUP BY month, country +ORDER BY month, country; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- **Extract the month (`YYYY-MM`)** from `trans_date`. +- **Group by month and country**. +- **Compute counts and sums** using `.agg()`. 
+ +```python +import pandas as pd + +def monthly_transactions(transactions: pd.DataFrame) -> pd.DataFrame: + # Extract 'YYYY-MM' from the trans_date + transactions['month'] = transactions['trans_date'].dt.strftime('%Y-%m') + + # Aggregate transaction counts and sums + result = transactions.groupby(['month', 'country']).agg( + trans_count=('id', 'count'), + approved_count=('state', lambda x: (x == 'approved').sum()), + trans_total_amount=('amount', 'sum'), + approved_total_amount=('amount', lambda x: x[transactions['state'] == 'approved'].sum()) + ).reset_index() + + return result.sort_values(['month', 'country']) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Monthly-Transactions +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/monthly-transactions-i/) +- πŸ“š [SQL `GROUP BY` Clause](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/groupby.html) + +## Let me know if you need any modifications! πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.md b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.md new file mode 100644 index 0000000..24fb197 --- /dev/null +++ b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.md @@ -0,0 +1,77 @@ +# 1204. Last Person to Fit in the Bus +``` + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| person_id | int | +| person_name | varchar | +| weight | int | +| turn | int | ++-------------+---------+ + +``` + +person_id column contains unique values. +This table has the information about all people waiting for a bus. +The person_id and turn columns will contain all numbers from 1 to n, where n is the number of rows in the table. +turn determines the order of which the people will board the bus, where turn=1 denotes the first person to board and turn=n denotes the last person to board. +weight is the weight of the person in kilograms. + + +There is a queue of people waiting to board a bus. However, the bus has a weight limit of 1000 kilograms, so there may be some people who cannot board. + +Write a solution to find the person_name of the last person that can fit on the bus without exceeding the weight limit. The test cases are generated such that the first person does not exceed the weight limit. + +Note that only one person can board the bus at any given turn. + +## The result format is in the following example. + +```table[] + +Example 1: + +Input: +Queue table: ++-----------+-------------+--------+------+ +| person_id | person_name | weight | turn | ++-----------+-------------+--------+------+ +| 5 | Alice | 250 | 1 | +| 4 | Bob | 175 | 5 | +| 3 | Alex | 350 | 2 | +| 6 | John Cena | 400 | 3 | +| 1 | Winston | 500 | 6 | +| 2 | Marie | 200 | 4 | ++-----------+-------------+--------+------+ +Output: ++-------------+ +| person_name | ++-------------+ +| John Cena | ++-------------+ +Explanation: The folowing table is ordered by the turn for simplicity. 
++------+----+-----------+--------+--------------+ +| Turn | ID | Name | Weight | Total Weight | ++------+----+-----------+--------+--------------+ +| 1 | 5 | Alice | 250 | 250 | +| 2 | 3 | Alex | 350 | 600 | +| 3 | 6 | John Cena | 400 | 1000 | (last person to board) +| 4 | 2 | Marie | 200 | 1200 | (cannot board) +| 5 | 4 | Bob | 175 | ___ | +| 6 | 1 | Winston | 500 | ___ | ++------+----+-----------+--------+--------------+ +``` + +```SQL[] +# Write your MySQL query statement below +# Write your MySQL query statement below +SELECT a.person_name +FROM + Queue AS a, + Queue AS b +WHERE a.turn >= b.turn +GROUP BY a.person_id +HAVING SUM(b.weight) <= 1000 +ORDER BY a.turn DESC +LIMIT 1; +``` \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.py b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.sql b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.sql new file mode 100644 index 0000000..7fe1a3c --- /dev/null +++ b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/1204. Last Person to Fit in the Bus.sql @@ -0,0 +1,148 @@ +1204. Last Person to Fit in the Bus + +Table: Queue + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| person_id | int | +| person_name | varchar | +| weight | int | +| turn | int | ++-------------+---------+ +person_id column contains unique values. +This table has the information about all people waiting for a bus. +The person_id and turn columns will contain all numbers from 1 to n, where n is the number of rows in the table. +turn determines the order of which the people will board the bus, where turn=1 denotes the first person to board and turn=n denotes the last person to board. +weight is the weight of the person in kilograms. + + +There is a queue of people waiting to board a bus. However, the bus has a weight limit of 1000 kilograms, so there may be some people who cannot board. + +Write a solution to find the person_name of the last person that can fit on the bus without exceeding the weight limit. The test cases are generated such that the first person does not exceed the weight limit. + +Note that only one person can board the bus at any given turn. + +The result format is in the following example. + + + +Example 1: + +Input: +Queue table: ++-----------+-------------+--------+------+ +| person_id | person_name | weight | turn | ++-----------+-------------+--------+------+ +| 5 | Alice | 250 | 1 | +| 4 | Bob | 175 | 5 | +| 3 | Alex | 350 | 2 | +| 6 | John Cena | 400 | 3 | +| 1 | Winston | 500 | 6 | +| 2 | Marie | 200 | 4 | ++-----------+-------------+--------+------+ +Output: ++-------------+ +| person_name | ++-------------+ +| John Cena | ++-------------+ +Explanation: The folowing table is ordered by the turn for simplicity. 
++------+----+-----------+--------+--------------+ +| Turn | ID | Name | Weight | Total Weight | ++------+----+-----------+--------+--------------+ +| 1 | 5 | Alice | 250 | 250 | +| 2 | 3 | Alex | 350 | 600 | +| 3 | 6 | John Cena | 400 | 1000 | (last person to board) +| 4 | 2 | Marie | 200 | 1200 | (cannot board) +| 5 | 4 | Bob | 175 | ___ | +| 6 | 1 | Winston | 500 | ___ | ++------+----+-----------+--------+--------------+ + + + +# Write your MySQL query statement below +SELECT a.person_name +FROM + Queue AS a, + Queue AS b +WHERE a.turn >= b.turn +GROUP BY a.person_id +HAVING SUM(b.weight) <= 1000 +ORDER BY a.turn DESC +LIMIT 1; + + +Let's break down the query step by step to understand what it does: + +--- + +### 1. Self-Join of the Table + +```sql +FROM + Queue AS a, + Queue AS b +WHERE a.turn >= b.turn +``` + +- **Self-Join:** + The query treats the `Queue` table as two separate aliases: `a` and `b`. This is a self-join, meaning each row in `a` is paired with rows in `b`. + +- **Join Condition (`a.turn >= b.turn`):** + For each row in alias `a`, the query pairs it with every row in alias `b` that has a `turn` value less than or equal to `a.turn`. + - **Purpose:** This setup is used to accumulate data from the start of the queue up to the current person's turn. + +--- + +### 2. Grouping by Person + +```sql +GROUP BY a.person_id +``` + +- **Grouping:** + The query groups the resulting joined rows by `a.person_id`. + - **Effect:** For each person in the queue (represented by alias `a`), all rows (from `b`) where `b.turn` is less than or equal to `a.turn` are aggregated together. + +--- + +### 3. Calculating the Cumulative Weight + +```sql +HAVING SUM(b.weight) <= 1000 +``` + +- **Cumulative Sum:** + Within each group, the query calculates the sum of `b.weight`. + - **Condition:** The `HAVING` clause filters out groups where the cumulative weight (i.e., the sum of weights from the start of the queue up to the current person's turn) exceeds 1000. + - **Interpretation:** Only those persons for whom the cumulative weight of all people before and including them is **less than or equal to 1000** are kept. + +--- + +### 4. Selecting the Result + +```sql +SELECT a.person_name +``` + +- **Result Column:** + After filtering, the query selects the `person_name` from alias `a` for each group that passed the `HAVING` condition. + +--- + +### 5. Ordering and Limiting the Result + +```sql +ORDER BY a.turn DESC +LIMIT 1; +``` + +- **Ordering:** + The results are ordered by `a.turn` in descending order. + - **Purpose:** This ensures that among all persons whose cumulative weight is ≀ 1000, the one with the **latest (highest) turn** is at the top. + +- **Limiting:** + The `LIMIT 1` clause restricts the output to only the top result, effectively returning **one person**. + +--- diff --git a/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/readme.md b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/readme.md new file mode 100644 index 0000000..a5a1373 --- /dev/null +++ b/LeetCode SQL 50 Solution/1204. Last Person to Fit in the Bus/readme.md @@ -0,0 +1,149 @@ +# 🚌 Last Person to Fit in the Bus - LeetCode 1204 + +## πŸ“Œ Problem Statement +You are given the **Queue** table, which contains information about people waiting for a bus. + +### Queue Table +| Column Name | Type | +| ----------- | ------- | +| person_id | int | +| person_name | varchar | +| weight | int | +| turn | int | + +- **person_id** contains unique values. 
+- The **turn** column determines the order in which people will board (`turn = 1` means the first person to board). +- The **bus has a weight limit of 1000 kg**. +- Only **one person can board at a time**. + +### Task: +Find **the last person** who can board the bus **without exceeding the 1000 kg weight limit**. + +--- + +## πŸ“Š Example 1: +### Input: +**Queue Table** +| person_id | person_name | weight | turn | +| --------- | ----------- | ------ | ---- | +| 5 | Alice | 250 | 1 | +| 4 | Bob | 175 | 5 | +| 3 | Alex | 350 | 2 | +| 6 | John Cena | 400 | 3 | +| 1 | Winston | 500 | 6 | +| 2 | Marie | 200 | 4 | + +### Output: +| person_name | +| ----------- | +| John Cena | + +### Explanation: +Ordering by `turn`: +| Turn | ID | Name | Weight | Total Weight | +| ---- | --- | --------- | ------ | ------------ | +| 1 | 5 | Alice | 250 | 250 | +| 2 | 3 | Alex | 350 | 600 | +| 3 | 6 | John Cena | 400 | 1000 | βœ… (last person to board) | +| 4 | 2 | Marie | 200 | 1200 | ❌ (exceeds limit) | +| 5 | 4 | Bob | 175 | ❌ | +| 6 | 1 | Winston | 500 | ❌ | + +--- + +## πŸ–₯ SQL Solution + +### 1️⃣ Standard MySQL Solution +#### Explanation: +- **Use a self-join** to accumulate the total weight up to each person's turn. +- **Filter out** people whose cumulative weight exceeds **1000**. +- **Find the last person** who can board. + +```sql +SELECT a.person_name +FROM + Queue AS a, + Queue AS b +WHERE a.turn >= b.turn +GROUP BY a.person_id +HAVING SUM(b.weight) <= 1000 +ORDER BY a.turn DESC +LIMIT 1; +``` + +--- + +### πŸ“ Step-by-Step Breakdown: + +1️⃣ **Self-Join on the Table** +```sql +FROM Queue AS a, Queue AS b +WHERE a.turn >= b.turn +``` +- This pairs each row `a` with all rows `b` where `b.turn` is less than or equal to `a.turn`. +- Allows us to calculate the **cumulative sum of weights** for each person. + +2️⃣ **Group by Each Person** +```sql +GROUP BY a.person_id +``` +- Groups all rows by `person_id` so we can perform calculations per person. + +3️⃣ **Compute the Cumulative Weight** +```sql +HAVING SUM(b.weight) <= 1000 +``` +- Filters out people whose cumulative boarding weight exceeds **1000 kg**. + +4️⃣ **Find the Last Person Who Can Board** +```sql +ORDER BY a.turn DESC +LIMIT 1; +``` +- **Sorts by turn in descending order** so that we find the **last person** who can board. +- **Limits to 1 row** to return only the last eligible person. + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- **Sort by `turn`** to simulate the boarding order. +- **Compute the cumulative sum** of weights. +- **Find the last person** whose weight sum **does not exceed 1000**. + +```python +import pandas as pd + +def last_person_to_fit(queue: pd.DataFrame) -> pd.DataFrame: + # Sort by turn + queue = queue.sort_values(by="turn") + + # Compute cumulative weight sum + queue["cumulative_weight"] = queue["weight"].cumsum() + + # Filter those who fit on the bus + queue = queue[queue["cumulative_weight"] <= 1000] + + # Return the last person to fit + return queue.tail(1)[["person_name"]] +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Last-Person-Fit +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/last-person-to-fit-in-the-bus/) +- πŸ“š [SQL `GROUP BY` Clause](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas cumsum() Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.cumsum.html) + +## Let me know if you need any modifications! 
πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/1211. Queries Quality and Percentage.py b/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/1211. Queries Quality and Percentage.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/1211. Queries Quality and Percentage.sql b/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/1211. Queries Quality and Percentage.sql new file mode 100644 index 0000000..cc299ea --- /dev/null +++ b/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/1211. Queries Quality and Percentage.sql @@ -0,0 +1,77 @@ +1211. Queries Quality and Percentage + +Table: Queries + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| query_name | varchar | +| result | varchar | +| position | int | +| rating | int | ++-------------+---------+ +This table may have duplicate rows. +This table contains information collected from some queries on a database. +The position column has a value from 1 to 500. +The rating column has a value from 1 to 5. Query with rating less than 3 is a poor query. + + +We define query quality as: + +The average of the ratio between query rating and its position. + +We also define poor query percentage as: + +The percentage of all queries with rating less than 3. + +Write a solution to find each query_name, the quality and poor_query_percentage. + +Both quality and poor_query_percentage should be rounded to 2 decimal places. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Queries table: ++------------+-------------------+----------+--------+ +| query_name | result | position | rating | ++------------+-------------------+----------+--------+ +| Dog | Golden Retriever | 1 | 5 | +| Dog | German Shepherd | 2 | 5 | +| Dog | Mule | 200 | 1 | +| Cat | Shirazi | 5 | 2 | +| Cat | Siamese | 3 | 3 | +| Cat | Sphynx | 7 | 4 | ++------------+-------------------+----------+--------+ +Output: ++------------+---------+-----------------------+ +| query_name | quality | poor_query_percentage | ++------------+---------+-----------------------+ +| Dog | 2.50 | 33.33 | +| Cat | 0.66 | 33.33 | ++------------+---------+-----------------------+ +Explanation: +Dog queries quality is ((5 / 1) + (5 / 2) + (1 / 200)) / 3 = 2.50 +Dog queries poor_ query_percentage is (1 / 3) * 100 = 33.33 + +Cat queries quality equals ((2 / 5) + (3 / 3) + (4 / 7)) / 3 = 0.66 +Cat queries poor_ query_percentage is (1 / 3) * 100 = 33.33 + + +# Write your MySQL query statement below +SELECT query_name, +ROUND(AVG(rating * 1.0 / position), 2) AS quality, +ROUND(SUM(CASE WHEN rating < 3 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS poor_query_percentage +FROM Queries +GROUP BY query_name; + +-- SELECT query_name, +-- ROUND(AVG(rating * 1.0 / position), 2) AS quality, +-- ROUND(SUM(IF(rating < 3, 1, 0)) * 100.0 / COUNT(*), 2) AS poor_query_percentage +-- FROM Queries +-- GROUP BY query_name; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/readme.md b/LeetCode SQL 50 Solution/1211. Queries Quality and Percentage/readme.md new file mode 100644 index 0000000..0f00160 --- /dev/null +++ b/LeetCode SQL 50 Solution/1211. 
Queries Quality and Percentage/readme.md @@ -0,0 +1,180 @@ +# πŸ“Š Queries Quality and Percentage - LeetCode 1211 + +## πŸ“Œ Problem Statement +You are given the **Queries** table, which contains information collected from various queries on a database. + +### Queries Table +| Column Name | Type | +| ----------- | ------- | +| query_name | varchar | +| result | varchar | +| position | int | +| rating | int | + +- The **position** column has values from **1 to 500**. +- The **rating** column has values from **1 to 5**. +- **Queries with rating < 3 are considered "poor queries".** + +### Definitions: +1️⃣ **Query Quality:** + The **average** of the **ratio** between query rating and its position: + \[ + \text{quality} = \frac{\sum (\text{rating} / \text{position})}{\text{total queries for that name}} + \] + +2️⃣ **Poor Query Percentage:** + The percentage of all queries where **rating < 3**: + \[ + \text{poor\_query\_percentage} = \left(\frac{\text{count of poor queries}}{\text{total queries}}\right) \times 100 + \] + +--- + +## πŸ“Š Example 1: +### Input: +**Queries Table** +| query_name | result | position | rating | +| ---------- | ---------------- | -------- | ------ | +| Dog | Golden Retriever | 1 | 5 | +| Dog | German Shepherd | 2 | 5 | +| Dog | Mule | 200 | 1 | +| Cat | Shirazi | 5 | 2 | +| Cat | Siamese | 3 | 3 | +| Cat | Sphynx | 7 | 4 | + +### Output: +| query_name | quality | poor_query_percentage | +| ---------- | ------- | --------------------- | +| Dog | 2.50 | 33.33 | +| Cat | 0.66 | 33.33 | + +### Explanation: +#### **Dog** +- **Quality Calculation:** + \[ + \left( \frac{5}{1} + \frac{5}{2} + \frac{1}{200} \right) \div 3 = 2.50 + \] +- **Poor Query Percentage:** + - Poor Queries: **1** (Mule, rating = 1) + - Total Queries: **3** + \[ + (1 / 3) \times 100 = 33.33\% + \] + +#### **Cat** +- **Quality Calculation:** + \[ + \left( \frac{2}{5} + \frac{3}{3} + \frac{4}{7} \right) \div 3 = 0.66 + \] +- **Poor Query Percentage:** + - Poor Queries: **1** (Shirazi, rating = 2) + - Total Queries: **3** + \[ + (1 / 3) \times 100 = 33.33\% + \] + +--- + +## πŸ–₯ SQL Solution + +### 1️⃣ Standard MySQL Query +#### Explanation: +- **Calculate quality** using `AVG(rating / position)`. +- **Count poor queries** using `SUM(CASE WHEN rating < 3 THEN 1 ELSE 0 END)`. +- **Calculate percentage** using `(COUNT of poor queries / total queries) * 100`. + +```sql +SELECT query_name, + ROUND(AVG(rating * 1.0 / position), 2) AS quality, + ROUND(SUM(CASE WHEN rating < 3 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS poor_query_percentage +FROM Queries +GROUP BY query_name; +``` + +--- + +### πŸ“ Step-by-Step Breakdown: + +1️⃣ **Grouping Queries by `query_name`** +```sql +GROUP BY query_name; +``` +- Ensures calculations are **per query type**. + +2️⃣ **Calculating Query Quality** +```sql +ROUND(AVG(rating * 1.0 / position), 2) AS quality +``` +- **`rating / position`** calculates the ratio. +- **`AVG(...)`** finds the average across all entries for the query. +- **Multiplying by `1.0` ensures floating-point division.** +- **`ROUND(..., 2)` rounds to 2 decimal places**. + +3️⃣ **Calculating Poor Query Percentage** +```sql +ROUND(SUM(CASE WHEN rating < 3 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS poor_query_percentage +``` +- **Counts queries with `rating < 3` using `SUM(CASE WHEN ... THEN 1 ELSE 0 END)`**. +- **Divides by total queries (`COUNT(*)`) and multiplies by `100`**. +- **Rounds to 2 decimal places**. 
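+
+**Equivalent shortcut (optional):** Because MySQL evaluates a boolean expression such as `rating < 3` as `1` or `0`, and `/` already performs decimal division, the same result can be written more compactly with `AVG()`. This is offered as an illustrative variation, not as a replacement for the solutions above or below:
+
+```sql
+SELECT query_name,
+       ROUND(AVG(rating / position), 2) AS quality,
+       ROUND(AVG(rating < 3) * 100, 2)  AS poor_query_percentage
+FROM Queries
+GROUP BY query_name;
+```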
+ +--- + +### 2️⃣ Alternative MySQL Query (Using `IF` Instead of `CASE`) + +```sql +SELECT query_name, + ROUND(AVG(rating * 1.0 / position), 2) AS quality, + ROUND(SUM(IF(rating < 3, 1, 0)) * 100.0 / COUNT(*), 2) AS poor_query_percentage +FROM Queries +GROUP BY query_name; +``` +- **`IF(rating < 3, 1, 0)`** is equivalent to `CASE WHEN rating < 3 THEN 1 ELSE 0 END`. + +--- + +## 🐍 Pandas Solution (Python) +#### Explanation: +- **Group by `query_name`**. +- **Calculate query quality** as `rating / position`, then average. +- **Filter poor queries (`rating < 3`) and compute percentage**. + +```python +import pandas as pd + +def queries_quality(queries: pd.DataFrame) -> pd.DataFrame: + # Group by query_name + grouped = queries.groupby("query_name") + + # Compute Quality + quality = grouped.apply(lambda x: round((x["rating"] / x["position"]).mean(), 2)) + + # Compute Poor Query Percentage + poor_query_percentage = grouped.apply(lambda x: round((x["rating"] < 3).mean() * 100, 2)) + + # Return result + result = pd.DataFrame({"query_name": quality.index, + "quality": quality.values, + "poor_query_percentage": poor_query_percentage.values}) + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Queries-Quality +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/queries-quality-and-percentage/) +- πŸ“š [SQL `GROUP BY` Documentation](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/groupby.html) + +## Let me know if you'd like any modifications! πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1251. Average Selling Price/1251. Average Selling Price.py b/LeetCode SQL 50 Solution/1251. Average Selling Price/1251. Average Selling Price.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1251. Average Selling Price/1251. Average Selling Price.sql b/LeetCode SQL 50 Solution/1251. Average Selling Price/1251. Average Selling Price.sql new file mode 100644 index 0000000..82eb691 --- /dev/null +++ b/LeetCode SQL 50 Solution/1251. Average Selling Price/1251. Average Selling Price.sql @@ -0,0 +1,86 @@ +1251. Average Selling Price +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Prices + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| product_id | int | +| start_date | date | +| end_date | date | +| price | int | ++---------------+---------+ +(product_id, start_date, end_date) is the primary key (combination of columns with unique values) for this table. +Each row of this table indicates the price of the product_id in the period from start_date to end_date. +For each product_id there will be no two overlapping periods. That means there will be no two intersecting periods for the same product_id. + + +Table: UnitsSold + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| product_id | int | +| purchase_date | date | +| units | int | ++---------------+---------+ +This table may contain duplicate rows. +Each row of this table indicates the date, units, and product_id of each product sold. + + +Write a solution to find the average selling price for each product. average_price should be rounded to 2 decimal places. If a product does not have any sold units, its average selling price is assumed to be 0. + +Return the result table in any order. 
+ +The result format is in the following example. + + + +Example 1: + +Input: +Prices table: ++------------+------------+------------+--------+ +| product_id | start_date | end_date | price | ++------------+------------+------------+--------+ +| 1 | 2019-02-17 | 2019-02-28 | 5 | +| 1 | 2019-03-01 | 2019-03-22 | 20 | +| 2 | 2019-02-01 | 2019-02-20 | 15 | +| 2 | 2019-02-21 | 2019-03-31 | 30 | ++------------+------------+------------+--------+ +UnitsSold table: ++------------+---------------+-------+ +| product_id | purchase_date | units | ++------------+---------------+-------+ +| 1 | 2019-02-25 | 100 | +| 1 | 2019-03-01 | 15 | +| 2 | 2019-02-10 | 200 | +| 2 | 2019-03-22 | 30 | ++------------+---------------+-------+ +Output: ++------------+---------------+ +| product_id | average_price | ++------------+---------------+ +| 1 | 6.96 | +| 2 | 16.96 | ++------------+---------------+ +Explanation: +Average selling price = Total Price of Product / Number of products sold. +Average selling price for product 1 = ((100 * 5) + (15 * 20)) / 115 = 6.96 +Average selling price for product 2 = ((200 * 15) + (30 * 30)) / 230 = 16.96 +Note that the average selling price is 0 if the product was not sold. + +# Write your MySQL query statement below + +# Write your MySQL query statement below +SELECT p.product_id, IFNULL(round(SUM(p.price*u.units)/sum(u.units),2),0) as average_price +FROM Prices p +LEFT JOIN UnitsSold u +ON p.product_id = u.product_id AND +u.purchase_date BETWEEN p.Start_date and p.end_date +GROUP BY p.product_id +-- SELECT product_id, ROUND(AVG(COALESCE(price, 0)), 2) AS average_price \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1251. Average Selling Price/readme.md b/LeetCode SQL 50 Solution/1251. Average Selling Price/readme.md new file mode 100644 index 0000000..d099985 --- /dev/null +++ b/LeetCode SQL 50 Solution/1251. Average Selling Price/readme.md @@ -0,0 +1,216 @@ + +# πŸ›’ Average Selling Price - LeetCode 1251 + +## πŸ“Œ Problem Statement +You are given two tables, **Prices** and **UnitsSold**, which contain information about product pricing and sales. + +### πŸ“Š Prices Table +| Column Name | Type | +| ----------- | ---- | +| product_id | int | +| start_date | date | +| end_date | date | +| price | int | + +- `(product_id, start_date, end_date)` is the **primary key**. +- Each row defines the price for `product_id` **within a specific date range**. +- **No two price periods overlap** for the same product. + +### πŸ“Š UnitsSold Table +| Column Name | Type | +| ------------- | ---- | +| product_id | int | +| purchase_date | date | +| units | int | + +- Each row records the number of units sold for `product_id` on `purchase_date`. +- **Table may contain duplicate rows**. + +### πŸ”’ Goal: +Find the **average selling price** for each `product_id`, rounded to **2 decimal places**. +If a product has **no sales**, its average price should be **0**. 
+ +--- + +## πŸ“Š Example 1: +### Input: +### **Prices Table** +| product_id | start_date | end_date | price | +| ---------- | ---------- | ---------- | ----- | +| 1 | 2019-02-17 | 2019-02-28 | 5 | +| 1 | 2019-03-01 | 2019-03-22 | 20 | +| 2 | 2019-02-01 | 2019-02-20 | 15 | +| 2 | 2019-02-21 | 2019-03-31 | 30 | + +### **UnitsSold Table** +| product_id | purchase_date | units | +| ---------- | ------------- | ----- | +| 1 | 2019-02-25 | 100 | +| 1 | 2019-03-01 | 15 | +| 2 | 2019-02-10 | 200 | +| 2 | 2019-03-22 | 30 | + +### Output: +| product_id | average_price | +| ---------- | ------------- | +| 1 | 6.96 | +| 2 | 16.96 | + +--- + +## πŸ” Explanation: +### **Formula** +\[ +\text{Average Selling Price} = \frac{\sum (\text{price} \times \text{units sold})}{\sum (\text{units sold})} +\] + +### **Product 1 Calculation** +- **Feb 25, 2019:** 100 units sold at **$5** +- **Mar 01, 2019:** 15 units sold at **$20** +- **Total Price Contribution:** + \[ + (100 \times 5) + (15 \times 20) = 500 + 300 = 800 + \] +- **Total Units Sold:** + \[ + 100 + 15 = 115 + \] +- **Average Price:** + \[ + 800 / 115 = 6.96 + \] + +### **Product 2 Calculation** +- **Feb 10, 2019:** 200 units sold at **$15** +- **Mar 22, 2019:** 30 units sold at **$30** +- **Total Price Contribution:** + \[ + (200 \times 15) + (30 \times 30) = 3000 + 900 = 3900 + \] +- **Total Units Sold:** + \[ + 200 + 30 = 230 + \] +- **Average Price:** + \[ + 3900 / 230 = 16.96 + \] + +--- + +## πŸ–₯ SQL Solution + +### 1️⃣ Standard MySQL Query +#### **Explanation:** +- **Join `Prices` and `UnitsSold`** on `product_id`, ensuring `purchase_date` falls **within the valid price period** (`start_date` ≀ `purchase_date` ≀ `end_date`). +- **Multiply `price * units`** for total revenue. +- **Sum total units** for each product. +- **Use `ROUND(..., 2)`** to get 2 decimal places. +- **Use `IFNULL(..., 0)`** to handle cases where no units were sold. + +```sql +SELECT p.product_id, + IFNULL(ROUND(SUM(p.price * u.units) / SUM(u.units), 2), 0) AS average_price +FROM Prices p +LEFT JOIN UnitsSold u +ON p.product_id = u.product_id +AND u.purchase_date BETWEEN p.start_date AND p.end_date +GROUP BY p.product_id; +``` + +--- + +### πŸ“ Step-by-Step Breakdown: + +1️⃣ **Join Tables Based on Matching Date Ranges** +```sql +LEFT JOIN UnitsSold u +ON p.product_id = u.product_id +AND u.purchase_date BETWEEN p.start_date AND p.end_date +``` +- Ensures we only match **valid** sales based on pricing periods. + +2️⃣ **Calculate Revenue Per Product** +```sql +SUM(p.price * u.units) +``` +- Computes total revenue for each product. + +3️⃣ **Compute Total Sold Units Per Product** +```sql +SUM(u.units) +``` +- Sums up all sold units. + +4️⃣ **Calculate Average Price and Handle Edge Cases** +```sql +ROUND(SUM(p.price * u.units) / SUM(u.units), 2) +``` +- Ensures precision with 2 decimal places. + +5️⃣ **Handle Products with No Sales** +```sql +IFNULL(..., 0) +``` +- If `SUM(u.units)` is `NULL`, return `0`. + +--- + +### 2️⃣ Alternative MySQL Query (Using `COALESCE`) +```sql +SELECT p.product_id, + ROUND(SUM(COALESCE(p.price, 0) * COALESCE(u.units, 0)) / SUM(COALESCE(u.units, 0)), 2) AS average_price +FROM Prices p +LEFT JOIN UnitsSold u +ON p.product_id = u.product_id +AND u.purchase_date BETWEEN p.start_date AND p.end_date +GROUP BY p.product_id; +``` +- Uses **`COALESCE(value, 0)`** instead of `IFNULL()` for robustness. + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +- **Merge DataFrames on `product_id`** where `purchase_date` falls in the price range. 
+- **Compute total price & units**.
+- **Handle cases where no units were sold**.
+
+```python
+import pandas as pd
+
+def average_selling_price(prices: pd.DataFrame, units_sold: pd.DataFrame) -> pd.DataFrame:
+    # Match each sale to the price period that covers its purchase_date
+    merged = prices.merge(units_sold, on="product_id")
+    merged = merged[(merged["purchase_date"] >= merged["start_date"]) &
+                    (merged["purchase_date"] <= merged["end_date"])]
+
+    # Total revenue and total units sold per product
+    merged["revenue"] = merged["price"] * merged["units"]
+    stats = merged.groupby("product_id", as_index=False).agg(
+        revenue=("revenue", "sum"),
+        units=("units", "sum"),
+    )
+    stats["average_price"] = (stats["revenue"] / stats["units"]).round(2)
+
+    # Products with no sales must still appear, with an average price of 0
+    all_products = prices[["product_id"]].drop_duplicates()
+    result = all_products.merge(stats[["product_id", "average_price"]], on="product_id", how="left")
+    result["average_price"] = result["average_price"].fillna(0)
+
+    return result
+```
+
+---
+
+## πŸ“ File Structure
+```
+πŸ“‚ Average-Selling-Price
+│── πŸ“œ README.md
+│── πŸ“œ solution.sql
+│── πŸ“œ solution_pandas.py
+│── πŸ“œ test_cases.sql
+```
+
+---
+
+## πŸ”— Useful Links
+- πŸ“– [LeetCode Problem](https://leetcode.com/problems/average-selling-price/)
+- πŸ“š [SQL `LEFT JOIN` Documentation](https://www.w3schools.com/sql/sql_join_left.asp)
+- 🐍 [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)
diff --git a/LeetCode SQL 50 Solution/1280. Students and Examinations/1280. Students and Examinations.py b/LeetCode SQL 50 Solution/1280. Students and Examinations/1280. Students and Examinations.py
new file mode 100644
index 0000000..e69de29
diff --git a/1280. Students and Examinations.sql b/LeetCode SQL 50 Solution/1280. Students and Examinations/1280. Students and Examinations.sql
similarity index 100%
rename from 1280. Students and Examinations.sql
rename to LeetCode SQL 50 Solution/1280. Students and Examinations/1280. Students and Examinations.sql
diff --git a/LeetCode SQL 50 Solution/1280. Students and Examinations/readme.md b/LeetCode SQL 50 Solution/1280. Students and Examinations/readme.md
new file mode 100644
index 0000000..56e7bf0
--- /dev/null
+++ b/LeetCode SQL 50 Solution/1280. Students and Examinations/readme.md
@@ -0,0 +1,178 @@
+# πŸŽ“ Find the Number of Times Each Student Attended Each Exam - LeetCode 1280
+
+## πŸ“Œ Problem Statement
+You are given three tables: **Students**, **Subjects**, and **Examinations**, which contain information about students, subjects, and exam attendance.
+
+### πŸ“Š Students Table
+| Column Name  | Type    |
+| ------------ | ------- |
+| student_id   | int     |
+| student_name | varchar |
+- `student_id` is the **primary key**.
+- Each row represents a **unique student**.
+
+### πŸ“Š Subjects Table
+| Column Name  | Type    |
+| ------------ | ------- |
+| subject_name | varchar |
+- `subject_name` is the **primary key**.
+- Each row represents a **unique subject**.
+
+### πŸ“Š Examinations Table
+| Column Name  | Type    |
+| ------------ | ------- |
+| student_id   | int     |
+| subject_name | varchar |
+- **No primary key** (may contain duplicates).
+- Each row indicates that a student attended an exam for a specific subject.
+
+### πŸ”’ Goal:
+Find the **number of times each student attended each exam**.
+If a student did **not attend an exam**, return `0`.
+Return the results **ordered by** `student_id` and `subject_name`.
+ +--- + +## πŸ“Š Example 1: +### Input: + +### **Students Table** +| student_id | student_name | +| ---------- | ------------ | +| 1 | Alice | +| 2 | Bob | +| 13 | John | +| 6 | Alex | + +### **Subjects Table** +| subject_name | +| ------------ | +| Math | +| Physics | +| Programming | + +### **Examinations Table** +| student_id | subject_name | +| ---------- | ------------ | +| 1 | Math | +| 1 | Physics | +| 1 | Programming | +| 2 | Programming | +| 1 | Physics | +| 1 | Math | +| 13 | Math | +| 13 | Programming | +| 13 | Physics | +| 2 | Math | +| 1 | Math | + +### Output: +| student_id | student_name | subject_name | attended_exams | +| ---------- | ------------ | ------------ | -------------- | +| 1 | Alice | Math | 3 | +| 1 | Alice | Physics | 2 | +| 1 | Alice | Programming | 1 | +| 2 | Bob | Math | 1 | +| 2 | Bob | Physics | 0 | +| 2 | Bob | Programming | 1 | +| 6 | Alex | Math | 0 | +| 6 | Alex | Physics | 0 | +| 6 | Alex | Programming | 0 | +| 13 | John | Math | 1 | +| 13 | John | Physics | 1 | +| 13 | John | Programming | 1 | + +--- + +## πŸ–₯ SQL Solution + +### 1️⃣ Standard MySQL Query +#### **Explanation:** +- **Use `CROSS JOIN`** to generate **all possible student-subject combinations**. +- **Use `LEFT JOIN`** to attach attendance records from `Examinations`. +- **Use `COUNT(e.subject_name)`** to count how many times a student attended an exam. +- **Sort results by** `student_id` and `subject_name`. + +```sql +SELECT s.student_id, s.student_name, sb.subject_name, + COUNT(e.subject_name) AS attended_exams +FROM Students s +CROSS JOIN Subjects sb +LEFT JOIN Examinations e +ON s.student_id = e.student_id AND sb.subject_name = e.subject_name +GROUP BY s.student_id, sb.subject_name +ORDER BY s.student_id, sb.subject_name; +``` + +--- + +### 2️⃣ Alternative SQL Query (Using `COALESCE`) +```sql +SELECT s.student_id, s.student_name, sb.subject_name, + COALESCE(COUNT(e.subject_name), 0) AS attended_exams +FROM Students s +CROSS JOIN Subjects sb +LEFT JOIN Examinations e +ON s.student_id = e.student_id AND sb.subject_name = e.subject_name +GROUP BY s.student_id, sb.subject_name +ORDER BY s.student_id, sb.subject_name; +``` +- **Uses `COALESCE(COUNT(...), 0)`** to explicitly handle `NULL` values. + +--- + +### 3️⃣ Alternative SQL Query (Using `WITH ROLLUP`) +```sql +SELECT s.student_id, s.student_name, sb.subject_name, + COUNT(e.subject_name) AS attended_exams +FROM Students s +CROSS JOIN Subjects sb +LEFT JOIN Examinations e +ON s.student_id = e.student_id AND sb.subject_name = e.subject_name +GROUP BY s.student_id, sb.subject_name WITH ROLLUP +HAVING GROUPING(subject_name) = 0 +ORDER BY s.student_id, sb.subject_name; +``` +- **Uses `WITH ROLLUP`** to generate **aggregated results**. + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +- **Generate all possible (student, subject) pairs** using `pd.merge()`. +- **Group by student_id and subject_name**, then count occurrences. +- **Fill missing values with `0`**. 
+
+```python
+import pandas as pd
+
+def student_exam_attendance(students: pd.DataFrame, subjects: pd.DataFrame, exams: pd.DataFrame) -> pd.DataFrame:
+    # Create all possible student-subject combinations
+    all_combinations = students.merge(subjects, how="cross")
+
+    # Count how many times each (student, subject) pair appears in Examinations
+    exam_counts = exams.groupby(["student_id", "subject_name"]).size().reset_index(name="attended_exams")
+
+    # Attach the counts; combinations with no exam record get 0 instead of NaN
+    result = all_combinations.merge(exam_counts, on=["student_id", "subject_name"], how="left")
+    result["attended_exams"] = result["attended_exams"].fillna(0).astype(int)
+
+    result = result.sort_values(["student_id", "subject_name"])
+    return result[["student_id", "student_name", "subject_name", "attended_exams"]]
+```
+
+---
+
+## πŸ“ File Structure
+```
+πŸ“‚ Student-Exam-Attendance
+│── πŸ“œ README.md
+│── πŸ“œ solution.sql
+│── πŸ“œ solution_pandas.py
+│── πŸ“œ test_cases.sql
+```
+
+---
+
+## πŸ”— Useful Links
+- πŸ“– [LeetCode Problem](https://leetcode.com/problems/find-the-number-of-times-each-student-attended-each-exam/)
+- πŸ“š [SQL `CROSS JOIN` Documentation](https://www.w3schools.com/sql/sql_join_cross.asp)
+- 🐍 [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)
diff --git a/LeetCode SQL 50 Solution/1321. Restaurant Growth/1321. Restaurant Growth.md b/LeetCode SQL 50 Solution/1321. Restaurant Growth/1321. Restaurant Growth.md
new file mode 100644
index 0000000..e69de29
diff --git a/LeetCode SQL 50 Solution/1321. Restaurant Growth/1321. Restaurant Growth.sql b/LeetCode SQL 50 Solution/1321. Restaurant Growth/1321. Restaurant Growth.sql
new file mode 100644
index 0000000..384399e
--- /dev/null
+++ b/LeetCode SQL 50 Solution/1321. Restaurant Growth/1321. Restaurant Growth.sql
@@ -0,0 +1,107 @@
+1321. Restaurant Growth
+Medium
+Table: Customer
+
++---------------+---------+
+| Column Name   | Type    |
++---------------+---------+
+| customer_id   | int     |
+| name          | varchar |
+| visited_on    | date    |
+| amount        | int     |
++---------------+---------+
+In SQL, (customer_id, visited_on) is the primary key for this table.
+This table contains data about customer transactions in a restaurant.
+visited_on is the date on which the customer with ID (customer_id) has visited the restaurant.
+amount is the total paid by a customer.
+
+
+You are the restaurant owner and you want to analyze a possible expansion (there will be at least one customer every day).
+
+Compute the moving average of how much the customer paid in a seven days window (i.e., current day + 6 days before). average_amount should be rounded to two decimal places.
+
+Return the result table ordered by visited_on in ascending order.
+
+The result format is in the following example.
+ + + +Example 1: + +Input: +Customer table: ++-------------+--------------+--------------+-------------+ +| customer_id | name | visited_on | amount | ++-------------+--------------+--------------+-------------+ +| 1 | Jhon | 2019-01-01 | 100 | +| 2 | Daniel | 2019-01-02 | 110 | +| 3 | Jade | 2019-01-03 | 120 | +| 4 | Khaled | 2019-01-04 | 130 | +| 5 | Winston | 2019-01-05 | 110 | +| 6 | Elvis | 2019-01-06 | 140 | +| 7 | Anna | 2019-01-07 | 150 | +| 8 | Maria | 2019-01-08 | 80 | +| 9 | Jaze | 2019-01-09 | 110 | +| 1 | Jhon | 2019-01-10 | 130 | +| 3 | Jade | 2019-01-10 | 150 | ++-------------+--------------+--------------+-------------+ +Output: ++--------------+--------------+----------------+ +| visited_on | amount | average_amount | ++--------------+--------------+----------------+ +| 2019-01-07 | 860 | 122.86 | +| 2019-01-08 | 840 | 120 | +| 2019-01-09 | 840 | 120 | +| 2019-01-10 | 1000 | 142.86 | ++--------------+--------------+----------------+ +Explanation: +1st moving average from 2019-01-01 to 2019-01-07 has an average_amount of (100 + 110 + 120 + 130 + 110 + 140 + 150)/7 = 122.86 +2nd moving average from 2019-01-02 to 2019-01-08 has an average_amount of (110 + 120 + 130 + 110 + 140 + 150 + 80)/7 = 120 +3rd moving average from 2019-01-03 to 2019-01-09 has an average_amount of (120 + 130 + 110 + 140 + 150 + 80 + 110)/7 = 120 +4th moving average from 2019-01-04 to 2019-01-10 has an average_amount of (130 + 110 + 140 + 150 + 80 + 110 + 130 + 150)/7 = 142.86 + + +Solution 1: + +# Write your MySQL query statement below +WITH + t AS ( + SELECT + visited_on, + SUM(amount) OVER ( + ORDER BY visited_on + ROWS 6 PRECEDING + ) AS amount, + RANK() OVER ( + ORDER BY visited_on + ROWS 6 PRECEDING + ) AS rk + FROM + ( + SELECT visited_on, SUM(amount) AS amount + FROM Customer + GROUP BY visited_on + ) AS tt + ) +SELECT visited_on, amount, ROUND(amount / 7, 2) AS average_amount +FROM t +WHERE rk > 6; + + +Solution 2: +# Write your MySQL query statement below +SELECT + a.visited_on, + SUM(b.amount) AS amount, + ROUND(SUM(b.amount) / 7, 2) AS average_amount +FROM + (SELECT DISTINCT visited_on FROM customer) AS a + JOIN customer AS b ON DATEDIFF(a.visited_on, b.visited_on) BETWEEN 0 AND 6 +WHERE a.visited_on >= (SELECT MIN(visited_on) FROM customer) + 6 +GROUP BY 1 +ORDER BY 1; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1321. Restaurant Growth/readme.md b/LeetCode SQL 50 Solution/1321. Restaurant Growth/readme.md new file mode 100644 index 0000000..2c666c1 --- /dev/null +++ b/LeetCode SQL 50 Solution/1321. Restaurant Growth/readme.md @@ -0,0 +1,171 @@ +# 🍽️ Restaurant Growth - LeetCode 321 + +## πŸ“Œ Problem Statement +You are given a table **Customer**, which records daily customer transactions in a restaurant. +The restaurant owner wants to analyze a **7-day moving average** of customer spending. + +### πŸ“Š Customer Table +| Column Name | Type | +| ----------- | ------- | +| customer_id | int | +| name | varchar | +| visited_on | date | +| amount | int | +- **(customer_id, visited_on) is the primary key**. +- `visited_on` represents the date a customer visited the restaurant. +- `amount` represents the total amount paid by a customer on that day. + +--- + +## πŸ”’ Goal: +Compute the **7-day moving average** of customer spending. +- The window consists of **current day + 6 days before**. +- `average_amount` should be **rounded to 2 decimal places**. +- The result should be **ordered by `visited_on` in ascending order**. 
+ +--- + +## πŸ“Š Example 1: +### **Input:** +#### **Customer Table** +| customer_id | name | visited_on | amount | +| ----------- | ------- | ---------- | ------ | +| 1 | Jhon | 2019-01-01 | 100 | +| 2 | Daniel | 2019-01-02 | 110 | +| 3 | Jade | 2019-01-03 | 120 | +| 4 | Khaled | 2019-01-04 | 130 | +| 5 | Winston | 2019-01-05 | 110 | +| 6 | Elvis | 2019-01-06 | 140 | +| 7 | Anna | 2019-01-07 | 150 | +| 8 | Maria | 2019-01-08 | 80 | +| 9 | Jaze | 2019-01-09 | 110 | +| 1 | Jhon | 2019-01-10 | 130 | +| 3 | Jade | 2019-01-10 | 150 | + +### **Output:** +| visited_on | amount | average_amount | +| ---------- | ------ | -------------- | +| 2019-01-07 | 860 | 122.86 | +| 2019-01-08 | 840 | 120 | +| 2019-01-09 | 840 | 120 | +| 2019-01-10 | 1000 | 142.86 | + +### **Explanation:** +1. **First moving average (2019-01-01 to 2019-01-07)** + \[ + (100 + 110 + 120 + 130 + 110 + 140 + 150) / 7 = 122.86 + \] +2. **Second moving average (2019-01-02 to 2019-01-08)** + \[ + (110 + 120 + 130 + 110 + 140 + 150 + 80) / 7 = 120 + \] +3. **Third moving average (2019-01-03 to 2019-01-09)** + \[ + (120 + 130 + 110 + 140 + 150 + 80 + 110) / 7 = 120 + \] +4. **Fourth moving average (2019-01-04 to 2019-01-10)** + \[ + (130 + 110 + 140 + 150 + 80 + 110 + 130 + 150) / 7 = 142.86 + \] + +--- + +## πŸ–₯ SQL Solutions + +### 1️⃣ **Using `WINDOW FUNCTION` (`SUM() OVER` + `RANK() OVER`)** +#### **Explanation:** +- First, **group transactions per day** using `SUM(amount)`. +- Then, use `SUM() OVER (ROWS 6 PRECEDING)` to calculate **moving sum** over 7 days. +- Use `RANK()` to track row numbers and filter rows with `rk > 6`. +- Finally, compute `ROUND(amount / 7, 2)`. + +```sql +WITH t AS ( + SELECT + visited_on, + SUM(amount) OVER ( + ORDER BY visited_on + ROWS 6 PRECEDING + ) AS amount, + RANK() OVER ( + ORDER BY visited_on + ROWS 6 PRECEDING + ) AS rk + FROM ( + SELECT visited_on, SUM(amount) AS amount + FROM Customer + GROUP BY visited_on + ) AS tt +) +SELECT visited_on, amount, ROUND(amount / 7, 2) AS average_amount +FROM t +WHERE rk > 6; +``` + +--- + +### 2️⃣ **Using `JOIN` + `DATEDIFF()`** +#### **Explanation:** +- Use a **self-join** to find transactions **within a 7-day range**. +- Sum the `amount` for each window and calculate the moving average. +- Use `DATEDIFF(a.visited_on, b.visited_on) BETWEEN 0 AND 6` to filter records. +- Ensure only complete 7-day windows are included. + +```sql +SELECT + a.visited_on, + SUM(b.amount) AS amount, + ROUND(SUM(b.amount) / 7, 2) AS average_amount +FROM + (SELECT DISTINCT visited_on FROM customer) AS a + JOIN customer AS b + ON DATEDIFF(a.visited_on, b.visited_on) BETWEEN 0 AND 6 +WHERE + a.visited_on >= (SELECT MIN(visited_on) FROM customer) + 6 +GROUP BY a.visited_on +ORDER BY a.visited_on; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +- **Group by `visited_on`** and sum `amount` per day. +- **Use `.rolling(7).sum()`** to compute the moving sum over 7 days. +- **Drop NaN values** to exclude incomplete windows. +- **Round the average to 2 decimal places**. 
+ +```python +import pandas as pd + +def restaurant_growth(customers: pd.DataFrame) -> pd.DataFrame: + # Aggregate daily amounts + daily_amount = customers.groupby("visited_on")["amount"].sum().reset_index() + + # Compute rolling 7-day sum and moving average + daily_amount["amount"] = daily_amount["amount"].rolling(7).sum() + daily_amount["average_amount"] = (daily_amount["amount"] / 7).round(2) + + # Drop incomplete windows + daily_amount = daily_amount.dropna().reset_index(drop=True) + + return daily_amount +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Restaurant-Growth +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/restaurant-growth/) +- πŸ“š [SQL `WINDOW FUNCTIONS` Documentation](https://www.w3schools.com/sql/sql_window.asp) +- 🐍 [Pandas Rolling Window](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html) diff --git a/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/1327. List the Products Ordered in a Period.SQL b/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/1327. List the Products Ordered in a Period.SQL new file mode 100644 index 0000000..68c34dc --- /dev/null +++ b/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/1327. List the Products Ordered in a Period.SQL @@ -0,0 +1,95 @@ +1327. List the Products Ordered in a Period +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Products + ++------------------+---------+ +| Column Name | Type | ++------------------+---------+ +| product_id | int | +| product_name | varchar | +| product_category | varchar | ++------------------+---------+ +product_id is the primary key (column with unique values) for this table. +This table contains data about the company's products. + + +Table: Orders + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| product_id | int | +| order_date | date | +| unit | int | ++---------------+---------+ +This table may have duplicate rows. +product_id is a foreign key (reference column) to the Products table. +unit is the number of products ordered in order_date. + + +Write a solution to get the names of products that have at least 100 units ordered in February 2020 and their amount. + +Return the result table in any order. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Products table: ++-------------+-----------------------+------------------+ +| product_id | product_name | product_category | ++-------------+-----------------------+------------------+ +| 1 | Leetcode Solutions | Book | +| 2 | Jewels of Stringology | Book | +| 3 | HP | Laptop | +| 4 | Lenovo | Laptop | +| 5 | Leetcode Kit | T-shirt | ++-------------+-----------------------+------------------+ +Orders table: ++--------------+--------------+----------+ +| product_id | order_date | unit | ++--------------+--------------+----------+ +| 1 | 2020-02-05 | 60 | +| 1 | 2020-02-10 | 70 | +| 2 | 2020-01-18 | 30 | +| 2 | 2020-02-11 | 80 | +| 3 | 2020-02-17 | 2 | +| 3 | 2020-02-24 | 3 | +| 4 | 2020-03-01 | 20 | +| 4 | 2020-03-04 | 30 | +| 4 | 2020-03-04 | 60 | +| 5 | 2020-02-25 | 50 | +| 5 | 2020-02-27 | 50 | +| 5 | 2020-03-01 | 50 | ++--------------+--------------+----------+ +Output: ++--------------------+---------+ +| product_name | unit | ++--------------------+---------+ +| Leetcode Solutions | 130 | +| Leetcode Kit | 100 | ++--------------------+---------+ +Explanation: +Products with product_id = 1 is ordered in February a total of (60 + 70) = 130. +Products with product_id = 2 is ordered in February a total of 80. +Products with product_id = 3 is ordered in February a total of (2 + 3) = 5. +Products with product_id = 4 was not ordered in February 2020. +Products with product_id = 5 is ordered in February a total of (50 + 50) = 100. +''' + +# Write your MySQL query statement below +SELECT P.PRODUCT_NAME, SUM(O.UNIT) AS UNIT +FROM PRODUCTS P +INNER JOIN ORDERS O +ON P.PRODUCT_ID = O.PRODUCT_ID +WHERE O.ORDER_DATE BETWEEN '2020-02-01' AND '2020-02-29' +GROUP BY P.PRODUCT_NAME +HAVING SUM(O.UNIT) >= 100; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/1327. List the Products Ordered in a Period.py b/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/1327. List the Products Ordered in a Period.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/readme.md b/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/readme.md new file mode 100644 index 0000000..7ab2066 --- /dev/null +++ b/LeetCode SQL 50 Solution/1327. List the Products Ordered in a Period/readme.md @@ -0,0 +1,157 @@ +# πŸ›’ List the Products Ordered in a Period - LeetCode 1327 + +## πŸ“Œ Problem Statement +You are given two tables: **Products** and **Orders**. +Your task is to **list the product names** that had at least **100 units ordered in February 2020** along with the total amount ordered. + +--- + +## πŸ“Š Table Structure + +### **Products Table** +| Column Name | Type | +| ---------------- | ------- | +| product_id | int | +| product_name | varchar | +| product_category | varchar | + +- `product_id` is the **primary key** (unique identifier). +- This table contains details about products. + +--- + +### **Orders Table** +| Column Name | Type | +| ----------- | ---- | +| product_id | int | +| order_date | date | +| unit | int | + +- `product_id` is a **foreign key** referencing the `Products` table. +- `order_date` represents when the order was placed. +- `unit` represents the **number of products ordered** on that date. +- The table **may contain duplicate rows**. 
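+
+As a quick illustration of how the two tables fit together, here is a minimal pandas sketch. The frames use only a few of the example rows shown later, and the full SQL and pandas solutions appear further down the page:
+
+```python
+import pandas as pd
+
+# A few illustrative rows with exactly the columns described above.
+products = pd.DataFrame({
+    "product_id": [1, 2],
+    "product_name": ["Leetcode Solutions", "Jewels of Stringology"],
+    "product_category": ["Book", "Book"],
+})
+orders = pd.DataFrame({
+    "product_id": [1, 1, 2],  # duplicate product_id rows are allowed in Orders
+    "order_date": pd.to_datetime(["2020-02-05", "2020-02-10", "2020-01-18"]),
+    "unit": [60, 70, 30],
+})
+
+# The foreign key lets every order row pick up its product's name and category.
+print(orders.merge(products, on="product_id", how="inner"))
+```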
+ +--- + +## πŸ”’ Goal: +Find all products that had **at least 100 units ordered** during **February 2020** and display: +- `product_name` +- Total `unit` ordered in that period + +--- + +## πŸ“Š Example 1: +### **Input:** +#### **Products Table** +| product_id | product_name | product_category | +| ---------- | --------------------- | ---------------- | +| 1 | Leetcode Solutions | Book | +| 2 | Jewels of Stringology | Book | +| 3 | HP | Laptop | +| 4 | Lenovo | Laptop | +| 5 | Leetcode Kit | T-shirt | + +#### **Orders Table** +| product_id | order_date | unit | +| ---------- | ---------- | ---- | +| 1 | 2020-02-05 | 60 | +| 1 | 2020-02-10 | 70 | +| 2 | 2020-01-18 | 30 | +| 2 | 2020-02-11 | 80 | +| 3 | 2020-02-17 | 2 | +| 3 | 2020-02-24 | 3 | +| 4 | 2020-03-01 | 20 | +| 4 | 2020-03-04 | 30 | +| 4 | 2020-03-04 | 60 | +| 5 | 2020-02-25 | 50 | +| 5 | 2020-02-27 | 50 | +| 5 | 2020-03-01 | 50 | + +### **Output:** +| product_name | unit | +| ------------------ | ---- | +| Leetcode Solutions | 130 | +| Leetcode Kit | 100 | + +### **Explanation:** +- **Leetcode Solutions** (ID=1) was ordered in February: + \[ + 60 + 70 = 130 \quad (\text{βœ“ included}) + \] +- **Jewels of Stringology** (ID=2) was ordered **only 80** times in February. (**βœ— not included**) +- **HP Laptop** (ID=3) was ordered **5 times** in February. (**βœ— not included**) +- **Lenovo Laptop** (ID=4) was **not ordered** in February. (**βœ— not included**) +- **Leetcode Kit** (ID=5) was ordered **100 times** in February. (**βœ“ included**) + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `JOIN` + `GROUP BY` + `HAVING`** +#### **Explanation:** +1. **Join** the `Products` and `Orders` tables on `product_id`. +2. **Filter orders** placed in **February 2020** (`BETWEEN '2020-02-01' AND '2020-02-29'`). +3. **Sum up the `unit` ordered** for each product. +4. **Use `HAVING` to filter products with at least 100 units.** +5. Return results in **any order**. + +```sql +SELECT P.PRODUCT_NAME, SUM(O.UNIT) AS UNIT +FROM PRODUCTS P +INNER JOIN ORDERS O +ON P.PRODUCT_ID = O.PRODUCT_ID +WHERE O.ORDER_DATE BETWEEN '2020-02-01' AND '2020-02-29' +GROUP BY P.PRODUCT_NAME +HAVING SUM(O.UNIT) >= 100; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +1. **Merge** `products` and `orders` on `product_id`. +2. **Filter only February 2020 orders**. +3. **Group by `product_name`** and **sum `unit`**. +4. **Filter products with at least 100 units**. +5. **Return the final DataFrame**. 
+ +```python +import pandas as pd + +def products_ordered(products: pd.DataFrame, orders: pd.DataFrame) -> pd.DataFrame: + # Merge both tables on product_id + merged_df = pd.merge(orders, products, on="product_id", how="inner") + + # Convert order_date to datetime format and filter February 2020 + merged_df["order_date"] = pd.to_datetime(merged_df["order_date"]) + feb_orders = merged_df[ + (merged_df["order_date"] >= "2020-02-01") & (merged_df["order_date"] <= "2020-02-29") + ] + + # Group by product_name and sum the units + result = feb_orders.groupby("product_name")["unit"].sum().reset_index() + + # Filter products with at least 100 units + result = result[result["unit"] >= 100] + + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ List-Products-Ordered +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/list-the-products-ordered-in-a-period/) +- πŸ“š [SQL `HAVING` Clause](https://www.w3schools.com/sql/sql_having.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/1341. Movie Rating/1341. Movie Rating.py b/LeetCode SQL 50 Solution/1341. Movie Rating/1341. Movie Rating.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1341. Movie Rating/1341. Movie Rating.sql b/LeetCode SQL 50 Solution/1341. Movie Rating/1341. Movie Rating.sql new file mode 100644 index 0000000..d5af570 --- /dev/null +++ b/LeetCode SQL 50 Solution/1341. Movie Rating/1341. Movie Rating.sql @@ -0,0 +1,115 @@ +1341. Movie Rating +""" +Table: Movies + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| movie_id | int | +| title | varchar | ++---------------+---------+ +movie_id is the primary key (column with unique values) for this table. +title is the name of the movie. + + +Table: Users + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| user_id | int | +| name | varchar | ++---------------+---------+ +user_id is the primary key (column with unique values) for this table. +The column 'name' has unique values. +Table: MovieRating + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| movie_id | int | +| user_id | int | +| rating | int | +| created_at | date | ++---------------+---------+ +(movie_id, user_id) is the primary key (column with unique values) for this table. +This table contains the rating of a movie by a user in their review. +created_at is the user's review date. + + +Write a solution to: + +Find the name of the user who has rated the greatest number of movies. In case of a tie, return the lexicographically smaller user name. +Find the movie name with the highest average rating in February 2020. In case of a tie, return the lexicographically smaller movie name. +The result format is in the following example. 
+ + + +Example 1: + +Input: +Movies table: ++-------------+--------------+ +| movie_id | title | ++-------------+--------------+ +| 1 | Avengers | +| 2 | Frozen 2 | +| 3 | Joker | ++-------------+--------------+ +Users table: ++-------------+--------------+ +| user_id | name | ++-------------+--------------+ +| 1 | Daniel | +| 2 | Monica | +| 3 | Maria | +| 4 | James | ++-------------+--------------+ +MovieRating table: ++-------------+--------------+--------------+-------------+ +| movie_id | user_id | rating | created_at | ++-------------+--------------+--------------+-------------+ +| 1 | 1 | 3 | 2020-01-12 | +| 1 | 2 | 4 | 2020-02-11 | +| 1 | 3 | 2 | 2020-02-12 | +| 1 | 4 | 1 | 2020-01-01 | +| 2 | 1 | 5 | 2020-02-17 | +| 2 | 2 | 2 | 2020-02-01 | +| 2 | 3 | 2 | 2020-03-01 | +| 3 | 1 | 3 | 2020-02-22 | +| 3 | 2 | 4 | 2020-02-25 | ++-------------+--------------+--------------+-------------+ +Output: ++--------------+ +| results | ++--------------+ +| Daniel | +| Frozen 2 | ++--------------+ +Explanation: +Daniel and Monica have rated 3 movies ("Avengers", "Frozen 2" and "Joker") but Daniel is smaller lexicographically. +Frozen 2 and Joker have a rating average of 3.5 in February but Frozen 2 is smaller lexicographically. + +""" + +# Write your MySQL query statement below +( + SELECT name AS results + FROM + Users + JOIN MovieRating USING (user_id) + GROUP BY user_id + ORDER BY COUNT(1) DESC, name + LIMIT 1 +) +UNION ALL +( + SELECT title + FROM + MovieRating + JOIN Movies USING (movie_id) + WHERE DATE_FORMAT(created_at, '%Y-%m') = '2020-02' + GROUP BY movie_id + ORDER BY AVG(rating) DESC, title + LIMIT 1 +); \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1341. Movie Rating/readme.md b/LeetCode SQL 50 Solution/1341. Movie Rating/readme.md new file mode 100644 index 0000000..bcfc6ac --- /dev/null +++ b/LeetCode SQL 50 Solution/1341. Movie Rating/readme.md @@ -0,0 +1,197 @@ +# 🎬 Movie Rating - LeetCode 1341 + +## πŸ“Œ Problem Statement +You are given three tables: **Movies**, **Users**, and **MovieRating**. + +Your task is to: +1. Find the **user who has rated the greatest number of movies**. + - In case of a tie, return the **lexicographically smaller** name. +2. Find the **movie with the highest average rating** in **February 2020**. + - In case of a tie, return the **lexicographically smaller** movie title. + +--- + +## πŸ“Š Table Structure + +### **Movies Table** +| Column Name | Type | +| ----------- | ------- | +| movie_id | int | +| title | varchar | + +- `movie_id` is the **primary key** (unique identifier). +- `title` is the **name of the movie**. + +--- + +### **Users Table** +| Column Name | Type | +| ----------- | ------- | +| user_id | int | +| name | varchar | + +- `user_id` is the **primary key** (unique identifier). +- `name` is **unique** for each user. + +--- + +### **MovieRating Table** +| Column Name | Type | +| ----------- | ---- | +| movie_id | int | +| user_id | int | +| rating | int | +| created_at | date | + +- `(movie_id, user_id)` is the **primary key** (ensuring unique user-movie ratings). +- `created_at` represents the **review date**. + +--- + +## πŸ”’ Goal: +- Return a **single-column result** containing: + 1. **User name** with the most ratings. + 2. **Movie title** with the highest **average rating** in **February 2020**. 
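+
+Both parts of the answer rely on the same tie-breaking pattern: rank by the metric in descending order, break ties by the name in ascending (lexicographic) order, and keep the first row. Below is a minimal pandas sketch of just that pattern, using the counts from the example that follows; the full SQL and pandas solutions appear further down:
+
+```python
+import pandas as pd
+
+# Daniel and Monica both rated 3 movies in the example below.
+counts = pd.DataFrame({"name": ["Monica", "Daniel"], "rated": [3, 3]})
+
+# Sort by the metric descending, then by name ascending, and take the first row.
+top = counts.sort_values(["rated", "name"], ascending=[False, True]).iloc[0]
+print(top["name"])  # "Daniel" -- the lexicographically smaller name wins the tie
+```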
+ +--- + +## πŸ“Š Example 1: +### **Input:** +#### **Movies Table** +| movie_id | title | +| -------- | -------- | +| 1 | Avengers | +| 2 | Frozen 2 | +| 3 | Joker | + +#### **Users Table** +| user_id | name | +| ------- | ------ | +| 1 | Daniel | +| 2 | Monica | +| 3 | Maria | +| 4 | James | + +#### **MovieRating Table** +| movie_id | user_id | rating | created_at | +| -------- | ------- | ------ | ---------- | +| 1 | 1 | 3 | 2020-01-12 | +| 1 | 2 | 4 | 2020-02-11 | +| 1 | 3 | 2 | 2020-02-12 | +| 1 | 4 | 1 | 2020-01-01 | +| 2 | 1 | 5 | 2020-02-17 | +| 2 | 2 | 2 | 2020-02-01 | +| 2 | 3 | 2 | 2020-03-01 | +| 3 | 1 | 3 | 2020-02-22 | +| 3 | 2 | 4 | 2020-02-25 | + +### **Output:** +| results | +| -------- | +| Daniel | +| Frozen 2 | + +### **Explanation:** +- **Most Active User:** + - `Daniel` and `Monica` both rated **3 movies**. + - Since `Daniel` is **lexicographically smaller**, he is chosen. + +- **Highest Average Movie Rating in February 2020:** + - **Frozen 2**: `(5 + 2) / 2 = 3.5` + - **Joker**: `(3 + 4) / 2 = 3.5` + - Since **Frozen 2** is **lexicographically smaller**, it is chosen. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `JOIN` + `GROUP BY` + `HAVING`** +#### **Explanation:** +1. **Find the most active user:** + - Count the number of ratings per user. + - Use `ORDER BY COUNT(*) DESC, name` to get the **user with the most ratings**, breaking ties lexicographically. + - Limit the result to **1 user**. + +2. **Find the highest-rated movie in February 2020:** + - Filter rows where `created_at` is **in February 2020**. + - **Calculate the average rating per movie**. + - Use `ORDER BY AVG(rating) DESC, title` to get the **highest-rated movie**, breaking ties lexicographically. + - Limit the result to **1 movie**. + +```sql +( + SELECT name AS results + FROM + Users + JOIN MovieRating USING (user_id) + GROUP BY user_id + ORDER BY COUNT(1) DESC, name + LIMIT 1 +) +UNION ALL +( + SELECT title + FROM + MovieRating + JOIN Movies USING (movie_id) + WHERE DATE_FORMAT(created_at, '%Y-%m') = '2020-02' + GROUP BY movie_id + ORDER BY AVG(rating) DESC, title + LIMIT 1 +); +``` + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +1. **Find the user with the most ratings:** + - Group by `user_id`, count the ratings. + - Merge with `Users` table to get `name`. + - Sort by **count descending**, then **lexicographically**. + +2. **Find the highest-rated movie in February 2020:** + - Filter only `created_at` **in February 2020**. + - Group by `movie_id` and calculate **average rating**. + - Merge with `Movies` to get `title`. + - Sort by **rating descending**, then **lexicographically**. 
+ +```python +import pandas as pd + +def movie_rating(users: pd.DataFrame, movies: pd.DataFrame, movie_rating: pd.DataFrame) -> pd.DataFrame: + # Most active user + user_counts = movie_rating.groupby("user_id")["rating"].count().reset_index() + most_active_user = user_counts.merge(users, on="user_id") + most_active_user = most_active_user.sort_values(by=["rating", "name"], ascending=[False, True]).iloc[0]["name"] + + # Highest-rated movie in February 2020 + movie_rating["created_at"] = pd.to_datetime(movie_rating["created_at"]) + feb_ratings = movie_rating[movie_rating["created_at"].dt.strftime('%Y-%m') == "2020-02"] + + avg_ratings = feb_ratings.groupby("movie_id")["rating"].mean().reset_index() + highest_rated_movie = avg_ratings.merge(movies, on="movie_id") + highest_rated_movie = highest_rated_movie.sort_values(by=["rating", "title"], ascending=[False, True]).iloc[0]["title"] + + return pd.DataFrame({"results": [most_active_user, highest_rated_movie]}) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Movie-Rating +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/movie-rating/) +- πŸ“š [SQL `GROUP BY` Clause](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) + +## Would you like any changes or additions? πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/1378. Replace Employee ID With The Unique Identifier.md b/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/1378. Replace Employee ID With The Unique Identifier.md new file mode 100644 index 0000000..e69de29 diff --git a/1378. Replace Employee ID With The Unique Identifier.sql b/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/1378. Replace Employee ID With The Unique Identifier.sql similarity index 100% rename from 1378. Replace Employee ID With The Unique Identifier.sql rename to LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/1378. Replace Employee ID With The Unique Identifier.sql diff --git a/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/readme.md b/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/readme.md new file mode 100644 index 0000000..a92e9b1 --- /dev/null +++ b/LeetCode SQL 50 Solution/1378. Replace Employee ID With The Unique Identifier/readme.md @@ -0,0 +1,143 @@ +# 🏒 Replace Employee ID With The Unique Identifier - LeetCode 1378 + +## πŸ“Œ Problem Statement +You are given two tables: **Employees** and **EmployeeUNI**. + +Your task is to return a table with: +- Each employee's **unique ID** if it exists. +- If an employee **does not** have a unique ID, return `NULL`. + +The result can be returned in **any order**. + +--- + +## πŸ“Š Table Structure + +### **Employees Table** +| Column Name | Type | +| ----------- | ------- | +| id | int | +| name | varchar | + +- `id` is the **primary key** (unique for each employee). +- `name` is the **employee's name**. + +--- + +### **EmployeeUNI Table** +| Column Name | Type | +| ----------- | ---- | +| id | int | +| unique_id | int | + +- `(id, unique_id)` is the **primary key** (ensuring unique mapping of employee IDs to unique IDs). +- Each employee **may or may not** have a corresponding **unique ID**. 
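+
+Because an employee may have no matching row in EmployeeUNI, the join has to be a left join rather than an inner join. Here is a minimal pandas sketch of that behaviour, using two rows from the example that follows; the full SQL and pandas solutions appear further down:
+
+```python
+import pandas as pd
+
+employees = pd.DataFrame({"id": [1, 3], "name": ["Alice", "Jonathan"]})
+employee_uni = pd.DataFrame({"id": [3], "unique_id": [1]})
+
+# how="left" keeps Alice even though she has no unique_id; the gap appears as NaN,
+# which corresponds to null in the final result.
+print(employees.merge(employee_uni, on="id", how="left")[["unique_id", "name"]])
+```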
+ +--- + +## πŸ“Š Example 1: +### **Input:** +#### **Employees Table** +| id | name | +| --- | -------- | +| 1 | Alice | +| 7 | Bob | +| 11 | Meir | +| 90 | Winston | +| 3 | Jonathan | + +#### **EmployeeUNI Table** +| id | unique_id | +| --- | --------- | +| 3 | 1 | +| 11 | 2 | +| 90 | 3 | + +### **Output:** +| unique_id | name | +| --------- | -------- | +| null | Alice | +| null | Bob | +| 2 | Meir | +| 3 | Winston | +| 1 | Jonathan | + +### **Explanation:** +- `Alice` and `Bob` **do not have** a unique ID, so we return `NULL`. +- The **unique ID** of `Meir` is **2**. +- The **unique ID** of `Winston` is **3**. +- The **unique ID** of `Jonathan` is **1**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `LEFT JOIN`** +#### **Explanation:** +- Use a **LEFT JOIN** to **include all employees**. +- If an employee **does not have** a matching `unique_id`, return `NULL`. + +```sql +SELECT eu.unique_id, e.name +FROM Employees e +LEFT JOIN EmployeeUNI eu +ON e.id = eu.id; +``` + +### βœ… **Using `USING(id)`** +#### **Explanation:** +- `USING(id)` is a cleaner alternative when both tables share a column. + +```sql +SELECT unique_id, name +FROM Employees +LEFT JOIN EmployeeUNI +USING (id); +``` + +### βœ… **Sorting by `id` (Optional)** +#### **Explanation:** +- If you want to return the result **sorted by `id`**, add `ORDER BY e.id`: + +```sql +SELECT eu.unique_id, e.name +FROM Employees e +LEFT JOIN EmployeeUNI eu +ON e.id = eu.id +ORDER BY e.id; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +- Merge `Employees` with `EmployeeUNI` **using `left` join** on `id`. +- Fill missing values (`NaN`) with `None`. + +```python +import pandas as pd + +def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame: + merged_df = employees.merge(employee_uni, on="id", how="left") + return merged_df[["unique_id", "name"]] +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Replace-Employee-ID +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/replace-employee-id-with-the-unique-identifier/) +- πŸ“š [SQL `LEFT JOIN`](https://www.w3schools.com/sql/sql_join_left.asp) +- 🐍 [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) + +## Would you like any modifications? πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/1484. Group Sold Products By The Date.SQL b/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/1484. Group Sold Products By The Date.SQL new file mode 100644 index 0000000..b89bee9 --- /dev/null +++ b/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/1484. Group Sold Products By The Date.SQL @@ -0,0 +1,64 @@ +1484. Group Sold Products By The Date +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table Activities: + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| sell_date | date | +| product | varchar | ++-------------+---------+ +There is no primary key (column with unique values) for this table. It may contain duplicates. +Each row of this table contains the product name and the date it was sold in a market. + + +Write a solution to find for each date the number of different products sold and their names. + +The sold products names for each date should be sorted lexicographically. + +Return the result table ordered by sell_date. 
+ +The result format is in the following example. + + + +Example 1: + +Input: +Activities table: ++------------+------------+ +| sell_date | product | ++------------+------------+ +| 2020-05-30 | Headphone | +| 2020-06-01 | Pencil | +| 2020-06-02 | Mask | +| 2020-05-30 | Basketball | +| 2020-06-01 | Bible | +| 2020-06-02 | Mask | +| 2020-05-30 | T-Shirt | ++------------+------------+ +Output: ++------------+----------+------------------------------+ +| sell_date | num_sold | products | ++------------+----------+------------------------------+ +| 2020-05-30 | 3 | Basketball,Headphone,T-shirt | +| 2020-06-01 | 2 | Bible,Pencil | +| 2020-06-02 | 1 | Mask | ++------------+----------+------------------------------+ +Explanation: +For 2020-05-30, Sold items were (Headphone, Basketball, T-shirt), we sort them lexicographically and separate them by a comma. +For 2020-06-01, Sold items were (Pencil, Bible), we sort them lexicographically and separate them by a comma. +For 2020-06-02, the Sold item is (Mask), we just return it. + + + +select sell_date, count( DISTINCT product ) as num_sold , + + GROUP_CONCAT( DISTINCT product order by product ASC separator ',' ) as products + + FROM Activities GROUP BY sell_date order by sell_date ASC; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/1484. Group Sold Products By The Date.py b/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/1484. Group Sold Products By The Date.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/readme.md b/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/readme.md new file mode 100644 index 0000000..318b5fd --- /dev/null +++ b/LeetCode SQL 50 Solution/1484. Group Sold Products By The Date/readme.md @@ -0,0 +1,112 @@ +# πŸ›οΈ Group Sold Products By The Date - LeetCode 1484 + +## πŸ“Œ Problem Statement +You are given a table **Activities** that contains records of products sold on different dates. + +Your task is to return: +- The **number of distinct products** sold on each date. +- A **comma-separated string** of the product names, sorted **lexicographically**. + +The result should be **ordered by `sell_date`**. + +--- + +## πŸ“Š Table Structure + +### **Activities Table** +| Column Name | Type | +| ----------- | ------- | +| sell_date | date | +| product | varchar | + +- This table **does not** have a **primary key**. +- It may **contain duplicate entries**. + +--- + +## πŸ“Š Example 1: +### **Input:** +#### **Activities Table** +| sell_date | product | +| ---------- | ---------- | +| 2020-05-30 | Headphone | +| 2020-06-01 | Pencil | +| 2020-06-02 | Mask | +| 2020-05-30 | Basketball | +| 2020-06-01 | Bible | +| 2020-06-02 | Mask | +| 2020-05-30 | T-Shirt | + +### **Output:** +| sell_date | num_sold | products | +| ---------- | -------- | ---------------------------- | +| 2020-05-30 | 3 | Basketball,Headphone,T-Shirt | +| 2020-06-01 | 2 | Bible,Pencil | +| 2020-06-02 | 1 | Mask | + +### **Explanation:** +- `2020-05-30`: Sold items β†’ _(Headphone, Basketball, T-Shirt)_ + - Sorted β†’ **"Basketball, Headphone, T-Shirt"** +- `2020-06-01`: Sold items β†’ _(Pencil, Bible)_ + - Sorted β†’ **"Bible, Pencil"** +- `2020-06-02`: Sold item β†’ _(Mask)_ + - **"Mask"** (only one item) + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `GROUP_CONCAT` with `DISTINCT`** +#### **Explanation:** +- Use `COUNT(DISTINCT product)` to get the **number of distinct products**. 
+- Use `GROUP_CONCAT(DISTINCT product ORDER BY product ASC)` to **join product names in alphabetical order**. +- Group by `sell_date`, then order the result by `sell_date`. + +```sql +SELECT + sell_date, + COUNT(DISTINCT product) AS num_sold, + GROUP_CONCAT(DISTINCT product ORDER BY product ASC SEPARATOR ',') AS products +FROM Activities +GROUP BY sell_date +ORDER BY sell_date ASC; +``` + +--- + +## 🐍 Pandas Solution (Python) +#### **Explanation:** +- **Group by `sell_date`**. +- Use `.nunique()` to count distinct products. +- Use `', '.join(sorted(set(products)))` to sort and concatenate product names. + +```python +import pandas as pd + +def group_sold_products(activities: pd.DataFrame) -> pd.DataFrame: + grouped_df = ( + activities.groupby("sell_date")["product"] + .agg(lambda x: ", ".join(sorted(set(x)))) + .reset_index() + ) + grouped_df["num_sold"] = grouped_df["product"].apply(lambda x: len(x.split(","))) + return grouped_df.rename(columns={"product": "products"}) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Group-Sold-Products +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/group-sold-products-by-the-date/) +- πŸ“š [SQL `GROUP BY`](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas `groupby()` Documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/1517. Find Users With Valid E-Mails.SQL b/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/1517. Find Users With Valid E-Mails.SQL new file mode 100644 index 0000000..07831f5 --- /dev/null +++ b/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/1517. Find Users With Valid E-Mails.SQL @@ -0,0 +1,66 @@ +1517. Find Users With Valid E-Mails +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Users + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| user_id | int | +| name | varchar | +| mail | varchar | ++---------------+---------+ +user_id is the primary key (column with unique values) for this table. +This table contains information of the users signed up in a website. Some e-mails are invalid. + + +Write a solution to find the users who have valid emails. + +A valid e-mail has a prefix name and a domain where: + +The prefix name is a string that may contain letters (upper or lower case), digits, underscore '_', period '.', and/or dash '-'. The prefix name must start with a letter. +The domain is '@leetcode.com'. +Return the result table in any order. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Users table: ++---------+-----------+-------------------------+ +| user_id | name | mail | ++---------+-----------+-------------------------+ +| 1 | Winston | winston@leetcode.com | +| 2 | Jonathan | jonathanisgreat | +| 3 | Annabelle | bella-@leetcode.com | +| 4 | Sally | sally.come@leetcode.com | +| 5 | Marwan | quarz#2020@leetcode.com | +| 6 | David | david69@gmail.com | +| 7 | Shapiro | .shapo@leetcode.com | ++---------+-----------+-------------------------+ +Output: ++---------+-----------+-------------------------+ +| user_id | name | mail | ++---------+-----------+-------------------------+ +| 1 | Winston | winston@leetcode.com | +| 3 | Annabelle | bella-@leetcode.com | +| 4 | Sally | sally.come@leetcode.com | ++---------+-----------+-------------------------+ +Explanation: +The mail of user 2 does not have a domain. +The mail of user 5 has the # sign which is not allowed. +The mail of user 6 does not have the leetcode domain. +The mail of user 7 starts with a period. + +SPLUTIONS: + +SELECT * +FROM Users +WHERE REGEXP_LIKE(mail, '^[A-Za-z]+[A-Za-z0-9\_\.\-]*@leetcode\\.com$'); diff --git a/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/1517. Find Users With Valid E-Mails.py b/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/1517. Find Users With Valid E-Mails.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/readme.md b/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/readme.md new file mode 100644 index 0000000..4162dc3 --- /dev/null +++ b/LeetCode SQL 50 Solution/1517. Find Users With Valid E-Mails/readme.md @@ -0,0 +1,97 @@ +# πŸ“© Find Users With Valid E-Mails - LeetCode 1517 + +## πŸ“Œ Problem Statement +You are given a table **Users** that contains user registration details, including their emails. Some of these emails may be **invalid**. + +A valid email: +- Has a **prefix name** and a **domain**. +- The prefix: + - **Must start with a letter** (uppercase or lowercase). + - Can contain **letters**, **digits**, **underscore (`_`)**, **period (`.`)**, and/or **dash (`-`)**. +- The domain must be **"@leetcode.com"**. + +Your task is to **find users with valid emails**. + +--- + +## πŸ“Š Table Structure + +### **Users Table** +| Column Name | Type | +| ----------- | ------- | +| user_id | int | +| name | varchar | +| mail | varchar | + +- `user_id` is the **primary key**. 
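+
+The SQL solution further down validates addresses with a regular expression. The same pattern can be checked in pandas with `Series.str.match`; this is only an illustrative sketch (the companion `.py` file added in this change is empty), not part of the original solution:
+
+```python
+import pandas as pd
+
+def valid_emails(users: pd.DataFrame) -> pd.DataFrame:
+    # Illustrative helper, not from the repository.
+    # Prefix must start with a letter, may continue with letters/digits/_/./-,
+    # and the domain must be exactly @leetcode.com.
+    pattern = r"^[A-Za-z][A-Za-z0-9_.-]*@leetcode\.com$"
+    return users[users["mail"].str.match(pattern, na=False)]
+```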
+ +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Users Table** +| user_id | name | mail | +| ------- | --------- | ----------------------- | +| 1 | Winston | winston@leetcode.com | +| 2 | Jonathan | jonathanisgreat | +| 3 | Annabelle | bella-@leetcode.com | +| 4 | Sally | sally.come@leetcode.com | +| 5 | Marwan | quarz#2020@leetcode.com | +| 6 | David | david69@gmail.com | +| 7 | Shapiro | .shapo@leetcode.com | + +### **Output:** +| user_id | name | mail | +| ------- | --------- | ----------------------- | +| 1 | Winston | winston@leetcode.com | +| 3 | Annabelle | bella-@leetcode.com | +| 4 | Sally | sally.come@leetcode.com | + +### **Explanation:** +- βœ… **Valid emails:** + - `winston@leetcode.com` βœ… (Starts with a letter, correct domain) + - `bella-@leetcode.com` βœ… (Starts with a letter, correct domain) + - `sally.come@leetcode.com` βœ… (Starts with a letter, correct domain) +- ❌ **Invalid emails:** + - `jonathanisgreat` ❌ (No domain) + - `quarz#2020@leetcode.com` ❌ (Contains `#`, which is not allowed) + - `david69@gmail.com` ❌ (Wrong domain) + - `.shapo@leetcode.com` ❌ (Starts with a period `.`) + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `REGEXP_LIKE` (MySQL)** +#### **Explanation:** +- Use `REGEXP_LIKE(mail, '^[A-Za-z]+[A-Za-z0-9_.-]*@leetcode\\.com$')` + - `^` β†’ Start of string. + - `[A-Za-z]+` β†’ First character **must** be a **letter**. + - `[A-Za-z0-9_.-]*` β†’ Rest can be **letters, numbers, `_`, `.`, or `-`**. + - `@leetcode\\.com$` β†’ Must end with `"@leetcode.com"`. + +```sql +SELECT * +FROM Users +WHERE REGEXP_LIKE(mail, '^[A-Za-z]+[A-Za-z0-9_.-]*@leetcode\\.com$'); +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Find-Users-With-Valid-Emails +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/find-users-with-valid-e-mails/) +- πŸ” [MySQL REGEXP_LIKE Documentation](https://dev.mysql.com/doc/refman/8.0/en/regexp.html) +- πŸ“ [SQL Regular Expressions Cheatsheet](https://www.w3schools.com/sql/sql_regex.asp) +her learning** +## Would you like any modifications? πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1527. Patients With a Condition/1527. Patients With a Condition.py b/LeetCode SQL 50 Solution/1527. Patients With a Condition/1527. Patients With a Condition.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1527. Patients With a Condition/1527. Patients With a Condition.sql b/LeetCode SQL 50 Solution/1527. Patients With a Condition/1527. Patients With a Condition.sql new file mode 100644 index 0000000..e51a43e --- /dev/null +++ b/LeetCode SQL 50 Solution/1527. Patients With a Condition/1527. Patients With a Condition.sql @@ -0,0 +1,57 @@ +1527. Patients With a Condition +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Patients + ++--------------+---------+ +| Column Name | Type | ++--------------+---------+ +| patient_id | int | +| patient_name | varchar | +| conditions | varchar | ++--------------+---------+ +patient_id is the primary key (column with unique values) for this table. +'conditions' contains 0 or more code separated by spaces. +This table contains information of the patients in the hospital. + + +Write a solution to find the patient_id, patient_name, and conditions of the patients who have Type I Diabetes. Type I Diabetes always starts with DIAB1 prefix. + +Return the result table in any order. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Patients table: ++------------+--------------+--------------+ +| patient_id | patient_name | conditions | ++------------+--------------+--------------+ +| 1 | Daniel | YFEV COUGH | +| 2 | Alice | | +| 3 | Bob | DIAB100 MYOP | +| 4 | George | ACNE DIAB100 | +| 5 | Alain | DIAB201 | ++------------+--------------+--------------+ +Output: ++------------+--------------+--------------+ +| patient_id | patient_name | conditions | ++------------+--------------+--------------+ +| 3 | Bob | DIAB100 MYOP | +| 4 | George | ACNE DIAB100 | ++------------+--------------+--------------+ +Explanation: Bob and George both have a condition that starts with DIAB1. + + + +# Write your MySQL query statement below +SELECT patient_id, patient_name, conditions +FROM Patients +WHERE conditions LIKE 'DIAB1%' OR conditions LIKE '% DIAB1%' \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1527. Patients With a Condition/readme.md b/LeetCode SQL 50 Solution/1527. Patients With a Condition/readme.md new file mode 100644 index 0000000..9eb9694 --- /dev/null +++ b/LeetCode SQL 50 Solution/1527. Patients With a Condition/readme.md @@ -0,0 +1,86 @@ +# πŸ₯ Patients With a Condition - LeetCode 1527 + +## πŸ“Œ Problem Statement +You are given a **Patients** table that stores patient health records, including their medical conditions. + +Each patient's **conditions** column contains **0 or more condition codes**, separated by spaces. + +Your task is to **find all patients who have Type I Diabetes**. +- Type I Diabetes is identified by a condition **starting with the prefix "DIAB1"**. + +Return the result **in any order**. + +--- + +## πŸ“Š Table Structure + +### **Patients Table** +| Column Name | Type | +| ------------ | ------- | +| patient_id | int | +| patient_name | varchar | +| conditions | varchar | + +- `patient_id` is the **primary key**. +- `conditions` contains **space-separated condition codes**. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Patients Table** +| patient_id | patient_name | conditions | +| ---------- | ------------ | ------------ | +| 1 | Daniel | YFEV COUGH | +| 2 | Alice | | +| 3 | Bob | DIAB100 MYOP | +| 4 | George | ACNE DIAB100 | +| 5 | Alain | DIAB201 | + +### **Output:** +| patient_id | patient_name | conditions | +| ---------- | ------------ | ------------ | +| 3 | Bob | DIAB100 MYOP | +| 4 | George | ACNE DIAB100 | + +### **Explanation:** +- βœ… **Bob's condition:** `"DIAB100 MYOP"` β†’ **Starts with `"DIAB1"`** β†’ βœ… **Valid** +- βœ… **George's condition:** `"ACNE DIAB100"` β†’ **Contains `"DIAB1"`** β†’ βœ… **Valid** +- ❌ **Daniel's condition:** `"YFEV COUGH"` β†’ **No `"DIAB1"`** +- ❌ **Alice's condition:** `""` (Empty) +- ❌ **Alain's condition:** `"DIAB201"` β†’ Does **not** start with `"DIAB1"` + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `LIKE` with wildcards** +#### **Explanation:** +- We need to check if `"DIAB1"` appears **at the beginning** or **somewhere in the conditions column**. +- `LIKE 'DIAB1%'` β†’ Matches if `"DIAB1"` is at the **start** of the column. 
+- `LIKE '% DIAB1%'` β†’ Matches if `"DIAB1"` appears **after a space (as part of multiple conditions).** + +```sql +SELECT patient_id, patient_name, conditions +FROM Patients +WHERE conditions LIKE 'DIAB1%' OR conditions LIKE '% DIAB1%'; +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Patients-With-Condition +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/patients-with-a-condition/) +- πŸ” [SQL LIKE Operator](https://www.w3schools.com/sql/sql_like.asp) +- πŸ“ [MySQL String Functions](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html) +## Would you like any refinements? πŸš€ \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/1581. Customer Who Visited but Did Not Make Any Transactions.py b/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/1581. Customer Who Visited but Did Not Make Any Transactions.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/1581. Customer Who Visited but Did Not Make Any Transactions.sql b/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/1581. Customer Who Visited but Did Not Make Any Transactions.sql new file mode 100644 index 0000000..21b8124 --- /dev/null +++ b/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/1581. Customer Who Visited but Did Not Make Any Transactions.sql @@ -0,0 +1,6 @@ +# Write your MySQL query statement below +# Write your MySQL query statement below +SELECT customer_id, COUNT(*) as count_no_trans +FROM Visits +WHERE visit_id NOT IN (SELECT DISTINCT visit_id FROM Transactions) +GROUP BY customer_id; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/readme.md b/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/readme.md new file mode 100644 index 0000000..188444c --- /dev/null +++ b/LeetCode SQL 50 Solution/1581. Customer Who Visited but Did Not Make Any Transactions/readme.md @@ -0,0 +1,107 @@ +# 🏬 Customer Who Visited but Did Not Make Any Transactions - LeetCode 1581 + +## πŸ“Œ Problem Statement +You are given two tables, **Visits** and **Transactions**. + +- The **Visits** table records the customers who visited the mall. +- The **Transactions** table records the **transactions made** during a visit. + +Your task is to **find customers who visited but did not make any transactions**. +Also, return the **number of times** these customers **visited without making any transactions**. + +Return the result **in any order**. + +--- + +## πŸ“Š Table Structure + +### **Visits Table** +| Column Name | Type | +| ----------- | ---- | +| visit_id | int | +| customer_id | int | + +- `visit_id` is the **unique identifier** for each visit. + +### **Transactions Table** +| Column Name | Type | +| -------------- | ---- | +| transaction_id | int | +| visit_id | int | +| amount | int | + +- `transaction_id` is the **unique identifier** for each transaction. +- `visit_id` represents the visit **associated with this transaction**. 
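+
+For completeness, here is a pandas sketch of the same idea as the SQL query further down: keep only the visits whose `visit_id` never appears in `Transactions`, then count per customer. This is an illustrative addition; the companion `.py` file added in this change is empty.
+
+```python
+import pandas as pd
+
+def visits_without_transactions(visits: pd.DataFrame, transactions: pd.DataFrame) -> pd.DataFrame:
+    # Illustrative helper, not from the repository.
+    # Visits with no matching transaction at all
+    no_trans = visits[~visits["visit_id"].isin(transactions["visit_id"])]
+
+    # Count such visits per customer
+    return (
+        no_trans.groupby("customer_id")
+        .size()
+        .reset_index(name="count_no_trans")
+    )
+```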
+ +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Visits Table** +| visit_id | customer_id | +| -------- | ----------- | +| 1 | 23 | +| 2 | 9 | +| 4 | 30 | +| 5 | 54 | +| 6 | 96 | +| 7 | 54 | +| 8 | 54 | + +#### **Transactions Table** +| transaction_id | visit_id | amount | +| -------------- | -------- | ------ | +| 2 | 5 | 310 | +| 3 | 5 | 300 | +| 9 | 5 | 200 | +| 12 | 1 | 910 | +| 13 | 2 | 970 | + +### **Output:** +| customer_id | count_no_trans | +| ----------- | -------------- | +| 54 | 2 | +| 30 | 1 | +| 96 | 1 | + +### **Explanation:** +- βœ… **Customer 23:** Visited **once**, made **1 transaction** β†’ ❌ Not included +- βœ… **Customer 9:** Visited **once**, made **1 transaction** β†’ ❌ Not included +- βœ… **Customer 30:** Visited **once**, made **0 transactions** β†’ βœ… Included (`count_no_trans = 1`) +- βœ… **Customer 54:** Visited **3 times**, made **transactions in 1 visit**, **but 2 visits had no transactions** β†’ βœ… Included (`count_no_trans = 2`) +- βœ… **Customer 96:** Visited **once**, made **0 transactions** β†’ βœ… Included (`count_no_trans = 1`) + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Using `NOT IN` to Find Visits Without Transactions** +#### **Explanation:** +- First, **find all visit IDs** that **had at least one transaction** (`SELECT DISTINCT visit_id FROM Transactions`). +- Then, filter out these visit IDs from the **Visits** table. +- Finally, count the number of such visits **per customer**. + +```sql +SELECT customer_id, COUNT(*) AS count_no_trans +FROM Visits +WHERE visit_id NOT IN (SELECT DISTINCT visit_id FROM Transactions) +GROUP BY customer_id; +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Customer-No-Transaction +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/customer-who-visited-but-did-not-make-any-transactions/) +- πŸ” [SQL NOT IN Operator](https://www.w3schools.com/sql/sql_in.asp) +- πŸ“ [MySQL Aggregate Functions](https://dev.mysql.com/doc/refman/8.0/en/group-by-functions.html) diff --git a/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/1633. Percentage of Users Attended a Contest.py b/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/1633. Percentage of Users Attended a Contest.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/1633. Percentage of Users Attended a Contest.sql b/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/1633. Percentage of Users Attended a Contest.sql new file mode 100644 index 0000000..86ac117 --- /dev/null +++ b/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/1633. Percentage of Users Attended a Contest.sql @@ -0,0 +1,84 @@ +1633. Percentage of Users Attended a Contest + +Table: Users + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| user_id | int | +| user_name | varchar | ++-------------+---------+ +user_id is the primary key (column with unique values) for this table. +Each row of this table contains the name and the id of a user. + + +Table: Register + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| contest_id | int | +| user_id | int | ++-------------+---------+ +(contest_id, user_id) is the primary key (combination of columns with unique values) for this table. +Each row of this table contains the id of a user and the contest they registered into. 
+ + +Write a solution to find the percentage of the users registered in each contest rounded to two decimals. + +Return the result table ordered by percentage in descending order. In case of a tie, order it by contest_id in ascending order. + +The result format is in the following example. + + + +Example 1: + +Input: +Users table: ++---------+-----------+ +| user_id | user_name | ++---------+-----------+ +| 6 | Alice | +| 2 | Bob | +| 7 | Alex | ++---------+-----------+ +Register table: ++------------+---------+ +| contest_id | user_id | ++------------+---------+ +| 215 | 6 | +| 209 | 2 | +| 208 | 2 | +| 210 | 6 | +| 208 | 6 | +| 209 | 7 | +| 209 | 6 | +| 215 | 7 | +| 208 | 7 | +| 210 | 2 | +| 207 | 2 | +| 210 | 7 | ++------------+---------+ +Output: ++------------+------------+ +| contest_id | percentage | ++------------+------------+ +| 208 | 100.0 | +| 209 | 100.0 | +| 210 | 100.0 | +| 215 | 66.67 | +| 207 | 33.33 | ++------------+------------+ +Explanation: +All the users registered in contests 208, 209, and 210. The percentage is 100% and we sort them in the answer table by contest_id in ascending order. +Alice and Alex registered in contest 215 and the percentage is ((2/3) * 100) = 66.67% +Bob registered in contest 207 and the percentage is ((1/3) * 100) = 33.33% + + +# Write your MySQL query statement below + +SELECT contest_id, ROUND((COUNT(user_id) / (SELECT COUNT(*) FROM Users) * 100), 2) AS percentage +FROM Register +GROUP BY contest_id +ORDER BY percentage DESC, contest_id ASC; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/readme.md b/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/readme.md new file mode 100644 index 0000000..4922922 --- /dev/null +++ b/LeetCode SQL 50 Solution/1633. Percentage of Users Attended a Contest/readme.md @@ -0,0 +1,115 @@ +# 🎯 Percentage of Users Attended a Contest - LeetCode 1633 + +## πŸ“Œ Problem Statement +You are given two tables, **Users** and **Register**. + +- The **Users** table contains the users who are registered on the platform. +- The **Register** table records which user registered for which contest. + +Your task is to **calculate the percentage of users who attended each contest**. + +- Round the percentage to **two decimal places**. +- **Sort the results** by the percentage in descending order. If the percentages are the same, sort them by **contest_id** in ascending order. + +--- + +## πŸ“Š Table Structure + +### **Users Table** +| Column Name | Type | +| ----------- | ------- | +| user_id | int | +| user_name | varchar | + +- `user_id` is the **primary key** for this table. + +### **Register Table** +| Column Name | Type | +| ----------- | ---- | +| contest_id | int | +| user_id | int | + +- `(contest_id, user_id)` is the **primary key** for this table. +- This table keeps track of which user has registered for which contest. 
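+
+Here is a pandas sketch of the same computation as the SQL query further down (illustrative only; the companion `.py` file added in this change is empty). Note that the denominator comes from `Users`, not `Register`, so users who never registered for any contest still count toward the total:
+
+```python
+import pandas as pd
+
+def contest_percentage(users: pd.DataFrame, register: pd.DataFrame) -> pd.DataFrame:
+    # Illustrative helper, not from the repository.
+    total_users = users["user_id"].nunique()
+
+    result = (
+        register.groupby("contest_id")["user_id"]
+        .nunique()                      # registrations are unique per (contest, user)
+        .mul(100.0 / total_users)
+        .round(2)
+        .reset_index(name="percentage")
+    )
+
+    # Highest percentage first; ties broken by contest_id ascending
+    return result.sort_values(["percentage", "contest_id"], ascending=[False, True])
+```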
+ +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Users Table** +| user_id | user_name | +| ------- | --------- | +| 6 | Alice | +| 2 | Bob | +| 7 | Alex | + +#### **Register Table** +| contest_id | user_id | +| ---------- | ------- | +| 215 | 6 | +| 209 | 2 | +| 208 | 2 | +| 210 | 6 | +| 208 | 6 | +| 209 | 7 | +| 209 | 6 | +| 215 | 7 | +| 208 | 7 | +| 210 | 2 | +| 207 | 2 | +| 210 | 7 | + +### **Output:** +| contest_id | percentage | +| ---------- | ---------- | +| 208 | 100.00 | +| 209 | 100.00 | +| 210 | 100.00 | +| 215 | 66.67 | +| 207 | 33.33 | + +### **Explanation:** +- For **contest 208, 209, and 210**, **100%** of users attended these contests. +- For **contest 215**, **66.67%** of users attended it (Alice and Alex). +- For **contest 207**, only **33.33%** of users attended it (Bob). + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. Calculate the total number of users in the **Users** table using `SELECT COUNT(*) FROM Users`. +2. For each contest, count how many users registered for that contest in the **Register** table. +3. Calculate the percentage of users who registered for each contest using the formula: + + \[ + \text{Percentage} = \left( \frac{\text{Registered Users}}{\text{Total Users}} \right) \times 100 + \] + +4. Round the percentage to **two decimal places**. +5. Sort the results by **percentage** in descending order. If there’s a tie, sort by **contest_id** in ascending order. + +```sql +SELECT contest_id, ROUND((COUNT(user_id) / (SELECT COUNT(*) FROM Users) * 100), 2) AS percentage +FROM Register +GROUP BY contest_id +ORDER BY percentage DESC, contest_id ASC; +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Contest-Percentage +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/percentage-of-users-attended-a-contest/) +- πŸ“ [MySQL Aggregate Functions](https://dev.mysql.com/doc/refman/8.0/en/group-by-functions.html) +- πŸ” [SQL ROUND Function](https://www.w3schools.com/sql/func_mysql_round.asp) diff --git a/LeetCode SQL 50 Solution/1667. Fix Names in a Table/1667. Fix Names in a Table.py b/LeetCode SQL 50 Solution/1667. Fix Names in a Table/1667. Fix Names in a Table.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1667. Fix Names in a Table/1667. Fix Names in a Table.sql b/LeetCode SQL 50 Solution/1667. Fix Names in a Table/1667. Fix Names in a Table.sql new file mode 100644 index 0000000..a092a51 --- /dev/null +++ b/LeetCode SQL 50 Solution/1667. Fix Names in a Table/1667. Fix Names in a Table.sql @@ -0,0 +1,53 @@ +1667. Fix Names in a Table +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Users + ++----------------+---------+ +| Column Name | Type | ++----------------+---------+ +| user_id | int | +| name | varchar | ++----------------+---------+ +user_id is the primary key (column with unique values) for this table. +This table contains the ID and the name of the user. The name consists of only lowercase and uppercase characters. + + +Write a solution to fix the names so that only the first character is uppercase and the rest are lowercase. + +Return the result table ordered by user_id. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Users table: ++---------+-------+ +| user_id | name | ++---------+-------+ +| 1 | aLice | +| 2 | bOB | ++---------+-------+ +Output: ++---------+-------+ +| user_id | name | ++---------+-------+ +| 1 | Alice | +| 2 | Bob | ++---------+-------+ + + + + +# Write your MySQL query statement below +SELECT user_id, + CONCAT(UPPER(LEFT(name, 1)), LOWER(SUBSTRING(name, 2))) AS name +FROM Users +ORDER BY user_id; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1667. Fix Names in a Table/readme.md b/LeetCode SQL 50 Solution/1667. Fix Names in a Table/readme.md new file mode 100644 index 0000000..cd2e4c9 --- /dev/null +++ b/LeetCode SQL 50 Solution/1667. Fix Names in a Table/readme.md @@ -0,0 +1,80 @@ +# πŸ“ Fix Names in a Table - LeetCode 1667 + +## πŸ“Œ Problem Statement +You are given a table **Users** that contains user IDs and names. + +- The **name column** contains names that are written in a **mixed-case format** (e.g., `aLice`, `bOB`). +- Your task is to **correct the formatting** so that: + - The **first letter** of the name is **uppercase**. + - The **remaining letters** are **lowercase**. +- The result should be **ordered by user_id**. + +--- + +## πŸ“Š Table Structure + +### **Users Table** +| Column Name | Type | +| ----------- | ------- | +| user_id | int | +| name | varchar | + +- `user_id` is the **primary key** for this table. +- `name` contains only **uppercase and lowercase** letters. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Users Table** +| user_id | name | +| ------- | ----- | +| 1 | aLice | +| 2 | bOB | + +### **Output:** +| user_id | name | +| ------- | ----- | +| 1 | Alice | +| 2 | Bob | + +### **Explanation:** +- The first letter of each name is converted to **uppercase**. +- The remaining letters are converted to **lowercase**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. Use the `UPPER()` function to **capitalize** the **first letter** of the name. +2. Use the `LOWER()` function to convert the **rest of the name** to **lowercase**. +3. Use `LEFT(name, 1)` to extract the **first character** of the name. +4. Use `SUBSTRING(name, 2)` to extract the **rest of the name**. +5. Use `CONCAT()` to combine the capitalized first letter and the lowercase remaining part. +6. **Sort the output** by `user_id`. + +```sql +SELECT user_id, + CONCAT(UPPER(LEFT(name, 1)), LOWER(SUBSTRING(name, 2))) AS name +FROM Users +ORDER BY user_id; +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Fix-Names +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/fix-names-in-a-table/) +- πŸ” [SQL CONCAT Function](https://www.w3schools.com/sql/func_mysql_concat.asp) +- πŸ“ [MySQL String Functions](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html) diff --git a/LeetCode SQL 50 Solution/1683. Invalid Tweets/1683. Invalid Tweets.py b/LeetCode SQL 50 Solution/1683. Invalid Tweets/1683. Invalid Tweets.py new file mode 100644 index 0000000..e69de29 diff --git a/1683. Invalid Tweets.sql b/LeetCode SQL 50 Solution/1683. Invalid Tweets/1683. Invalid Tweets.sql similarity index 100% rename from 1683. Invalid Tweets.sql rename to LeetCode SQL 50 Solution/1683. Invalid Tweets/1683. Invalid Tweets.sql diff --git a/LeetCode SQL 50 Solution/1683. Invalid Tweets/readme.md b/LeetCode SQL 50 Solution/1683. Invalid Tweets/readme.md new file mode 100644 index 0000000..586e52c --- /dev/null +++ b/LeetCode SQL 50 Solution/1683. 
Invalid Tweets/readme.md @@ -0,0 +1,72 @@ +# 🐦 Invalid Tweets - LeetCode 1683 + +## πŸ“Œ Problem Statement +You are given a table **Tweets** that contains tweet IDs and their content. + +- A tweet is **invalid** if its content exceeds **15 characters**. +- Your task is to find and return the **IDs of all invalid tweets**. + +--- + +## πŸ“Š Table Structure + +### **Tweets Table** +| Column Name | Type | +| ----------- | ------- | +| tweet_id | int | +| content | varchar | + +- `tweet_id` is the **primary key**. +- `content` consists of **alphanumeric characters, '!', and spaces**. +- Tweets can have **a maximum of 15 characters** to be valid. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Tweets Table** +| tweet_id | content | +| -------- | --------------------------------- | +| 1 | Let us Code | +| 2 | More than fifteen chars are here! | + +### **Output:** +| tweet_id | +| -------- | +| 2 | + +### **Explanation:** +- **Tweet 1** has **11 characters**, so it is **valid**. +- **Tweet 2** has **33 characters**, so it is **invalid**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. Use the `LENGTH()` function to get the **character length** of the tweet. +2. Filter rows where **content length > 15**. +3. Return only the `tweet_id` of invalid tweets. + +```sql +SELECT tweet_id +FROM Tweets +WHERE LENGTH(content) > 15; +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Invalid-Tweets +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/invalid-tweets/) +- πŸ“ [MySQL LENGTH Function](https://www.w3schools.com/sql/func_mysql_length.asp) diff --git a/LeetCode SQL 50 Solution/1729. Find Followers Count/1729. Find Followers Count.py b/LeetCode SQL 50 Solution/1729. Find Followers Count/1729. Find Followers Count.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1729. Find Followers Count/1729. Find Followers Count.sql b/LeetCode SQL 50 Solution/1729. Find Followers Count/1729. Find Followers Count.sql new file mode 100644 index 0000000..79fca7a --- /dev/null +++ b/LeetCode SQL 50 Solution/1729. Find Followers Count/1729. Find Followers Count.sql @@ -0,0 +1,51 @@ +1729. Find Followers Count + ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| user_id | int | +| follower_id | int | ++-------------+------+ +(user_id, follower_id) is the primary key (combination of columns with unique values) for this table. +This table contains the IDs of a user and a follower in a social media app where the follower follows the user. + + +Write a solution that will, for each user, return the number of followers. + +Return the result table ordered by user_id in ascending order. + +The result format is in the following example. + + + +Example 1: + +Input: +Followers table: ++---------+-------------+ +| user_id | follower_id | ++---------+-------------+ +| 0 | 1 | +| 1 | 0 | +| 2 | 0 | +| 2 | 1 | ++---------+-------------+ +Output: ++---------+----------------+ +| user_id | followers_count| ++---------+----------------+ +| 0 | 1 | +| 1 | 1 | +| 2 | 2 | ++---------+----------------+ +Explanation: +The followers of 0 are {1} +The followers of 1 are {0} +The followers of 2 are {0,1} + + +# Write your MySQL query statement below +SELECT user_id, COUNT(follower_id) AS followers_count +FROM Followers +GROUP BY user_id +ORDER BY user_id; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1729. 
Find Followers Count/readme.md b/LeetCode SQL 50 Solution/1729. Find Followers Count/readme.md new file mode 100644 index 0000000..54fa1e2 --- /dev/null +++ b/LeetCode SQL 50 Solution/1729. Find Followers Count/readme.md @@ -0,0 +1,106 @@ +# πŸ“Š Find Followers Count - LeetCode 1729 + +## πŸ“Œ Problem Statement +You are given a table **Followers** that contains the following information: + +- `user_id`: The ID of the user being followed. +- `follower_id`: The ID of the user who is following. + +Your task is to return a list of users with their **follower count**, sorted in **ascending order of `user_id`**. + +--- + +## πŸ“Š Table Structure + +### **Followers Table** +| Column Name | Type | +| ----------- | ---- | +| user_id | int | +| follower_id | int | + +- `(user_id, follower_id)` is the **primary key**. +- Each row represents a **follower relationship** between two users. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Followers Table** +| user_id | follower_id | +| ------- | ----------- | +| 0 | 1 | +| 1 | 0 | +| 2 | 0 | +| 2 | 1 | + +### **Output:** +| user_id | followers_count | +| ------- | --------------- | +| 0 | 1 | +| 1 | 1 | +| 2 | 2 | + +### **Explanation:** +- **User 0** has **1 follower** `{1}`. +- **User 1** has **1 follower** `{0}`. +- **User 2** has **2 followers** `{0, 1}`. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. Use `COUNT(follower_id)` to count the number of followers for each `user_id`. +2. Use `GROUP BY user_id` to group the followers for each user. +3. Sort the result by `user_id` in **ascending order**. + +```sql +SELECT user_id, COUNT(follower_id) AS followers_count +FROM Followers +GROUP BY user_id +ORDER BY user_id; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. Use `groupby("user_id")` to count followers for each user. +2. Use `reset_index(name="followers_count")` to format the result properly. +3. Sort the result by `user_id`. + +```python +import pandas as pd + +def find_followers_count(followers: pd.DataFrame) -> pd.DataFrame: + result = ( + followers.groupby("user_id")["follower_id"] + .count() + .reset_index(name="followers_count") + .sort_values("user_id") + ) + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Find-Followers-Count +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution.py +│── πŸ“œ test_cases.sql +│── πŸ“œ test_cases.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/find-followers-count/) +- πŸ“ [MySQL COUNT Function](https://www.w3schools.com/sql/sql_count.asp) +- 🐍 [Pandas GroupBy](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html) +``` + diff --git a/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/1731. The Number of Employees Which Report to Each Employee.py b/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/1731. The Number of Employees Which Report to Each Employee.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/1731. The Number of Employees Which Report to Each Employee.sql b/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/1731. The Number of Employees Which Report to Each Employee.sql new file mode 100644 index 0000000..fabb660 --- /dev/null +++ b/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/1731. 
The Number of Employees Which Report to Each Employee.sql @@ -0,0 +1,88 @@ +1731. The Number of Employees Which Report to Each Employee +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Employees + ++-------------+----------+ +| Column Name | Type | ++-------------+----------+ +| employee_id | int | +| name | varchar | +| reports_to | int | +| age | int | ++-------------+----------+ +employee_id is the column with unique values for this table. +This table contains information about the employees and the id of the manager they report to. Some employees do not report to anyone (reports_to is null). + + +For this problem, we will consider a manager an employee who has at least 1 other employee reporting to them. + +Write a solution to report the ids and the names of all managers, the number of employees who report directly to them, and the average age of the reports rounded to the nearest integer. + +Return the result table ordered by employee_id. + +The result format is in the following example. + + + +Example 1: + +Input: +Employees table: ++-------------+---------+------------+-----+ +| employee_id | name | reports_to | age | ++-------------+---------+------------+-----+ +| 9 | Hercy | null | 43 | +| 6 | Alice | 9 | 41 | +| 4 | Bob | 9 | 36 | +| 2 | Winston | null | 37 | ++-------------+---------+------------+-----+ +Output: ++-------------+-------+---------------+-------------+ +| employee_id | name | reports_count | average_age | ++-------------+-------+---------------+-------------+ +| 9 | Hercy | 2 | 39 | ++-------------+-------+---------------+-------------+ +Explanation: Hercy has 2 people report directly to him, Alice and Bob. Their average age is (41+36)/2 = 38.5, which is 39 after rounding it to the nearest integer. +Example 2: + +Input: +Employees table: ++-------------+---------+------------+-----+ +| employee_id | name | reports_to | age | +|-------------|---------|------------|-----| +| 1 | Michael | null | 45 | +| 2 | Alice | 1 | 38 | +| 3 | Bob | 1 | 42 | +| 4 | Charlie | 2 | 34 | +| 5 | David | 2 | 40 | +| 6 | Eve | 3 | 37 | +| 7 | Frank | null | 50 | +| 8 | Grace | null | 48 | ++-------------+---------+------------+-----+ +Output: ++-------------+---------+---------------+-------------+ +| employee_id | name | reports_count | average_age | +| ----------- | ------- | ------------- | ----------- | +| 1 | Michael | 2 | 40 | +| 2 | Alice | 2 | 37 | +| 3 | Bob | 1 | 37 | ++-------------+---------+---------------+-------------+ + + +# Write your MySQL query statement below + +SELECT + Manager.employee_id, + Manager.name, + COUNT(Employee.employee_id) AS reports_count, + ROUND(AVG(Employee.age)) AS average_age +FROM Employees AS Manager +INNER JOIN Employees AS Employee + ON (Employee.reports_to = Manager.employee_id) +GROUP BY 1 +ORDER BY 1; diff --git a/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/readme.md b/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/readme.md new file mode 100644 index 0000000..8d6ef33 --- /dev/null +++ b/LeetCode SQL 50 Solution/1731. The Number of Employees Which Report to Each Employee/readme.md @@ -0,0 +1,134 @@ +# πŸ‘₯ The Number of Employees Which Report to Each Employee - LeetCode 1731 + +## πŸ“Œ Problem Statement +You are given a table **Employees** that contains the following columns: +- `employee_id`: The unique ID of the employee. +- `name`: The name of the employee. 
+- `reports_to`: The `employee_id` of the manager the employee reports to (can be `NULL` if the employee does not report to anyone). +- `age`: The age of the employee. + +A manager is defined as an employee who has **at least 1 direct report**. +Your task is to report: +- The **IDs and names of all managers**. +- The **number of employees** who report **directly** to them. +- The **average age** of their direct reports, rounded to the nearest integer. + +Return the result **ordered by `employee_id`** in ascending order. + +--- + +## πŸ“Š Table Structure + +### **Employees Table** +| Column Name | Type | +| ----------- | ------- | +| employee_id | int | +| name | varchar | +| reports_to | int | +| age | int | + +- `employee_id` is the **primary key**. +- `reports_to` may be `NULL` for employees who do not report to anyone. + +--- + +## πŸ“Š Example 1: + +### **Input:** +| employee_id | name | reports_to | age | +| ----------- | ------- | ---------- | --- | +| 9 | Hercy | NULL | 43 | +| 6 | Alice | 9 | 41 | +| 4 | Bob | 9 | 36 | +| 2 | Winston | NULL | 37 | + +### **Output:** +| employee_id | name | reports_count | average_age | +| ----------- | ----- | ------------- | ----------- | +| 9 | Hercy | 2 | 39 | + +### **Explanation:** +- **Hercy** (employee_id = 9) is a manager with two direct reports: **Alice** (age 41) and **Bob** (age 36). +- The average age of these reports is (41 + 36) / 2 = 38.5, which is rounded to **39**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. Use a **self-join** on the **Employees** table where the employee’s `reports_to` matches the manager’s `employee_id`. +2. Count the number of direct reports for each manager. +3. Compute the average age of the direct reports and round the result to the nearest integer. +4. Group by the manager’s `employee_id` and order the results by `employee_id`. + +```sql +SELECT + Manager.employee_id, + Manager.name, + COUNT(Employee.employee_id) AS reports_count, + ROUND(AVG(Employee.age)) AS average_age +FROM Employees AS Manager +INNER JOIN Employees AS Employee + ON Employee.reports_to = Manager.employee_id +GROUP BY Manager.employee_id +ORDER BY Manager.employee_id; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. Filter the DataFrame to create a join between managers and their direct reports. +2. Group by the manager’s `employee_id` and compute: + - The count of direct reports. + - The average age of the reports, then round the average. +3. Merge the results with the original manager information. +4. Sort the result by `employee_id`. + +```python +import pandas as pd + +def employees_reporting(employees: pd.DataFrame) -> pd.DataFrame: + # Merge the table with itself: one for managers and one for employees reporting to them. + merged = employees.merge( + employees, + left_on='employee_id', + right_on='reports_to', + suffixes=('_manager', '_report') + ) + + # Group by manager's employee_id and name, then compute the count and average age of reports. + result = merged.groupby(['employee_id_manager', 'name_manager']).agg( + reports_count=('employee_id_report', 'count'), + average_age=('age_report', lambda x: round(x.mean())) + ).reset_index() + + # Rename columns to match expected output. + result.rename(columns={ + 'employee_id_manager': 'employee_id', + 'name_manager': 'name' + }, inplace=True) + + # Sort by employee_id in ascending order. 
+ result = result.sort_values('employee_id').reset_index(drop=True) + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Employees-Reporting +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/the-number-of-employees-which-report-to-each-employee/) +- πŸ” [MySQL GROUP BY Documentation](https://www.w3schools.com/sql/sql_groupby.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/1757. Recyclable and Low Fat Products.py b/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/1757. Recyclable and Low Fat Products.py new file mode 100644 index 0000000..e69de29 diff --git a/1757. Recyclable and Low Fat Products.sql b/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/1757. Recyclable and Low Fat Products.sql similarity index 97% rename from 1757. Recyclable and Low Fat Products.sql rename to LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/1757. Recyclable and Low Fat Products.sql index fd2c848..65f4ac5 100644 --- a/1757. Recyclable and Low Fat Products.sql +++ b/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/1757. Recyclable and Low Fat Products.sql @@ -1,4 +1,4 @@ -# Write your MySQL query statement below -select product_id -from products +# Write your MySQL query statement below +select product_id +from products where low_fats = 'Y' and recyclable = 'Y' \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/readme.md b/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/readme.md new file mode 100644 index 0000000..74614f9 --- /dev/null +++ b/LeetCode SQL 50 Solution/1757. Recyclable and Low Fat Products/readme.md @@ -0,0 +1,100 @@ +# ♻️ Recyclable and Low Fat Products - LeetCode 1757 + +## πŸ“Œ Problem Statement +You are given a table **Products** that contains information about products with respect to their fat content and recyclability. + +- The **low_fats** column is an ENUM with values `'Y'` and `'N'`, where `'Y'` indicates the product is low fat. +- The **recyclable** column is an ENUM with values `'Y'` and `'N'`, where `'Y'` indicates the product is recyclable. + +Your task is to **find the IDs of products that are both low fat and recyclable**. + +Return the result in **any order**. + +--- + +## πŸ“Š Table Structure + +### **Products Table** +| Column Name | Type | +| ----------- | ---- | +| product_id | int | +| low_fats | enum | +| recyclable | enum | + +- `product_id` is the **primary key**. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Products Table** +| product_id | low_fats | recyclable | +| ---------- | -------- | ---------- | +| 0 | Y | N | +| 1 | Y | Y | +| 2 | N | Y | +| 3 | Y | Y | +| 4 | N | N | + +### **Output:** +| product_id | +| ---------- | +| 1 | +| 3 | + +### **Explanation:** +- Only products with `product_id` **1** and **3** are **both low fat and recyclable**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +- Filter the **Products** table for rows where `low_fats = 'Y'` and `recyclable = 'Y'`. +- Return the corresponding `product_id`. + +```sql +SELECT product_id +FROM Products +WHERE low_fats = 'Y' AND recyclable = 'Y'; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. Load the **Products** table into a Pandas DataFrame. 
+2. Filter the DataFrame to keep rows where both `low_fats` and `recyclable` are `'Y'`. +3. Select and return the `product_id` column. + +```python +import pandas as pd + +def recyclable_low_fat_products(products: pd.DataFrame) -> pd.DataFrame: + # Filter rows where both low_fats and recyclable are 'Y' + filtered = products[(products['low_fats'] == 'Y') & (products['recyclable'] == 'Y')] + # Select only the product_id column + result = filtered[['product_id']] + return result +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Recyclable-Low-Fat-Products +│── πŸ“œ README.md +│── πŸ“œ solution.sql +│── πŸ“œ solution_pandas.py +│── πŸ“œ test_cases.sql +│── πŸ“œ sample_data.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/recyclable-and-low-fat-products/) +- πŸ” [MySQL WHERE Clause](https://www.w3schools.com/sql/sql_where.asp) +- 🐍 [Pandas DataFrame Filtering](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) diff --git a/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second Highest Salary.py b/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second Highest Salary.py new file mode 100644 index 0000000..e50a809 --- /dev/null +++ b/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second Highest Salary.py @@ -0,0 +1,6 @@ +import pandas as pd + +def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame: + unique_salaries = employee['salary'].drop_duplicates().sort_values(ascending=False) + second_highest = unique_salaries.iloc[1] if len(unique_salaries) > 1 else None + return pd.DataFrame({'SecondHighestSalary': [second_highest]}) diff --git a/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second heighest salary.sql b/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second heighest salary.sql new file mode 100644 index 0000000..1ac922f --- /dev/null +++ b/LeetCode SQL 50 Solution/176. Second heighest salary/176. Second heighest salary.sql @@ -0,0 +1,17 @@ +# Solution 1 +# Write your MSSQL query statement below +select max(salary) as SecondHighestSalary from employee +where salary not in (select max(salary) from employee) + + + +# Solution 2 +# Write your MSSQL query statement below +# Write your MySQL query statement below +SELECT + ( + SELECT DISTINCT salary + FROM Employee + ORDER BY salary DESC + LIMIT 1, 1 + ) AS SecondHighestSalary; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/176. Second heighest salary/readme.md b/LeetCode SQL 50 Solution/176. Second heighest salary/readme.md new file mode 100644 index 0000000..76e32e9 --- /dev/null +++ b/LeetCode SQL 50 Solution/176. Second heighest salary/readme.md @@ -0,0 +1,99 @@ +# 176. Second Highest Salary + +## Problem Statement +You are given a table `Employee` containing the salaries of employees. The goal is to find the second highest distinct salary from this table. If there is no second highest salary, return `NULL` (or `None` in Pandas). + +### Table: Employee + +| Column Name | Type | +| ----------- | ---- | +| id | int | +| salary | int | + +- `id` is the primary key for this table. +- Each row contains salary information for an employee. 
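+
+If you want to try the queries below locally, a tiny seed script is enough; this sketch is illustrative only (it simply mirrors the schema above and the Example 1 rows below, and is not part of the official problem files):
+
+```sql
+-- Hypothetical local setup for experimenting with the solutions in this folder
+CREATE TABLE IF NOT EXISTS Employee (
+    id     INT PRIMARY KEY,
+    salary INT
+);
+
+INSERT INTO Employee (id, salary) VALUES (1, 100), (2, 200), (3, 300);
+```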
+ +## Example 1: + +### **Input:** + +| id | salary | +| --- | ------ | +| 1 | 100 | +| 2 | 200 | +| 3 | 300 | + +### **Output:** + +| SecondHighestSalary | +| ------------------- | +| 200 | + +## Example 2: + +### **Input:** + +| id | salary | +| --- | ------ | +| 1 | 100 | + +### **Output:** + +| SecondHighestSalary | +| ------------------- | +| NULL | + +--- +## **Approach** + +### **SQL Approach** +1. **Use a Window Function:** + - Apply `DENSE_RANK()` to rank salaries in descending order. + - Assign rank `1` to the highest salary, `2` to the second highest, and so on. +2. **Filter by Rank:** + - Select the salary where `rank = 2`. + - If no second highest salary exists, return `NULL`. + +--- +## **Solution** + +```sql +WITH RankedEmployees AS ( + SELECT *, DENSE_RANK() OVER(ORDER BY salary DESC) AS `rank` + FROM Employee +) +SELECT MAX(salary) AS SecondHighestSalary +FROM RankedEmployees +WHERE `rank` = 2; +``` + +--- +## **File Structure** +``` +πŸ“‚ SecondHighestSalary +β”œβ”€β”€ πŸ“„ README.md # Problem statement, approach, and solution +β”œβ”€β”€ πŸ“„ solution.sql # SQL query file +β”œβ”€β”€ πŸ“„ solution_pandas.py # Pandas solution file +``` + +--- +## **Alternative Pandas Approach** + +```python +import pandas as pd + +def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame: + unique_salaries = employee['salary'].drop_duplicates().nlargest(2) + second_highest = unique_salaries.iloc[1] if len(unique_salaries) > 1 else None + return pd.DataFrame({'SecondHighestSalary': [second_highest]}) +``` + +--- +## **Resources & References** +- [LeetCode Problem Link](https://leetcode.com/problems/second-highest-salary/) +- [SQL DENSE_RANK() Documentation](https://www.sqlservertutorial.net/sql-server-window-functions/sql-server-dense_rank-function/) + +--- +## **Contribute** +Feel free to contribute by submitting an issue or a pull request! + diff --git a/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.py b/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.py new file mode 100644 index 0000000..9136333 --- /dev/null +++ b/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.py @@ -0,0 +1,7 @@ +import pandas as pd + +def getNthHighestSalary(employee: pd.DataFrame, N: int) -> pd.DataFrame: + unique_salaries = employee['salary'].drop_duplicates().nlargest(N) + if len(unique_salaries) < N: + return pd.DataFrame({"getNthHighestSalary(N)": [None]}) + return pd.DataFrame({"getNthHighestSalary(N)": [unique_salaries.iloc[-1]]}) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.sql b/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.sql new file mode 100644 index 0000000..6dfb0ac --- /dev/null +++ b/LeetCode SQL 50 Solution/177. Nth Highest Salary/177. Nth Highest Salary.sql @@ -0,0 +1,12 @@ +# Write your MySQL query statement below. +CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT +BEGIN +SET N = N-1; + RETURN ( + SELECT DISTINCT(salary) from Employee order by salary DESC + LIMIT 1 OFFSET N + + ); +END + +# pls upvote if you find solution easy to undestand....!! Thanks..!!! \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/177. Nth Highest Salary/readme.md b/LeetCode SQL 50 Solution/177. Nth Highest Salary/readme.md new file mode 100644 index 0000000..4d13d7e --- /dev/null +++ b/LeetCode SQL 50 Solution/177. Nth Highest Salary/readme.md @@ -0,0 +1,121 @@ +# 177. 
Nth Highest Salary + +## Problem Statement +Given a table `Employee`, write a SQL query to find the `nth` highest salary. If there is no `nth` highest salary, return `null`. + +### Table Schema: `Employee` +| Column Name | Type | +| ----------- | ---- | +| id | int | +| salary | int | + +- `id` is the primary key (unique values for employees). +- `salary` column contains employee salary details. + +### Example 1: +#### **Input:** +```sql ++----+--------+ +| id | salary | ++----+--------+ +| 1 | 100 | +| 2 | 200 | +| 3 | 300 | ++----+--------+ +n = 2 +``` +#### **Output:** +```sql ++------------------------+ +| getNthHighestSalary(2) | ++------------------------+ +| 200 | ++------------------------+ +``` + +### Example 2: +#### **Input:** +```sql ++----+--------+ +| id | salary | ++----+--------+ +| 1 | 100 | ++----+--------+ +n = 2 +``` +#### **Output:** +```sql ++------------------------+ +| getNthHighestSalary(2) | ++------------------------+ +| null | ++------------------------+ +``` + +--- + +## Approach +1. Use the `DENSE_RANK()` function to rank salaries in descending order. +2. Filter for the `nth` highest salary using a `WHERE` clause. +3. If there is no `nth` highest salary, return `NULL`. + +--- + +## SQL Solution +```sql +CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT AS +BEGIN + RETURN ( + SELECT DISTINCT salary FROM Employee + ORDER BY salary DESC + LIMIT 1 OFFSET N-1 + ); +END; +``` + +### Explanation: +- `ORDER BY salary DESC` sorts salaries in descending order. +- `LIMIT 1 OFFSET N-1` fetches the `nth` highest salary. +- If `N` is larger than the number of salaries, `NULL` is returned. + +--- + +## Pandas Solution +```python +import pandas as pd + +def getNthHighestSalary(employee: pd.DataFrame, N: int) -> pd.DataFrame: + unique_salaries = employee['salary'].drop_duplicates().nlargest(N) + if len(unique_salaries) < N: + return pd.DataFrame({"getNthHighestSalary(N)": [None]}) + return pd.DataFrame({"getNthHighestSalary(N)": [unique_salaries.iloc[-1]]}) +``` + +### Explanation: +- `drop_duplicates()` removes duplicate salaries. +- `nlargest(N)` gets the `N` highest salaries. +- If `N` is greater than available salaries, return `None`. + +--- + +## File Structure +``` +πŸ“‚ nth_highest_salary + β”œβ”€β”€ πŸ“„ README.md # Problem statement, approach, solution + β”œβ”€β”€ πŸ“„ nth_highest_salary.sql # SQL Solution + β”œβ”€β”€ πŸ“„ nth_highest_salary.py # Pandas Solution + └── πŸ“„ example_input_output.txt # Sample input & expected output +``` + +--- + +## References +- [LeetCode Problem #177](https://leetcode.com/problems/nth-highest-salary/) +- [MySQL Documentation - LIMIT & OFFSET](https://dev.mysql.com/doc/refman/8.0/en/select.html) +- [Pandas Documentation](https://pandas.pydata.org/docs/) + +--- + +### Contributors +- **[Antim Pal]** πŸš€ + diff --git a/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/1789. Primary Department for Each Employee.py b/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/1789. Primary Department for Each Employee.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/1789. Primary Department for Each Employee.sql b/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/1789. Primary Department for Each Employee.sql new file mode 100644 index 0000000..9d88082 --- /dev/null +++ b/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/1789. Primary Department for Each Employee.sql @@ -0,0 +1,70 @@ +1789. 
Primary Department for Each Employee + + ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| employee_id | int | +| department_id | int | +| primary_flag | varchar | ++---------------+---------+ +(employee_id, department_id) is the primary key (combination of columns with unique values) for this table. +employee_id is the id of the employee. +department_id is the id of the department to which the employee belongs. +primary_flag is an ENUM (category) of type ('Y', 'N'). If the flag is 'Y', the department is the primary department for the employee. If the flag is 'N', the department is not the primary. + + +Employees can belong to multiple departments. When the employee joins other departments, they need to decide which department is their primary department. Note that when an employee belongs to only one department, their primary column is 'N'. + +Write a solution to report all the employees with their primary department. For employees who belong to one department, report their only department. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Employee table: ++-------------+---------------+--------------+ +| employee_id | department_id | primary_flag | ++-------------+---------------+--------------+ +| 1 | 1 | N | +| 2 | 1 | Y | +| 2 | 2 | N | +| 3 | 3 | N | +| 4 | 2 | N | +| 4 | 3 | Y | +| 4 | 4 | N | ++-------------+---------------+--------------+ +Output: ++-------------+---------------+ +| employee_id | department_id | ++-------------+---------------+ +| 1 | 1 | +| 2 | 1 | +| 3 | 3 | +| 4 | 3 | ++-------------+---------------+ +Explanation: +- The Primary department for employee 1 is 1. +- The Primary department for employee 2 is 1. +- The Primary department for employee 3 is 3. +- The Primary department for employee 4 is 3. + + +# Write your MySQL query statement below +SELECT + employee_id, + department_id +FROM Employee +WHERE primary_flag = 'Y' +UNION DISTINCT +SELECT + employee_id, + department_id +FROM Employee +GROUP BY 1 +HAVING COUNT(*) = 1; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/readme.md b/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/readme.md new file mode 100644 index 0000000..db5ce3b --- /dev/null +++ b/LeetCode SQL 50 Solution/1789. Primary Department for Each Employee/readme.md @@ -0,0 +1,128 @@ +# 🏒 Primary Department for Each Employee - LeetCode 1789 + +## πŸ“Œ Problem Statement +You are given a table **Employee** that contains the following columns: + +- **employee_id**: The ID of the employee. +- **department_id**: The ID of the department to which the employee belongs. +- **primary_flag**: An ENUM ('Y', 'N'). + - If `primary_flag` is `'Y'`, then the department is the primary department for that employee. + - If `primary_flag` is `'N'`, then the department is not primary. + +**Note:** +- An employee can belong to multiple departments. When an employee joins multiple departments, they decide which one is their primary (set to `'Y'`). +- If an employee belongs to only one department, then their `primary_flag` is `'N'`, but that department is still considered their primary department. + +Your task is to **report all employees with their primary department**. +For employees who belong to only one department, report that department. + +Return the result table in **any order**. 
+ +--- + +## πŸ“Š Table Structure + +### **Employee Table** +| Column Name | Type | +| ------------- | ------- | +| employee_id | int | +| department_id | int | +| primary_flag | varchar | + +- `(employee_id, department_id)` is the **primary key** for this table. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Employee Table** +| employee_id | department_id | primary_flag | +| ----------- | ------------- | ------------ | +| 1 | 1 | N | +| 2 | 1 | Y | +| 2 | 2 | N | +| 3 | 3 | N | +| 4 | 2 | N | +| 4 | 3 | Y | +| 4 | 4 | N | + +### **Output:** +| employee_id | department_id | +| ----------- | ------------- | +| 1 | 1 | +| 2 | 1 | +| 3 | 3 | +| 4 | 3 | + +### **Explanation:** +- **Employee 1** belongs to only one department (1), so department 1 is their primary. +- **Employee 2** belongs to departments 1 and 2. The row with `primary_flag = 'Y'` indicates that department 1 is their primary. +- **Employee 3** belongs only to department 3. +- **Employee 4** belongs to departments 2, 3, and 4. The row with `primary_flag = 'Y'` indicates that department 3 is their primary. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +- **Step 1:** For employees who have `primary_flag = 'Y'`, choose those rows. +- **Step 2:** For employees who belong to only one department, return that row. +- Combine the results using `UNION DISTINCT`. + +```sql +SELECT employee_id, department_id +FROM Employee +WHERE primary_flag = 'Y' +UNION DISTINCT +SELECT employee_id, department_id +FROM Employee +GROUP BY employee_id +HAVING COUNT(*) = 1; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. **Group** the DataFrame by `employee_id`. +2. For each group: + - If any row has `primary_flag == 'Y'`, choose the first such row. + - Otherwise (i.e., employee belongs to only one department), choose that row. +3. Return the resulting DataFrame with only `employee_id` and `department_id`. + +```python +import pandas as pd + +def primary_department(employees: pd.DataFrame) -> pd.DataFrame: + def select_primary(group): + # If there's any row with primary_flag 'Y', choose the first one + if (group['primary_flag'] == 'Y').any(): + return group[group['primary_flag'] == 'Y'].iloc[0] + else: + # For employees with only one department + return group.iloc[0] + + result = employees.groupby('employee_id').apply(select_primary).reset_index(drop=True) + return result[['employee_id', 'department_id']] +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Primary-Department +│── README.md +│── solution.sql +│── solution_pandas.py +│── test_cases.sql +│── sample_data.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/primary-department-for-each-employee/) +- πŸ” [MySQL UNION Operator](https://www.w3schools.com/sql/sql_union.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/180. Consecutive Numbers/180. Consecutive Numbers.py b/LeetCode SQL 50 Solution/180. Consecutive Numbers/180. Consecutive Numbers.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/180. Consecutive Numbers/180. Consecutive Numbers.sql b/LeetCode SQL 50 Solution/180. Consecutive Numbers/180. Consecutive Numbers.sql new file mode 100644 index 0000000..3a3943a --- /dev/null +++ b/LeetCode SQL 50 Solution/180. Consecutive Numbers/180. Consecutive Numbers.sql @@ -0,0 +1,133 @@ +180. 
Consecutive Numbers +""" +# Write your MySQL query statement below ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| num | varchar | ++-------------+---------+ +In SQL, id is the primary key for this table. +id is an autoincrement column starting from 1. + + +Find all numbers that appear at least three times consecutively. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Logs table: ++----+-----+ +| id | num | ++----+-----+ +| 1 | 1 | +| 2 | 1 | +| 3 | 1 | +| 4 | 2 | +| 5 | 1 | +| 6 | 2 | +| 7 | 2 | ++----+-----+ +Output: ++-----------------+ +| ConsecutiveNums | ++-----------------+ +| 1 | ++-----------------+ + +""" +Explanation: 1 is the only number that appears consecutively for at least three times. + +# Write your MySQL query statement below +with consecutive_runs as ( + select id, num, lead(num, 1) over (order by id) as lead_num, lag(num, 1) over (order by id) as lag_num + from Logs +) +select distinct num as ConsecutiveNums +from consecutive_runs +where num = lead_num and num = lag_num + +""" +Explanation + Common Table Expression (CTE): consecutive_runs +sql +Copy +Edit +with consecutive_runs as ( + select + id, + num, + lead(num, 1) over (order by id) as lead_num, + lag(num, 1) over (order by id) as lag_num + from Logs +) +Purpose of the CTE: +This part creates a temporary result set named consecutive_runs that enriches each row from the Logs table with two extra columns: + +lead_num: The value of the num column in the next row (based on the id order). +lag_num: The value of the num column in the previous row (based on the id order). +Window Functions: + +lead(num, 1) over (order by id): +This function returns the value of num from the row immediately following the current one when sorted by id. +lag(num, 1) over (order by id): +This function returns the value of num from the row immediately preceding the current one when sorted by id. +This setup allows us to compare each row with its immediate neighbors. + +2. Final SELECT Query +sql +Copy +Edit +select distinct num as ConsecutiveNums +from consecutive_runs +where num = lead_num and num = lag_num +Filtering Condition: +The WHERE clause checks if the current row's num value is equal to both its next (lead_num) and previous (lag_num) values: + +num = lead_num +num = lag_num +This ensures that the number appears consecutively (at least three times in a row). + +DISTINCT Keyword: +The distinct keyword makes sure that each number is listed only once in the final output, even if it occurs in multiple consecutive sequences. + +Result Column Alias: +The output column is renamed to ConsecutiveNums for clarity. + +Summary +CTE Usage: +The query first computes additional columns using window functions (lead and lag) to look at neighboring rows. + +Consecutive Check: +It then filters out rows where the current value is the same as both the previous and next value, meaning there are at least three consecutive occurrences of that number. + +Final Output: +The final result is a list of distinct numbers that appear consecutively in the Logs table. 
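+
+Worked check (an added illustration, using the example above): the row with id = 2 has num = 1, lag_num = 1 (from id = 1) and lead_num = 1 (from id = 3), so both conditions hold and 1 is reported. No row with num = 2 has a 2 on both sides (id = 6 is preceded by 1, and id = 7 has no following row), so 2 is correctly excluded.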
+"""
diff --git a/LeetCode SQL 50 Solution/180. Consecutive Numbers/readme.md b/LeetCode SQL 50 Solution/180. Consecutive Numbers/readme.md
new file mode 100644
index 0000000..73e7b25
--- /dev/null
+++ b/LeetCode SQL 50 Solution/180. Consecutive Numbers/readme.md
@@ -0,0 +1,101 @@
+# 180. Consecutive Numbers
+
+## Problem Statement
+You are given a table `Logs` with the following structure:
+
+```
++-------------+---------+
+| Column Name | Type    |
++-------------+---------+
+| id          | int     |
+| num         | varchar |
++-------------+---------+
+```
+- `id` is the primary key and auto-increments starting from 1.
+- Find all numbers that appear **at least three times consecutively**.
+- Return the result table in **any order**.
+
+## Example 1:
+
+**Input:**
+
+```
+Logs table:
++----+-----+
+| id | num |
++----+-----+
+| 1  | 1   |
+| 2  | 1   |
+| 3  | 1   |
+| 4  | 2   |
+| 5  | 1   |
+| 6  | 2   |
+| 7  | 2   |
++----+-----+
+```
+
+**Output:**
+
+```
++-----------------+
+| ConsecutiveNums |
++-----------------+
+| 1               |
++-----------------+
+```
+
+---
+
+## Solution Approaches
+
+### **SQL Solution (Using Self Join)**
+```sql
+SELECT DISTINCT l1.num AS ConsecutiveNums
+FROM Logs l1
+JOIN Logs l2 ON l1.id = l2.id - 1 AND l1.num = l2.num
+JOIN Logs l3 ON l1.id = l3.id - 2 AND l1.num = l3.num;
+```
+
+### **SQL Solution (Using Window Functions)**
+```sql
+SELECT DISTINCT num AS ConsecutiveNums
+FROM (
+    SELECT num,
+           LAG(num, 1) OVER (ORDER BY id) AS prev1,
+           LAG(num, 2) OVER (ORDER BY id) AS prev2
+    FROM Logs
+) temp
+WHERE num = prev1 AND num = prev2;
+```
+
+### **Pandas Solution**
+```python
+import pandas as pd
+
+def consecutive_numbers(logs: pd.DataFrame) -> pd.DataFrame:
+    logs['prev1'] = logs['num'].shift(1)
+    logs['prev2'] = logs['num'].shift(2)
+
+    result = logs[(logs['num'] == logs['prev1']) & (logs['num'] == logs['prev2'])]
+    return pd.DataFrame({'ConsecutiveNums': result['num'].unique()})
+```
+
+---
+
+## File Structure
+```
+πŸ“‚ Problem Name
+ β”œβ”€β”€ πŸ“„ README.md                 # Problem statement, approach, solution
+ β”œβ”€β”€ πŸ“„ sql_solution.sql          # SQL Solution
+ β”œβ”€β”€ πŸ“„ pandas_solution.py        # Pandas Solution
+ └── πŸ“„ example_input_output.txt  # Sample input & expected output
+```
+
+## Useful Links
+- [LeetCode Problem](https://leetcode.com/problems/consecutive-numbers/) πŸš€
+- [SQL `JOIN` Explained](https://www.w3schools.com/sql/sql_join.asp)
+- [MySQL `LAG()` Window Function](https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html)
+
+---
+
+Feel free to contribute with optimized solutions! πŸ’‘
\ No newline at end of file
diff --git a/LeetCode SQL 50 Solution/185. Department Top Three Salaries/185. Department Top Three Salaries.py b/LeetCode SQL 50 Solution/185. Department Top Three Salaries/185. Department Top Three Salaries.py
new file mode 100644
index 0000000..ff2e501
--- /dev/null
+++ b/LeetCode SQL 50 Solution/185.
Department Top Three Salaries.py @@ -0,0 +1,18 @@ +### **Pandas Solution** + +import pandas as pd + +def department_top_three_salaries(employee: pd.DataFrame, department: pd.DataFrame) -> pd.DataFrame: + # Merge employee and department tables + employee = employee.merge(department, left_on='departmentId', right_on='id', suffixes=('', '_dept')) + + # Rank employees' salaries within each department + employee['rank'] = employee.groupby('departmentId')['salary'].rank(method='dense', ascending=False) + + # Filter top 3 salaries in each department + result = employee[employee['rank'] <= 3][['name_dept', 'name', 'salary']] + + # Rename columns to match the expected output + result.columns = ['Department', 'Employee', 'Salary'] + + return result \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/185. Department Top Three Salaries/185. Department Top Three Salaries.sql b/LeetCode SQL 50 Solution/185. Department Top Three Salaries/185. Department Top Three Salaries.sql new file mode 100644 index 0000000..7998e64 --- /dev/null +++ b/LeetCode SQL 50 Solution/185. Department Top Three Salaries/185. Department Top Three Salaries.sql @@ -0,0 +1,208 @@ +# 185. Department Top Three Salaries + +### Question +``` + Table: Employee + ++--------------+---------+ +| Column Name | Type | ++--------------+---------+ +| id | int | +| name | varchar | +| salary | int | +| departmentId | int | ++--------------+---------+ +``` +id is the primary key (column with unique values) for this table. +departmentId is a foreign key (reference column) of the ID from the Department table. +Each row of this table indicates the ID, name, and salary of an employee. It also contains the ID of their department. + + +Table: Department +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| name | varchar | ++-------------+---------+ +``` +id is the primary key (column with unique values) for this table. +Each row of this table indicates the ID of a department and its name. + + +A company's executives are interested in seeing who earns the most money in each of the company's departments. A high earner in a department is an employee who has a salary in the top three unique salaries for that department. + +Write a solution to find the employees who are high earners in each of the departments. + +Return the result table in any order. + +The result format is in the following example. 
+
+
+Example 1:
+```
+Input:
+Employee table:
++----+-------+--------+--------------+
+| id | name  | salary | departmentId |
++----+-------+--------+--------------+
+| 1  | Joe   | 85000  | 1            |
+| 2  | Henry | 80000  | 2            |
+| 3  | Sam   | 60000  | 2            |
+| 4  | Max   | 90000  | 1            |
+| 5  | Janet | 69000  | 1            |
+| 6  | Randy | 85000  | 1            |
+| 7  | Will  | 70000  | 1            |
++----+-------+--------+--------------+
+```
+```
+Department table:
++----+-------+
+| id | name  |
++----+-------+
+| 1  | IT    |
+| 2  | Sales |
++----+-------+
+Output:
++------------+----------+--------+
+| Department | Employee | Salary |
++------------+----------+--------+
+| IT         | Max      | 90000  |
+| IT         | Joe      | 85000  |
+| IT         | Randy    | 85000  |
+| IT         | Will     | 70000  |
+| Sales      | Henry    | 80000  |
+| Sales      | Sam      | 60000  |
++------------+----------+--------+
+```
+Explanation:
+In the IT department:
+- Max earns the highest unique salary
+- Both Randy and Joe earn the second-highest unique salary
+- Will earns the third-highest unique salary
+
+In the Sales department:
+- Henry earns the highest salary
+- Sam earns the second-highest salary
+- There is no third-highest salary as there are only two employees
+
+
+Constraints:
+
+There are no employees with the exact same name, salary and department.
+
+
+## Solution
+
+```sql
+# Write your MySQL query statement below
+SELECT d.Name as Department,
+       e.Name as Employee,
+       e.Salary as Salary
+FROM Department d, Employee e
+WHERE (
+    SELECT COUNT(distinct Salary)
+    FROM Employee
+    WHERE Salary > e.Salary AND DepartmentId = d.Id
+) < 3 AND e.DepartmentId = d.Id
+ORDER BY d.Id, e.Salary desc;
+```
diff --git a/LeetCode SQL 50 Solution/185. Department Top Three Salaries/readme.md b/LeetCode SQL 50 Solution/185. Department Top Three Salaries/readme.md
new file mode 100644
index 0000000..36117c3
--- /dev/null
+++ b/LeetCode SQL 50 Solution/185. Department Top Three Salaries/readme.md
@@ -0,0 +1,169 @@
+# **185. Department Top Three Salaries**
+
+## **Problem Statement**
+You are given two tables: `Employee` and `Department`.
+
+### **Employee Table**
+```rb
++--------------+---------+
+| Column Name  | Type    |
++--------------+---------+
+| id           | int     |
+| name         | varchar |
+| salary       | int     |
+| departmentId | int     |
++--------------+---------+
+```
+- `id` is the primary key.
+- `departmentId` is a foreign key referencing `id` in the `Department` table.
+- Each row represents an employee with their `id`, `name`, `salary`, and `departmentId`.
+
+### **Department Table**
+```rb
++-------------+---------+
+| Column Name | Type    |
++-------------+---------+
+| id          | int     |
+| name        | varchar |
++-------------+---------+
+```
+- `id` is the primary key.
+- Each row represents a department with its `id` and `name`.
+
+### **Task:**
+Find employees who have a salary in the **top three unique salaries** in their respective departments.
+ +## **Example 1:** +### **Input:** +#### **Employee Table** +``` ++----+-------+--------+--------------+ +| id | name | salary | departmentId | ++----+-------+--------+--------------+ +| 1 | Joe | 85000 | 1 | +| 2 | Henry | 80000 | 2 | +| 3 | Sam | 60000 | 2 | +| 4 | Max | 90000 | 1 | +| 5 | Janet | 69000 | 1 | +| 6 | Randy | 85000 | 1 | +| 7 | Will | 70000 | 1 | ++----+-------+--------+--------------+ +``` +#### **Department Table** +```rb ++----+-------+ +| id | name | ++----+-------+ +| 1 | IT | +| 2 | Sales | ++----+-------+ +``` +### **Output:** +```rb ++------------+----------+--------+ +| Department | Employee | Salary | ++------------+----------+--------+ +| IT | Max | 90000 | +| IT | Joe | 85000 | +| IT | Randy | 85000 | +| IT | Will | 70000 | +| Sales | Henry | 80000 | +| Sales | Sam | 60000 | ++------------+----------+--------+ +``` + +--- + +## **Solution Approaches** + +### **SQL Solution (Using Self Join)** +```sql +SELECT d.Name as Department, + e.Name as Employee, + e.Salary as Salary +FROM Department d, Employee e +WHERE ( + SELECT COUNT(DISTINCT Salary) + FROM Employee + WHERE Salary > e.Salary AND DepartmentId = d.Id +) < 3 AND e.DepartmentId = d.Id +ORDER BY d.Id, e.Salary DESC; +``` +**Explanation:** +- For each employee, we count how many distinct salaries are greater than theirs. +- If fewer than 3 salaries are greater, the employee is in the **top three**. +- We filter results by department and order by salary in descending order. + +--- + +### **SQL Solution (Using Window Functions)** +```sql +WITH RankedSalaries AS ( + SELECT e.name AS Employee, + e.salary AS Salary, + d.name AS Department, + DENSE_RANK() OVER (PARTITION BY e.departmentId ORDER BY e.salary DESC) AS rnk + FROM Employee e + JOIN Department d ON e.departmentId = d.id +) +SELECT Department, Employee, Salary +FROM RankedSalaries +WHERE rnk <= 3; +``` +**Explanation:** +- We use `DENSE_RANK()` to assign a rank to salaries within each department. +- `PARTITION BY departmentId` ensures ranking is specific to each department. +- Employees with `rnk <= 3` are returned. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def department_top_three_salaries(employee: pd.DataFrame, department: pd.DataFrame) -> pd.DataFrame: + # Merge employee and department tables + employee = employee.merge(department, left_on='departmentId', right_on='id', suffixes=('', '_dept')) + + # Rank employees' salaries within each department + employee['rank'] = employee.groupby('departmentId')['salary'].rank(method='dense', ascending=False) + + # Filter top 3 salaries in each department + result = employee[employee['rank'] <= 3][['name_dept', 'name', 'salary']] + + # Rename columns to match the expected output + result.columns = ['Department', 'Employee', 'Salary'] + + return result +``` +**Explanation:** +- Merge the `Employee` and `Department` tables. +- Rank salaries within each department using `.rank()`. +- Filter the top 3 ranked salaries per department. + +--- + +## **File Structure** +``` +πŸ“‚ LeetCode185 +│── πŸ“œ problem_statement.md +│── πŸ“œ sql_self_join_solution.sql +│── πŸ“œ sql_window_function_solution.sql +│── πŸ“œ pandas_solution.py +│── πŸ“œ README.md +``` +- `problem_statement.md` β†’ Contains the problem description and constraints. +- `sql_self_join_solution.sql` β†’ Contains the SQL solution using self-join. +- `sql_window_function_solution.sql` β†’ Contains the SQL solution using `DENSE_RANK()`. +- `pandas_solution.py` β†’ Contains the Pandas solution for Python users. 
+- `README.md` β†’ Provides an overview of the problem and solutions. + +--- + +## **Useful Links** +- [LeetCode Problem 185](https://leetcode.com/problems/department-top-three-salaries/) +- [SQL DENSE_RANK() Function](https://www.w3schools.com/sql/sql_functions.asp) +- [Pandas Rank Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html) + +--- + diff --git a/LeetCode SQL 50 Solution/1907. Count Salary Categories/1907. Count Salary Categories.py b/LeetCode SQL 50 Solution/1907. Count Salary Categories/1907. Count Salary Categories.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1907. Count Salary Categories/1907. Count Salary Categories.sql b/LeetCode SQL 50 Solution/1907. Count Salary Categories/1907. Count Salary Categories.sql new file mode 100644 index 0000000..65786e1 --- /dev/null +++ b/LeetCode SQL 50 Solution/1907. Count Salary Categories/1907. Count Salary Categories.sql @@ -0,0 +1,170 @@ +907. Count Salary Categories +Solved +Medium +Topics +Companies +SQL Schema +Pandas Schema +Table: Accounts + ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| account_id | int | +| income | int | ++-------------+------+ +account_id is the primary key (column with unique values) for this table. +Each row contains information about the monthly income for one bank account. + + +Write a solution to calculate the number of bank accounts for each salary category. The salary categories are: + +"Low Salary": All the salaries strictly less than $20000. +"Average Salary": All the salaries in the inclusive range [$20000, $50000]. +"High Salary": All the salaries strictly greater than $50000. +The result table must contain all three categories. If there are no accounts in a category, return 0. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Accounts table: ++------------+--------+ +| account_id | income | ++------------+--------+ +| 3 | 108939 | +| 2 | 12747 | +| 8 | 87709 | +| 6 | 91796 | ++------------+--------+ +Output: ++----------------+----------------+ +| category | accounts_count | ++----------------+----------------+ +| Low Salary | 1 | +| Average Salary | 0 | +| High Salary | 3 | ++----------------+----------------+ +Explanation: +Low Salary: Account 2. +Average Salary: No accounts. +High Salary: Accounts 3, 6, and 8. + + +# Write your MySQL query statement below +WITH + S AS ( + SELECT 'Low Salary' AS category + UNION + SELECT 'Average Salary' + UNION + SELECT 'High Salary' + ), + T AS ( + SELECT + CASE + WHEN income < 20000 THEN "Low Salary" + WHEN income > 50000 THEN 'High Salary' + ELSE 'Average Salary' + END AS category, + COUNT(1) AS accounts_count + FROM Accounts + GROUP BY 1 + ) +SELECT category, IFNULL(accounts_count, 0) AS accounts_count +FROM + S + LEFT JOIN T USING (category); + + + +### 1. Common Table Expression (CTE) "S" + + +WITH + S AS ( + SELECT 'Low Salary' AS category + UNION + SELECT 'Average Salary' + UNION + SELECT 'High Salary' + ), + + +- **Purpose:** + This CTE defines a static list of salary categories. +- **How it works:** + - The `SELECT` statements with `UNION` combine three rows, each containing one of the categories: `'Low Salary'`, `'Average Salary'`, and `'High Salary'`. +- **Result:** + The result of `S` is a temporary table with one column (`category`) and three rows. + +--- + +### 2. 
Common Table Expression (CTE) "T" + +```sql + T AS ( + SELECT + CASE + WHEN income < 20000 THEN "Low Salary" + WHEN income > 50000 THEN 'High Salary' + ELSE 'Average Salary' + END AS category, + COUNT(1) AS accounts_count + FROM Accounts + GROUP BY 1 + ) +``` + +- **Purpose:** + This CTE categorizes each account from the `Accounts` table based on the `income` value, then counts the number of accounts in each category. +- **How it works:** + - **CASE Statement:** + - If `income` is less than 20000, it labels the row as `"Low Salary"`. + - If `income` is greater than 50000, it labels the row as `"High Salary"`. + - Otherwise, it labels the row as `"Average Salary"`. + - **COUNT(1):** + - It counts the number of rows (accounts) in each category. + - **GROUP BY 1:** + - It groups the results by the first column in the SELECT list, which is the computed `category`. +- **Result:** + The result of `T` is a temporary table that contains two columns: `category` and `accounts_count`. It holds the count of accounts for each salary category that exists in the `Accounts` table. + +--- + +### 3. Final SELECT with LEFT JOIN + +```sql +SELECT category, IFNULL(accounts_count, 0) AS accounts_count +FROM + S + LEFT JOIN T USING (category); +``` + +- **Purpose:** + This part combines the two CTEs (`S` and `T`) to ensure that every salary category from `S` is included in the final result, even if there are no corresponding accounts in `T`. +- **How it works:** + - **LEFT JOIN:** + - It joins `S` (all predefined categories) with `T` (the computed counts) on the `category` column. + - If a category from `S` does not exist in `T` (i.e., there were no accounts that fell into that category), the join will produce a `NULL` value for `accounts_count`. + - **IFNULL(accounts_count, 0):** + - This function replaces any `NULL` in `accounts_count` with `0`, ensuring that the final output shows 0 for categories with no accounts. +- **Result:** + The final output is a list of salary categories along with the corresponding count of accounts. If a category has no accounts, it will show as 0. + +--- + +### Summary + +- **CTE "S":** Defines a static list of salary categories. +- **CTE "T":** Categorizes and counts accounts from the `Accounts` table based on income. +- **LEFT JOIN:** Combines both CTEs so every predefined category appears in the final result, with missing counts defaulting to 0. +- **Final Output:** + A table with two columns: + - `category`: The salary category (Low Salary, Average Salary, High Salary). + - `accounts_count`: The number of accounts in that category (or 0 if there are none). diff --git a/LeetCode SQL 50 Solution/1907. Count Salary Categories/readme.md b/LeetCode SQL 50 Solution/1907. Count Salary Categories/readme.md new file mode 100644 index 0000000..e5fc42d --- /dev/null +++ b/LeetCode SQL 50 Solution/1907. Count Salary Categories/readme.md @@ -0,0 +1,184 @@ +# πŸ’° Count Salary Categories - LeetCode 907 + +## πŸ“Œ Problem Statement +You are given a table **Accounts** that contains information about bank accounts, including their monthly income. +Your task is to calculate the number of bank accounts in each salary category. + +The salary categories are defined as follows: +- **"Low Salary"**: Salaries strictly less than \$20,000. +- **"Average Salary"**: Salaries in the inclusive range [\$20,000, \$50,000]. +- **"High Salary"**: Salaries strictly greater than \$50,000. + +The result table must contain **all three categories**. If there are no accounts in a category, return 0. 
+ +Return the result in **any order**. + +--- + +## πŸ“Š Table Structure + +### **Accounts Table** +| Column Name | Type | +| ----------- | ---- | +| account_id | int | +| income | int | + +- `account_id` is the **primary key** for this table. +- Each row contains the monthly income for one bank account. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Accounts Table** +| account_id | income | +| ---------- | ------ | +| 3 | 108939 | +| 2 | 12747 | +| 8 | 87709 | +| 6 | 91796 | + +### **Output:** +| category | accounts_count | +| -------------- | -------------- | +| Low Salary | 1 | +| Average Salary | 0 | +| High Salary | 3 | + +### **Explanation:** +- **Low Salary**: Account with income 12747. +- **Average Salary**: No accounts have an income in the range [20000, 50000]. +- **High Salary**: Accounts with incomes 108939, 87709, and 91796. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. **CTE "S"**: Create a static table with the three salary categories. + ```sql + WITH S AS ( + SELECT 'Low Salary' AS category + UNION + SELECT 'Average Salary' + UNION + SELECT 'High Salary' + ), + ``` + - This defines the three salary categories to ensure every category appears in the final result. + +2. **CTE "T"**: Categorize each account from the **Accounts** table using a `CASE` statement and count the number of accounts in each category. + ```sql + T AS ( + SELECT + CASE + WHEN income < 20000 THEN 'Low Salary' + WHEN income > 50000 THEN 'High Salary' + ELSE 'Average Salary' + END AS category, + COUNT(1) AS accounts_count + FROM Accounts + GROUP BY 1 + ) + ``` + - The `CASE` statement assigns a salary category based on the income. + - `COUNT(1)` counts the number of accounts in each category. + +3. **Final SELECT with LEFT JOIN**: Combine the static category table `S` with the computed counts from `T` to ensure every category is included, using `IFNULL` to convert any missing count to 0. + ```sql + SELECT S.category, IFNULL(T.accounts_count, 0) AS accounts_count + FROM S + LEFT JOIN T USING (category); + ``` + +### βœ… **Complete SQL Query:** +```sql +WITH S AS ( + SELECT 'Low Salary' AS category + UNION + SELECT 'Average Salary' + UNION + SELECT 'High Salary' +), +T AS ( + SELECT + CASE + WHEN income < 20000 THEN 'Low Salary' + WHEN income > 50000 THEN 'High Salary' + ELSE 'Average Salary' + END AS category, + COUNT(1) AS accounts_count + FROM Accounts + GROUP BY 1 +) +SELECT S.category, IFNULL(T.accounts_count, 0) AS accounts_count +FROM S +LEFT JOIN T USING (category); +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. **Categorize Accounts**: Create a new column `category` in the DataFrame by applying the salary conditions. +2. **Group and Count**: Group by the `category` column and count the number of accounts. +3. **Merge with Static Categories**: Ensure all three salary categories appear by merging with a predefined DataFrame that contains all categories, filling missing counts with 0. 
+ +```python +import pandas as pd + +def count_salary_categories(accounts: pd.DataFrame) -> pd.DataFrame: + # Define the salary categorization function + def categorize(income): + if income < 20000: + return 'Low Salary' + elif income > 50000: + return 'High Salary' + else: + return 'Average Salary' + + # Apply categorization + accounts['category'] = accounts['income'].apply(categorize) + + # Count accounts in each category + counts = accounts.groupby('category').size().reset_index(name='accounts_count') + + # Define static categories DataFrame + categories = pd.DataFrame({ + 'category': ['Low Salary', 'Average Salary', 'High Salary'] + }) + + # Merge to ensure all categories are present, fill missing values with 0 + result = categories.merge(counts, on='category', how='left') + result['accounts_count'] = result['accounts_count'].fillna(0).astype(int) + + return result + +# Example usage: +# df = pd.read_csv("sample_accounts.csv") +# print(count_salary_categories(df)) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Count-Salary-Categories +│── README.md +│── solution.sql +│── solution_pandas.py +│── test_cases.sql +│── sample_accounts.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/count-salary-categories/) +- πŸ“ [MySQL WITH Clause (CTE)](https://www.w3schools.com/sql/sql_with.asp) +- πŸ” [MySQL IFNULL Function](https://www.w3schools.com/sql/func_mysql_ifnull.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) +``` + diff --git a/LeetCode SQL 50 Solution/1934. Confirmation Rate/1934. Confirmation Rate.py b/LeetCode SQL 50 Solution/1934. Confirmation Rate/1934. Confirmation Rate.py new file mode 100644 index 0000000..e69de29 diff --git a/1934. Confirmation Rate.sql b/LeetCode SQL 50 Solution/1934. Confirmation Rate/1934. Confirmation Rate.sql similarity index 100% rename from 1934. Confirmation Rate.sql rename to LeetCode SQL 50 Solution/1934. Confirmation Rate/1934. Confirmation Rate.sql diff --git a/LeetCode SQL 50 Solution/1934. Confirmation Rate/readme.md b/LeetCode SQL 50 Solution/1934. Confirmation Rate/readme.md new file mode 100644 index 0000000..eab61d4 --- /dev/null +++ b/LeetCode SQL 50 Solution/1934. Confirmation Rate/readme.md @@ -0,0 +1,173 @@ +# πŸ“© Confirmation Rate - LeetCode 1934 + +## πŸ“Œ Problem Statement +You are given two tables: **Signups** and **Confirmations**. + +- The **Signups** table contains the signup time for each user. +- The **Confirmations** table records each confirmation request made by a user along with the outcome (either `'confirmed'` or `'timeout'`). + +The **confirmation rate** for a user is defined as: +\[ +\text{confirmation rate} = \frac{\text{Number of confirmed messages}}{\text{Total number of confirmation requests}} +\] +If a user did not request any confirmation messages, their confirmation rate is defined as 0. + +Your task is to calculate the confirmation rate for each user and round it to two decimal places. + +Return the result table in **any order**. + +--- + +## πŸ“Š Table Structure + +### **Signups Table** +| Column Name | Type | +| ----------- | -------- | +| user_id | int | +| time_stamp | datetime | + +- `user_id` is unique for each user. + +### **Confirmations Table** +| Column Name | Type | +| ----------- | -------- | +| user_id | int | +| time_stamp | datetime | +| action | ENUM | + +- `(user_id, time_stamp)` is the primary key. +- `action` is either `'confirmed'` or `'timeout'`. 
+- `user_id` in Confirmations is a foreign key to Signups. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Signups Table** +| user_id | time_stamp | +| ------- | ------------------- | +| 3 | 2020-03-21 10:16:13 | +| 7 | 2020-01-04 13:57:59 | +| 2 | 2020-07-29 23:09:44 | +| 6 | 2020-12-09 10:39:37 | + +#### **Confirmations Table** +| user_id | time_stamp | action | +| ------- | ------------------- | --------- | +| 3 | 2021-01-06 03:30:46 | timeout | +| 3 | 2021-07-14 14:00:00 | timeout | +| 7 | 2021-06-12 11:57:29 | confirmed | +| 7 | 2021-06-13 12:58:28 | confirmed | +| 7 | 2021-06-14 13:59:27 | confirmed | +| 2 | 2021-01-22 00:00:00 | confirmed | +| 2 | 2021-02-28 23:59:59 | timeout | + +### **Output:** +| user_id | confirmation_rate | +| ------- | ----------------- | +| 6 | 0.00 | +| 3 | 0.00 | +| 7 | 1.00 | +| 2 | 0.50 | + +### **Explanation:** +- **User 6** did not request any confirmation messages, so the rate is **0.00**. +- **User 3** made 2 requests; both were timeouts, so the rate is **0.00**. +- **User 7** made 3 requests; all were confirmed, so the rate is **1.00**. +- **User 2** made 2 requests; 1 confirmed and 1 timeout, so the rate is **0.50**. + +--- + +## πŸ–₯ SQL Solutions + +### βœ… **Solution 1: Using Shorthand Boolean Expressions** +#### **Explanation:** +- `SUM(action = 'confirmed')` counts the number of rows where the action is `'confirmed'` (in MySQL, boolean expressions return 1 if true, 0 if false). +- `COUNT(1)` counts all confirmation requests. +- We use a `LEFT JOIN` between **Signups** and **Confirmations** so that users without any confirmation requests are included (their rate becomes 0). +- `IFNULL` is used to handle cases where a user has no confirmation requests. + +```sql +SELECT + user_id, + ROUND(IFNULL(SUM(action = 'confirmed') / COUNT(1), 0), 2) AS confirmation_rate +FROM Signups +LEFT JOIN Confirmations USING (user_id) +GROUP BY user_id; +``` + +### βœ… **Solution 2: Using a CASE Statement** +#### **Explanation:** +- The `CASE` statement explicitly counts 1 for `'confirmed'` actions and 0 otherwise. +- The rest of the query logic remains similar. + +```sql +SELECT + user_id, + ROUND(IFNULL(SUM(CASE WHEN action = 'confirmed' THEN 1 ELSE 0 END) / COUNT(1), 0), 2) AS confirmation_rate +FROM Signups +LEFT JOIN Confirmations USING (user_id) +GROUP BY user_id; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. **Merge** the **Signups** and **Confirmations** DataFrames on `user_id` using a left join, so that all users are included. +2. **Count** the total number of confirmation requests and the number of confirmed requests for each user. +3. **Calculate** the confirmation rate as the number of confirmed requests divided by the total requests. +4. **Handle** users with no confirmation requests by setting their rate to 0. +5. **Round** the confirmation rate to two decimal places. + +```python +import pandas as pd + +def confirmation_rate(signups: pd.DataFrame, confirmations: pd.DataFrame) -> pd.DataFrame: + # Merge the dataframes to include all users from signups + merged = pd.merge(signups, confirmations, on='user_id', how='left') + + # Group by user_id and calculate total requests and confirmed requests + summary = merged.groupby('user_id').agg( + total_requests=('action', 'count'), + confirmed_requests=('action', lambda x: (x == 'confirmed').sum()) + ).reset_index() + + # Calculate confirmation rate; if total_requests is 0, rate is 0. 
+ summary['confirmation_rate'] = summary.apply( + lambda row: round(row['confirmed_requests'] / row['total_requests'], 2) if row['total_requests'] > 0 else 0.00, + axis=1 + ) + + # Select the relevant columns + result = summary[['user_id', 'confirmation_rate']] + return result + +# Example usage: +# signups_df = pd.read_csv("signups.csv") +# confirmations_df = pd.read_csv("confirmations.csv") +# print(confirmation_rate(signups_df, confirmations_df)) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Confirmation-Rate +│── README.md +│── solution.sql +│── solution_pandas.py +│── test_cases.sql +│── sample_signups.csv +│── sample_confirmations.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/confirmation-rate/) +- πŸ“ [MySQL IFNULL Function](https://www.w3schools.com/sql/func_mysql_ifnull.asp) +- πŸ” [MySQL ROUND Function](https://www.w3schools.com/sql/func_mysql_round.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.py b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.py new file mode 100644 index 0000000..7bc23de --- /dev/null +++ b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.py @@ -0,0 +1,5 @@ +import pandas as pd + +def delete_duplicate_emails(person: pd.DataFrame) -> None: + # Keep only the first occurrence of each email (smallest id) + person.drop_duplicates(subset=['email'], keep='first', inplace=True) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.sql b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.sql new file mode 100644 index 0000000..b9596df --- /dev/null +++ b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/196. Delete Duplicate Emails.sql @@ -0,0 +1,6 @@ +-- 196. Delete Duplicate Emails +# Write your MySQL query statement below +# Write your MySQL query statement below +DELETE p2 FROM Person p1 +JOIN Person p2 +ON p1.email = p2.email AND p1.id < p2.id; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/readme.md b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/readme.md new file mode 100644 index 0000000..8e355db --- /dev/null +++ b/LeetCode SQL 50 Solution/196. Delete Duplicate Emails/readme.md @@ -0,0 +1,118 @@ + +# **196. Delete Duplicate Emails** + +## **Problem Statement** +You are given a table called `Person`, which stores email addresses. + +### **Person Table** +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| email | varchar | ++-------------+---------+ +``` +- `id` is the **primary key**. +- Each row contains an **email address**. +- All emails are in **lowercase**. + +### **Task:** +Delete all **duplicate emails**, keeping only **one unique email** with the **smallest id**. + +--- + +## **Example 1:** +### **Input:** +#### **Person Table** +``` ++----+------------------+ +| id | email | ++----+------------------+ +| 1 | john@example.com | +| 2 | bob@example.com | +| 3 | john@example.com | ++----+------------------+ +``` +### **Output:** +``` ++----+------------------+ +| id | email | ++----+------------------+ +| 1 | john@example.com | +| 2 | bob@example.com | ++----+------------------+ +``` +### **Explanation:** +- `john@example.com` appears **twice**. 
+- We keep the row with the **smallest `id`** (`id = 1`). +- The duplicate (`id = 3`) is **deleted**. + +--- + +## **Solution Approaches** + +### **SQL Solution (Using Self Join)** +```sql +DELETE p2 FROM Person p1 +JOIN Person p2 +ON p1.email = p2.email AND p1.id < p2.id; +``` +**Explanation:** +- `p1` and `p2` refer to the **same table** (`Person`). +- We **join** them on `email` to find duplicates. +- If `p1.id < p2.id`, we delete `p2`, keeping the row with the **smallest id**. + +--- + +### **SQL Solution (Using Subquery)** +```sql +DELETE FROM Person +WHERE id NOT IN ( + SELECT MIN(id) FROM Person GROUP BY email +); +``` +**Explanation:** +- We **group** by `email` and **select the smallest `id`** for each email. +- The `DELETE` statement removes rows **not in** this list. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def delete_duplicate_emails(person: pd.DataFrame) -> None: + # Keep only the first occurrence of each email (smallest id) + person.drop_duplicates(subset=['email'], keep='first', inplace=True) +``` +**Explanation:** +- `drop_duplicates(subset=['email'], keep='first', inplace=True)`: + - Keeps only **the first occurrence** of each email. + - Ensures **modification happens in place**. + +--- + +## **File Structure** +``` +πŸ“‚ LeetCode196 +│── πŸ“œ problem_statement.md +│── πŸ“œ sql_self_join_solution.sql +│── πŸ“œ sql_subquery_solution.sql +│── πŸ“œ pandas_solution.py +│── πŸ“œ README.md +``` +- `problem_statement.md` β†’ Contains the problem description. +- `sql_self_join_solution.sql` β†’ Contains the SQL solution using **JOIN**. +- `sql_subquery_solution.sql` β†’ Contains the SQL solution using **Subquery**. +- `pandas_solution.py` β†’ Contains the Pandas solution for Python users. +- `README.md` β†’ Provides an overview of the problem and solutions. + +--- + +## **Useful Links** +- [LeetCode Problem 196](https://leetcode.com/problems/delete-duplicate-emails/) +- [SQL DELETE Statement](https://www.w3schools.com/sql/sql_delete.asp) +- [Pandas drop_duplicates()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html) + +--- diff --git a/LeetCode SQL 50 Solution/197. Rising Temperature/Rising Temperature.py b/LeetCode SQL 50 Solution/197. Rising Temperature/Rising Temperature.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/197. Rising Temperature/Rising Temperature.sql b/LeetCode SQL 50 Solution/197. Rising Temperature/Rising Temperature.sql new file mode 100644 index 0000000..72b2d7f --- /dev/null +++ b/LeetCode SQL 50 Solution/197. Rising Temperature/Rising Temperature.sql @@ -0,0 +1,21 @@ +"""197. Rising Temperature""" + +WITH PreviousWeatherData AS +( + SELECT + id, + recordDate, + temperature, + LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature, + LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate + FROM + Weather +) +SELECT + id +FROM + PreviousWeatherData +WHERE + temperature > PreviousTemperature +AND + recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY); \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/197. Rising Temperature/readme.md b/LeetCode SQL 50 Solution/197. Rising Temperature/readme.md new file mode 100644 index 0000000..79d81cd --- /dev/null +++ b/LeetCode SQL 50 Solution/197. Rising Temperature/readme.md @@ -0,0 +1,157 @@ + +# **197. Rising Temperature** + +## **Problem Statement** +You are given a table called `Weather`, which contains daily temperature records. 
+ +### **Weather Table** +``` ++---------------+---------+ +| Column Name | Type | ++---------------+---------+ +| id | int | +| recordDate | date | +| temperature | int | ++---------------+---------+ +``` +- `id` is the **primary key**. +- Each row contains: + - `recordDate`: The **date** of the temperature record. + - `temperature`: The **temperature recorded** on that date. + +### **Task:** +Find all `id`s where the **temperature** is **higher** than the **previous day's temperature**. + +--- + +## **Example 1:** +### **Input:** +#### **Weather Table** +``` ++----+------------+-------------+ +| id | recordDate | temperature | ++----+------------+-------------+ +| 1 | 2024-08-01 | 30 | +| 2 | 2024-08-02 | 32 | +| 3 | 2024-08-03 | 31 | +| 4 | 2024-08-04 | 35 | +| 5 | 2024-08-05 | 36 | ++----+------------+-------------+ +``` +### **Output:** +``` ++----+ +| id | ++----+ +| 2 | +| 4 | +| 5 | ++----+ +``` +### **Explanation:** +- `id = 2`: `32 > 30` (08-02 > 08-01 βœ…) +- `id = 3`: `31 < 32` (Skipped ❌) +- `id = 4`: `35 > 31` (08-04 > 08-03 βœ…) +- `id = 5`: `36 > 35` (08-05 > 08-04 βœ…) + +--- + +## **Solution Approaches** + +### **SQL Solution (Using `LAG()` Window Function)** +```sql +WITH PreviousWeatherData AS +( + SELECT + id, + recordDate, + temperature, + LAG(temperature, 1) OVER (ORDER BY recordDate) AS PreviousTemperature, + LAG(recordDate, 1) OVER (ORDER BY recordDate) AS PreviousRecordDate + FROM + Weather +) +SELECT + id +FROM + PreviousWeatherData +WHERE + temperature > PreviousTemperature +AND + recordDate = DATE_ADD(PreviousRecordDate, INTERVAL 1 DAY); +``` +**Explanation:** +- We use `LAG()` to fetch: + - The **previous day's temperature**. + - The **previous day's date**. +- The `WHERE` clause filters rows where: + - The **temperature is higher than the previous day**. + - The **date difference is exactly 1 day**. + +--- + +### **SQL Solution (Using Self Join)** +```sql +SELECT w1.id +FROM Weather w1 +JOIN Weather w2 +ON DATEDIFF(w1.recordDate, w2.recordDate) = 1 +AND w1.temperature > w2.temperature; +``` +**Explanation:** +- We **self-join** the `Weather` table. +- The condition `DATEDIFF(w1.recordDate, w2.recordDate) = 1` ensures: + - We are comparing **consecutive days**. +- The condition `w1.temperature > w2.temperature` ensures: + - We select days where the **temperature increased**. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame: + weather.sort_values(by="recordDate", inplace=True) + weather["PreviousTemp"] = weather["temperature"].shift(1) + weather["PreviousDate"] = weather["recordDate"].shift(1) + + result = weather[ + (weather["temperature"] > weather["PreviousTemp"]) & + ((weather["recordDate"] - weather["PreviousDate"]).dt.days == 1) + ] + + return result[["id"]] +``` +**Explanation:** +- We **sort** by `recordDate`. +- We **shift** the temperature and date to get the previous day's values. +- We **filter** where: + - Temperature **increased**. + - Date difference is **1 day**. + +--- + +## **File Structure** +``` +πŸ“‚ LeetCode197 +│── πŸ“œ problem_statement.md +│── πŸ“œ sql_lag_solution.sql +│── πŸ“œ sql_self_join_solution.sql +│── πŸ“œ pandas_solution.py +│── πŸ“œ README.md +``` +- `problem_statement.md` β†’ Contains the problem description. +- `sql_lag_solution.sql` β†’ Contains the SQL solution using **LAG()**. +- `sql_self_join_solution.sql` β†’ Contains the SQL solution using **Self Join**. +- `pandas_solution.py` β†’ Contains the Pandas solution. 
+- `README.md` β†’ Provides an overview of the problem and solutions. + +--- + +## **Useful Links** +- [LeetCode Problem 197](https://leetcode.com/problems/rising-temperature/) +- [SQL LAG() Function](https://www.w3schools.com/sql/sql_ref_window_functions.asp) +- [SQL JOIN](https://www.w3schools.com/sql/sql_join.asp) +- [Pandas shift()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shift.html) + diff --git a/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/1978. Employees Whose Manager Left the Company.py b/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/1978. Employees Whose Manager Left the Company.py new file mode 100644 index 0000000..e69de29 diff --git a/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/1978. Employees Whose Manager Left the Company.sql b/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/1978. Employees Whose Manager Left the Company.sql new file mode 100644 index 0000000..b0014c3 --- /dev/null +++ b/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/1978. Employees Whose Manager Left the Company.sql @@ -0,0 +1,182 @@ +1978. Employees Whose Manager Left the Company +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Employees + ++-------------+----------+ +| Column Name | Type | ++-------------+----------+ +| employee_id | int | +| name | varchar | +| manager_id | int | +| salary | int | ++-------------+----------+ +In SQL, employee_id is the primary key for this table. +This table contains information about the employees, their salary, and the ID of their manager. Some employees do not have a manager (manager_id is null). + + +Find the IDs of the employees whose salary is strictly less than $30000 and whose manager left the company. When a manager leaves the company, their information is deleted from the Employees table, but the reports still have their manager_id set to the manager that left. + +Return the result table ordered by employee_id. + +The result format is in the following example. + + + +Example 1: + +Input: +Employees table: ++-------------+-----------+------------+--------+ +| employee_id | name | manager_id | salary | ++-------------+-----------+------------+--------+ +| 3 | Mila | 9 | 60301 | +| 12 | Antonella | null | 31000 | +| 13 | Emery | null | 67084 | +| 1 | Kalel | 11 | 21241 | +| 9 | Mikaela | null | 50937 | +| 11 | Joziah | 6 | 28485 | ++-------------+-----------+------------+--------+ +Output: ++-------------+ +| employee_id | ++-------------+ +| 11 | ++-------------+ + +Explanation: +The employees with a salary less than $30000 are 1 (Kalel) and 11 (Joziah). +Kalel's manager is employee 11, who is still in the company (Joziah). +Joziah's manager is employee 6, who left the company because there is no row for employee 6 as it was deleted. + + + +# Write your MySQL query statement below +SELECT e1.employee_id +FROM + Employees AS e1 + LEFT JOIN Employees AS e2 ON e1.manager_id = e2.employee_id +WHERE e1.salary < 30000 AND e1.manager_id IS NOT NULL AND e2.employee_id IS NULL +ORDER BY 1; + + + + + +Sure! Let's break down the given MySQL query step by step, explain each part, and analyze its purpose. + +--- + +## **Understanding the Problem Statement** +We want to find **employees** whose: +1. **Salary is less than 30,000.** +2. **Have a manager (i.e., `manager_id IS NOT NULL`).** +3. 
**Their manager does not exist in the `Employees` table.** (i.e., the `manager_id` they refer to does not match any existing `employee_id`). + +--- + +## **Database Schema** +Assume we have a table called `Employees` with the following structure: + +| employee_id | name | salary | manager_id | +|------------|--------|--------|------------| +| 1 | Alice | 50000 | NULL | +| 2 | Bob | 20000 | 1 | +| 3 | Charlie| 25000 | 4 | +| 4 | David | 60000 | NULL | +| 5 | Emma | 27000 | 10 | + +- **`employee_id`**: Unique ID for each employee. +- **`name`**: Employee’s name. +- **`salary`**: Salary of the employee. +- **`manager_id`**: The `employee_id` of their manager. `NULL` means the employee has no manager. + +--- + +## **Step-by-Step Explanation of the Query** +```sql +SELECT e1.employee_id +``` +- We are selecting the `employee_id` of employees who satisfy the given conditions. + +```sql +FROM Employees AS e1 +``` +- We define the alias `e1` for the `Employees` table to refer to employees. + +```sql +LEFT JOIN Employees AS e2 ON e1.manager_id = e2.employee_id +``` +- We perform a **LEFT JOIN** to check if the `manager_id` of `e1` exists in the `employee_id` column of another instance of `Employees` (aliased as `e2`). +- If there is **no match**, it means the manager does not exist in the table. + +```sql +WHERE e1.salary < 30000 +``` +- We **filter employees** whose salary is **less than 30,000**. + +```sql +AND e1.manager_id IS NOT NULL +``` +- Ensures that the employee **has a manager** (`manager_id` should not be `NULL`). + +```sql +AND e2.employee_id IS NULL +``` +- This is the key condition! +- Since we did a **LEFT JOIN**, `e2.employee_id` will be `NULL` if the manager **does not exist** in the `Employees` table. + +```sql +ORDER BY 1; +``` +- Orders the result by `employee_id` in ascending order. + +--- + +## **Example Execution** +Using the above `Employees` table, let's analyze the output: + +| employee_id | name | salary | manager_id | +|------------|--------|--------|------------| +| 1 | Alice | 50000 | NULL | +| 2 | Bob | 20000 | 1 | +| 3 | Charlie| 25000 | 4 | +| 4 | David | 60000 | NULL | +| 5 | Emma | 27000 | 10 | + +### **Step 1: LEFT JOIN** +We join the table on `manager_id = employee_id`: + +| e1.employee_id | e1.salary | e1.manager_id | e2.employee_id | +|---------------|----------|--------------|---------------| +| 2 | 20000 | 1 | 1 | +| 3 | 25000 | 4 | 4 | +| 5 | 27000 | 10 | NULL | + +### **Step 2: Apply WHERE Conditions** +- `e1.salary < 30000`: Only `Bob`, `Charlie`, and `Emma` qualify. +- `e1.manager_id IS NOT NULL`: No issue. +- `e2.employee_id IS NULL`: Only **Emma (employee_id = 5) qualifies** because `manager_id = 10`, and there is no `employee_id = 10`. + +### **Final Output** +| employee_id | +|------------| +| 5 | + +So, the result is: +``` +5 +``` +--- +## **Summary** +### **Key Takeaways:** +βœ” We use **LEFT JOIN** to check if the manager exists. +βœ” We filter employees whose **salary is below 30,000**. +βœ” We ensure the manager **does not exist** in the database (`e2.employee_id IS NULL`). +βœ” The query returns a **list of employees** who meet these conditions. + +Would you like me to explain anything further or modify the query for additional scenarios? 😊 \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/readme.md b/LeetCode SQL 50 Solution/1978. Employees Whose Manager Left the Company/readme.md new file mode 100644 index 0000000..66aea43 --- /dev/null +++ b/LeetCode SQL 50 Solution/1978. 
Employees Whose Manager Left the Company/readme.md @@ -0,0 +1,147 @@ +# 🏒 Employees Whose Manager Left the Company - LeetCode 1978 + +## πŸ“Œ Problem Statement +You are given a table **Employees** that contains information about employees, including their salary and the ID of the manager they report to. +When a manager leaves the company, their row is deleted from the **Employees** table, but the `manager_id` in the records of their reports still remains. + +Your task is to **find the IDs of employees** who: +- Have a salary **strictly less than $30000**. +- Have a **manager** (i.e., `manager_id` is not `NULL`) whose record is **missing** in the table (i.e., the manager left the company). + +Return the result table **ordered by `employee_id`** in ascending order. + +--- + +## πŸ“Š Table Structure + +### **Employees Table** +| Column Name | Type | +| ----------- | ------- | +| employee_id | int | +| name | varchar | +| manager_id | int | +| salary | int | + +- `employee_id` is the **primary key**. +- `manager_id` may be `NULL` if an employee does not have a manager. +- When a manager leaves, their row is deleted, but the `manager_id` remains in the reports' records. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Employees Table** +| employee_id | name | manager_id | salary | +| ----------- | --------- | ---------- | ------ | +| 3 | Mila | 9 | 60301 | +| 12 | Antonella | NULL | 31000 | +| 13 | Emery | NULL | 67084 | +| 1 | Kalel | 11 | 21241 | +| 9 | Mikaela | NULL | 50937 | +| 11 | Joziah | 6 | 28485 | + +### **Output:** +| employee_id | +| ----------- | +| 11 | + +### **Explanation:** +- **Employees with salary < $30000:** + - **Kalel (ID 1)** with salary 21241, whose manager is employee 11. + - **Joziah (ID 11)** with salary 28485, whose manager is employee 6. +- **Kalel's manager (ID 11)** is still in the table. +- **Joziah's manager (ID 6)** is missing from the table, meaning that manager left the company. +Thus, only **employee 11 (Joziah)** meets the criteria. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +1. **Self-Join:** + - Use a `LEFT JOIN` on the **Employees** table with itself to check if an employee's manager exists. + - Alias `e1` represents the employee, and alias `e2` represents the manager. +2. **Filter Conditions:** + - The employee's `salary` must be strictly less than 30000. + - The employee must have a manager (`e1.manager_id IS NOT NULL`). + - The join should fail for the manager (`e2.employee_id IS NULL`), indicating the manager left. +3. **Order the Result:** + - Order the final result by `employee_id`. + +### βœ… **SQL Query:** +```sql +SELECT e1.employee_id +FROM Employees AS e1 +LEFT JOIN Employees AS e2 ON e1.manager_id = e2.employee_id +WHERE e1.salary < 30000 + AND e1.manager_id IS NOT NULL + AND e2.employee_id IS NULL +ORDER BY e1.employee_id; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. **Self-Merge:** + - Merge the **Employees** DataFrame with itself on `manager_id` (from the employee side) and `employee_id` (from the manager side) using a left join. +2. **Filter Rows:** + - Keep rows where: + - `salary` is less than 30000. + - `manager_id` is not null. + - The merged manager information is missing (i.e., the manager left). +3. **Sort Result:** + - Sort the result by `employee_id`. 
+ +### βœ… **Pandas Code:** +```python +import pandas as pd + +def employees_with_left_manager(employees: pd.DataFrame) -> pd.DataFrame: + # Perform a left merge on the Employees table to find existing managers + merged = employees.merge( + employees[['employee_id']], + left_on='manager_id', + right_on='employee_id', + how='left', + suffixes=('', '_manager') + ) + + # Filter: salary < 30000, manager_id is not null, and manager does not exist (NaN in employee_id_manager) + filtered = merged[ + (merged['salary'] < 30000) & + (merged['manager_id'].notnull()) & + (merged['employee_id_manager'].isna()) + ] + + # Select the required column and sort by employee_id + result = filtered[['employee_id']].sort_values('employee_id') + return result + +# Example usage: +# employees_df = pd.read_csv("employees.csv") +# print(employees_with_left_manager(employees_df)) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Employees-Manager-Left +│── README.md +│── solution.sql +│── solution_pandas.py +│── test_cases.sql +│── sample_data.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/employees-whose-manager-left-the-company/) +- πŸ” [MySQL LEFT JOIN Documentation](https://www.w3schools.com/sql/sql_join_left.asp) +- 🐍 [Pandas Merge Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) +``` + diff --git a/LeetCode SQL 50 Solution/2356. Number of Unique Subjects Taught by Each Teacher/2356. Number of Unique Subjects Taught by Each Teacher.sql b/LeetCode SQL 50 Solution/2356. Number of Unique Subjects Taught by Each Teacher/2356. Number of Unique Subjects Taught by Each Teacher.sql new file mode 100644 index 0000000..16e090a --- /dev/null +++ b/LeetCode SQL 50 Solution/2356. Number of Unique Subjects Taught by Each Teacher/2356. Number of Unique Subjects Taught by Each Teacher.sql @@ -0,0 +1,61 @@ +2356. Number of Unique Subjects Taught by Each Teacher + +Table: Teacher + ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| teacher_id | int | +| subject_id | int | +| dept_id | int | ++-------------+------+ +(subject_id, dept_id) is the primary key (combinations of columns with unique values) of this table. +Each row in this table indicates that the teacher with teacher_id teaches the subject subject_id in the department dept_id. + + +Write a solution to calculate the number of unique subjects each teacher teaches in the university. + +Return the result table in any order. + +The result format is shown in the following example. + + + +Example 1: + +Input: +Teacher table: ++------------+------------+---------+ +| teacher_id | subject_id | dept_id | ++------------+------------+---------+ +| 1 | 2 | 3 | +| 1 | 2 | 4 | +| 1 | 3 | 3 | +| 2 | 1 | 1 | +| 2 | 2 | 1 | +| 2 | 3 | 1 | +| 2 | 4 | 1 | ++------------+------------+---------+ +Output: ++------------+-----+ +| teacher_id | cnt | ++------------+-----+ +| 1 | 2 | +| 2 | 4 | ++------------+-----+ +Explanation: +Teacher 1: + - They teach subject 2 in departments 3 and 4. + - They teach subject 3 in department 3. +Teacher 2: + - They teach subject 1 in department 1. + - They teach subject 2 in department 1. + - They teach subject 3 in department 1. + - They teach subject 4 in department 1. + + + # Write your MySQL query statement below +SELECT teacher_id, COUNT(DISTINCT subject_id) AS cnt +FROM Teacher +GROUP BY 1; + diff --git a/LeetCode SQL 50 Solution/2356. Number of Unique Subjects Taught by Each Teacher/readme.md b/LeetCode SQL 50 Solution/2356. 
Number of Unique Subjects Taught by Each Teacher/readme.md new file mode 100644 index 0000000..e752e35 --- /dev/null +++ b/LeetCode SQL 50 Solution/2356. Number of Unique Subjects Taught by Each Teacher/readme.md @@ -0,0 +1,116 @@ +# πŸ“š Number of Unique Subjects Taught by Each Teacher - LeetCode 2356 + +## πŸ“Œ Problem Statement +You are given a table **Teacher** that provides information about the subjects taught by teachers in various departments of a university. + +Your task is to calculate the **number of unique subjects** each teacher teaches. +Note that the table can have multiple rows for the same subject taught in different departments, but you should count each subject only once per teacher. + +Return the result table in **any order**. + +--- + +## πŸ“Š Table Structure + +### **Teacher Table** +| Column Name | Type | +| ----------- | ---- | +| teacher_id | int | +| subject_id | int | +| dept_id | int | + +- `(subject_id, dept_id)` is the **primary key**. +- Each row indicates that the teacher with `teacher_id` teaches the subject `subject_id` in the department `dept_id`. + +--- + +## πŸ“Š Example 1: + +### **Input:** +#### **Teacher Table** +| teacher_id | subject_id | dept_id | +| ---------- | ---------- | ------- | +| 1 | 2 | 3 | +| 1 | 2 | 4 | +| 1 | 3 | 3 | +| 2 | 1 | 1 | +| 2 | 2 | 1 | +| 2 | 3 | 1 | +| 2 | 4 | 1 | + +### **Output:** +| teacher_id | cnt | +| ---------- | --- | +| 1 | 2 | +| 2 | 4 | + +### **Explanation:** +- **Teacher 1:** + - Teaches subject **2** (in departments 3 and 4) and subject **3** (in department 3). + - Unique subjects = {2, 3} β†’ **2 subjects**. +- **Teacher 2:** + - Teaches subjects **1**, **2**, **3**, and **4** (all in department 1). + - Unique subjects = {1, 2, 3, 4} β†’ **4 subjects**. + +--- + +## πŸ–₯ SQL Solution + +### βœ… **Approach:** +- Use `COUNT(DISTINCT subject_id)` to count the number of unique subjects taught by each teacher. +- Group the results by `teacher_id`. + +```sql +SELECT teacher_id, COUNT(DISTINCT subject_id) AS cnt +FROM Teacher +GROUP BY teacher_id; +``` + +--- + +## 🐍 Python (Pandas) Solution + +### βœ… **Approach:** +1. **Group by `teacher_id`:** + - Group the DataFrame by `teacher_id`. +2. **Count Unique Subjects:** + - Use the `nunique()` function on the `subject_id` column within each group to count unique subjects. +3. **Reset Index and Rename:** + - Reset the index and rename the column appropriately. + +```python +import pandas as pd + +def count_unique_subjects(teacher: pd.DataFrame) -> pd.DataFrame: + # Group by teacher_id and count unique subject_id values + result = teacher.groupby('teacher_id')['subject_id'].nunique().reset_index() + result = result.rename(columns={'subject_id': 'cnt'}) + return result + +# Example usage: +# teacher_df = pd.DataFrame({ +# 'teacher_id': [1, 1, 1, 2, 2, 2, 2], +# 'subject_id': [2, 2, 3, 1, 2, 3, 4], +# 'dept_id': [3, 4, 3, 1, 1, 1, 1] +# }) +# print(count_unique_subjects(teacher_df)) +``` + +--- + +## πŸ“ File Structure +``` +πŸ“‚ Unique-Subjects-Per-Teacher +│── README.md +│── solution.sql +│── solution_pandas.py +│── test_cases.sql +│── sample_data.csv +``` + +--- + +## πŸ”— Useful Links +- πŸ“– [LeetCode Problem](https://leetcode.com/problems/number-of-unique-subjects-taught-by-each-teacher/) +- πŸ” [MySQL COUNT(DISTINCT) Documentation](https://www.w3schools.com/sql/sql_count_distinct.asp) +- 🐍 [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. 
Game Play Analysis IV.py b/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. Game Play Analysis IV.py new file mode 100644 index 0000000..37cc030 --- /dev/null +++ b/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. Game Play Analysis IV.py @@ -0,0 +1,22 @@ +import pandas as pd + +def game_play_analysis(activity: pd.DataFrame) -> pd.DataFrame: + # Get first login date for each player + first_login = activity.groupby("player_id")["event_date"].min().reset_index() + first_login.columns = ["player_id", "first_login"] + + # Merge first login date with original table + merged = activity.merge(first_login, on="player_id") + + # Filter players who logged in the next day + next_day_logins = merged[ + (merged["event_date"] - merged["first_login"]).dt.days == 1 + ]["player_id"].nunique() + + # Total unique players + total_players = activity["player_id"].nunique() + + # Calculate fraction + fraction = round(next_day_logins / total_players, 2) + + return pd.DataFrame({"fraction": [fraction]}) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. Game Play Analysis IV.sql b/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. Game Play Analysis IV.sql new file mode 100644 index 0000000..6a83794 --- /dev/null +++ b/LeetCode SQL 50 Solution/550. Game Play Analysis IV/550. Game Play Analysis IV.sql @@ -0,0 +1,55 @@ +550. Game Play Analysis IV + +Table: Activity + ++--------------+---------+ +| Column Name | Type | ++--------------+---------+ +| player_id | int | +| device_id | int | +-- | event_date | date | +| games_played | int | ++--------------+---------+ +(player_id, event_date) is the primary key (combination of columns with unique values) of this table. +This table shows the activity of players of some games. +Each row is a record of a player who logged in and played a number of games (possibly 0) before logging out on someday using some device. + + +Write a solution to report the fraction of players that logged in again on the day after the day they first logged in, rounded to 2 decimal places. In other words, you need to count the number of players that logged in for at least two consecutive days starting from their first login date, then divide that number by the total number of players. + +The result format is in the following example. + + + +Example 1: + +Input: +Activity table: ++-----------+-----------+------------+--------------+ +| player_id | device_id | event_date | games_played | ++-----------+-----------+------------+--------------+ +| 1 | 2 | 2016-03-01 | 5 | +| 1 | 2 | 2016-03-02 | 6 | +| 2 | 3 | 2017-06-25 | 1 | +| 3 | 1 | 2016-03-02 | 0 | +| 3 | 4 | 2018-07-03 | 5 | ++-----------+-----------+------------+--------------+ +Output: ++-----------+ +| fraction | ++-----------+ +| 0.33 | ++-----------+ +Explanation: +Only the player with id 1 logged back in after the first day he had logged in so the answer is 1/3 = 0.33 + +# Write your MySQL query statement below +select +round((select count(distinct a.player_id) from Activity a +inner join +(select player_id, min(event_date) as first_logged + from Activity + group by player_id) b on datediff(a.event_date, b.first_logged)=1 + and a.player_id = b.player_id) + / + (select count(distinct player_id) from Activity),2) as fraction; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/550. Game Play Analysis IV/readme.md b/LeetCode SQL 50 Solution/550. 
Game Play Analysis IV/readme.md new file mode 100644 index 0000000..51b11bd --- /dev/null +++ b/LeetCode SQL 50 Solution/550. Game Play Analysis IV/readme.md @@ -0,0 +1,164 @@ +# **550. Game Play Analysis IV** + +## **Problem Statement** +You are given a table named `Activity`, which logs the gaming activity of players. + +### **Activity Table** +```rb ++--------------+---------+ +| Column Name | Type | ++--------------+---------+ +| player_id | int | +| device_id | int | +| event_date | date | +| games_played | int | ++--------------+---------+ +``` +- **(player_id, event_date)** is the **primary key**. +- Each row contains: + - `player_id`: The ID of the player. + - `event_date`: The date when the player logged in. + - `games_played`: The number of games played before logging out. + +### **Task:** +Find the **fraction** of players who logged in **again** the day after their **first login date**, rounded to **2 decimal places**. + +--- + +## **Example 1:** +### **Input:** +#### **Activity Table** +```rb ++-----------+-----------+------------+--------------+ +| player_id | device_id | event_date | games_played | ++-----------+-----------+------------+--------------+ +| 1 | 2 | 2016-03-01 | 5 | +| 1 | 2 | 2016-03-02 | 6 | +| 2 | 3 | 2017-06-25 | 1 | +| 3 | 1 | 2016-03-02 | 0 | +| 3 | 4 | 2018-07-03 | 5 | ++-----------+-----------+------------+--------------+ +``` +### **Output:** +```rb ++-----------+ +| fraction | ++-----------+ +| 0.33 | ++-----------+ +``` +### **Explanation:** +- `player_id = 1`: First login on **2016-03-01**, logs in again on **2016-03-02** βœ… +- `player_id = 2`: First login on **2017-06-25**, no next-day login ❌ +- `player_id = 3`: First login on **2016-03-02**, no next-day login ❌ + +Total players = **3**, Players who logged in the next day = **1** β†’ **1 / 3 = 0.33** βœ… + +--- + +## **Solution Approaches** + +### **SQL Solution (Using `JOIN` & `DATEDIFF`)** +```sql +SELECT + ROUND(( + SELECT COUNT(DISTINCT a.player_id) + FROM Activity a + INNER JOIN ( + SELECT player_id, MIN(event_date) AS first_login + FROM Activity + GROUP BY player_id + ) b + ON a.player_id = b.player_id + AND DATEDIFF(a.event_date, b.first_login) = 1 + ) / + (SELECT COUNT(DISTINCT player_id) FROM Activity), 2) AS fraction; +``` +**Explanation:** +1. **Find First Login Date per Player** + - `MIN(event_date) AS first_login` + - **Grouped by** `player_id` +2. **Find Players Who Logged in on the Next Day** + - **Join** the table with itself. + - Use `DATEDIFF(a.event_date, b.first_login) = 1` to check next-day logins. + - Count unique `player_id`s. +3. **Calculate Fraction** + - Divide by total distinct `player_id`s. + - Round to **2 decimal places**. + +--- + +### **Alternative SQL Solution (Using `EXISTS`)** +```sql +SELECT ROUND( + (SELECT COUNT(DISTINCT player_id) + FROM Activity a + WHERE EXISTS ( + SELECT 1 FROM Activity b + WHERE a.player_id = b.player_id + AND DATEDIFF(b.event_date, a.event_date) = 1 + )) / + (SELECT COUNT(DISTINCT player_id) FROM Activity), 2) AS fraction; +``` +**Explanation:** +- Checks if a player has **ANY** login exactly **one day after**. +- Uses `EXISTS` to optimize performance. 
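+**Quick sanity check (optional):** the arithmetic of Example 1 can be reproduced with a tiny standalone Python snippet β€” this is just an illustration, not one of the repository's solution files. Only player 1 logs in exactly one day after their first login, so the fraction is 1/3 β‰ˆ 0.33.
+
+```python
+from datetime import date, timedelta
+
+# (player_id, event_date) pairs copied from Example 1
+logins = [
+    (1, date(2016, 3, 1)), (1, date(2016, 3, 2)),
+    (2, date(2017, 6, 25)),
+    (3, date(2016, 3, 2)), (3, date(2018, 7, 3)),
+]
+
+# First login per player
+first_login = {}
+for pid, d in logins:
+    first_login[pid] = min(d, first_login.get(pid, d))
+
+# Players who logged in again exactly one day after their first login
+login_set = set(logins)
+retained = {pid for pid, d in first_login.items() if (pid, d + timedelta(days=1)) in login_set}
+
+print(round(len(retained) / len(first_login), 2))  # 0.33
+```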
+ +--- + +### **Pandas Solution** +```python +import pandas as pd + +def game_play_analysis(activity: pd.DataFrame) -> pd.DataFrame: + # Get first login date for each player + first_login = activity.groupby("player_id")["event_date"].min().reset_index() + first_login.columns = ["player_id", "first_login"] + + # Merge first login date with original table + merged = activity.merge(first_login, on="player_id") + + # Filter players who logged in the next day + next_day_logins = merged[ + (merged["event_date"] - merged["first_login"]).dt.days == 1 + ]["player_id"].nunique() + + # Total unique players + total_players = activity["player_id"].nunique() + + # Calculate fraction + fraction = round(next_day_logins / total_players, 2) + + return pd.DataFrame({"fraction": [fraction]}) +``` +**Explanation:** +1. **Find First Login Date** + - Group by `player_id`, get `min(event_date)`. +2. **Merge with Original Table** + - Check if `event_date - first_login = 1 day`. +3. **Count Unique Players** + - Divide by total unique `player_id`s. + +--- + +## **File Structure** +``` +πŸ“‚ LeetCode550 +│── πŸ“œ problem_statement.md +│── πŸ“œ sql_solution.sql +│── πŸ“œ sql_exists_solution.sql +│── πŸ“œ pandas_solution.py +│── πŸ“œ README.md +``` +- `problem_statement.md` β†’ Contains the problem description. +- `sql_solution.sql` β†’ SQL solution using **JOIN & DATEDIFF**. +- `sql_exists_solution.sql` β†’ SQL solution using **EXISTS**. +- `pandas_solution.py` β†’ Pandas solution. +- `README.md` β†’ Overview of problem and solutions. + +--- + +## **Useful Links** +- [LeetCode Problem 550](https://leetcode.com/problems/game-play-analysis-iv/) +- [SQL `DATEDIFF()`](https://www.w3schools.com/sql/func_mysql_datediff.asp) +- [Pandas `.groupby()`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/570. Managers with at Least 5 Direct Reports.py b/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/570. Managers with at Least 5 Direct Reports.py new file mode 100644 index 0000000..73f3df8 --- /dev/null +++ b/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/570. Managers with at Least 5 Direct Reports.py @@ -0,0 +1,16 @@ +import pandas as pd + +def managers_with_five_reports(employee: pd.DataFrame) -> None: + # Count direct reports for each manager + report_counts = employee.groupby('managerId').size().reset_index(name='report_count') + + # Identify managerIds with at least five direct reports + valid_managers = report_counts[report_counts['report_count'] >= 5]['managerId'] + + # Filter the Employee table to get manager names + # Note: Since managerId can be null, we ignore them during merge. + result = employee[employee['id'].isin(valid_managers)][['name']] + + # Modify the original DataFrame in place if required. + employee = result + print(result) \ No newline at end of file diff --git a/570. Managers with at Least 5 Direct Reports.sql b/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/570. Managers with at Least 5 Direct Reports.sql similarity index 100% rename from 570. Managers with at Least 5 Direct Reports.sql rename to LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/570. Managers with at Least 5 Direct Reports.sql diff --git a/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/readme.md b/LeetCode SQL 50 Solution/570. 
Managers with at Least 5 Direct Reports/readme.md new file mode 100644 index 0000000..407b040 --- /dev/null +++ b/LeetCode SQL 50 Solution/570. Managers with at Least 5 Direct Reports/readme.md @@ -0,0 +1,140 @@ +# **570. Managers with at Least 5 Direct Reports** + +## **Problem Statement** +You are given a table `Employee` that holds all employee records, including their managers. Every employee has an `id`, a `name`, a `department`, and a `managerId`. + +### **Employee Table** +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| name | varchar | +| department | varchar | +| managerId | int | ++-------------+---------+ +``` +- `id` is the **primary key**. +- `managerId` is a **foreign key** that references the `id` of a manager in the same table. +- If `managerId` is `null`, then the employee does not have a manager. +- No employee will be the manager of themselves. + +### **Task:** +Write a solution to find all managers (i.e., employees who appear as a managerId in the table) with **at least five direct reports**. + +The result table should display the manager’s name in any order. + +--- + +## **Example 1:** + +### **Input:** +#### **Employee Table** +``` ++-----+-------+------------+-----------+ +| id | name | department | managerId | ++-----+-------+------------+-----------+ +| 101 | John | A | null | +| 102 | Dan | A | 101 | +| 103 | James | A | 101 | +| 104 | Amy | A | 101 | +| 105 | Anne | A | 101 | +| 106 | Ron | B | 101 | ++-----+-------+------------+-----------+ +``` + +### **Output:** +``` ++------+ +| name | ++------+ +| John | ++------+ +``` + +### **Explanation:** +- **John** (id = 101) is the only manager who has **5 direct reports** (ids 102, 103, 104, 105, and 106). + +--- + +## **Solution Approaches** + +### **SQL Solution (Using Subquery)** +```sql +SELECT name +FROM Employee +WHERE id IN ( + SELECT managerId + FROM Employee + GROUP BY managerId + HAVING COUNT(*) >= 5 +); +``` +**Explanation:** +- The subquery groups the `Employee` table by `managerId` and counts the number of direct reports. +- Only managers with a count of **5 or more** are selected. +- The outer query then retrieves the names of those managers. + +--- + +### **SQL Solution (Using JOIN and Window Functions)** +```sql +SELECT name +FROM Employee +JOIN ( + SELECT managerId + FROM Employee + GROUP BY managerId + HAVING COUNT(*) >= 5 +) AS managers +ON Employee.id = managers.managerId; +``` +**Explanation:** +- The inner query identifies all `managerId`s with **at least five** direct reports. +- The outer query then joins on the `Employee` table to fetch the corresponding manager names. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def managers_with_five_reports(employee: pd.DataFrame) -> None: + # Count direct reports for each manager + report_counts = employee.groupby('managerId').size().reset_index(name='report_count') + + # Identify managerIds with at least five direct reports + valid_managers = report_counts[report_counts['report_count'] >= 5]['managerId'] + + # Filter the Employee table to get manager names + # Note: Since managerId can be null, we ignore them during merge. + result = employee[employee['id'].isin(valid_managers)][['name']] + + # Modify the original DataFrame in place if required. + employee = result + print(result) +``` +**Explanation:** +- Group the table by `managerId` and count the number of direct reports. +- Filter out the managers having at least 5 direct reports. 
+- Finally, retrieve the names of these managers. + +--- + +## **File Structure** +``` +LeetCode570/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_subquery_solution.sql # SQL solution using subquery. +β”œβ”€β”€ sql_join_solution.sql # SQL solution using JOIN. +β”œβ”€β”€ pandas_solution.py # Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 570](https://leetcode.com/problems/managers-with-at-least-5-direct-reports/) +- [SQL DELETE and JOIN Syntax](https://www.w3schools.com/sql/sql_join.asp) +- [Pandas Documentation](https://pandas.pydata.org/docs/) + diff --git a/LeetCode SQL 50 Solution/584. Find Customer Referee/584. Find Customer Referee.py b/LeetCode SQL 50 Solution/584. Find Customer Referee/584. Find Customer Referee.py new file mode 100644 index 0000000..6685391 --- /dev/null +++ b/LeetCode SQL 50 Solution/584. Find Customer Referee/584. Find Customer Referee.py @@ -0,0 +1,6 @@ +import pandas as pd + +def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame: + # Filter rows where referee_id is not equal to 2 or is null + result = customer[(customer['referee_id'] != 2) | (customer['referee_id'].isnull())][['name']] + return result \ No newline at end of file diff --git a/584. Find Customer Referee.sql b/LeetCode SQL 50 Solution/584. Find Customer Referee/584. Find Customer Referee.sql similarity index 100% rename from 584. Find Customer Referee.sql rename to LeetCode SQL 50 Solution/584. Find Customer Referee/584. Find Customer Referee.sql diff --git a/LeetCode SQL 50 Solution/584. Find Customer Referee/readme.md b/LeetCode SQL 50 Solution/584. Find Customer Referee/readme.md new file mode 100644 index 0000000..9689cf0 --- /dev/null +++ b/LeetCode SQL 50 Solution/584. Find Customer Referee/readme.md @@ -0,0 +1,104 @@ +# **584. Find Customer Referee** + +## **Problem Statement** +You are given a table `Customer` that stores customer details along with the ID of the customer who referred them. + +### **Customer Table** +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| name | varchar | +| referee_id | int | ++-------------+---------+ +``` +- `id` is the **primary key**. +- Each row represents a customer with their `id`, `name`, and `referee_id`. +- `referee_id` indicates the customer who referred them. It can be **NULL** if no one referred the customer. + +### **Task:** +Find the names of the customers who are **not referred** by the customer with `id = 2`. + +--- + +## **Example 1:** + +### **Input:** +#### **Customer Table** +``` ++----+------+------------+ +| id | name | referee_id | ++----+------+------------+ +| 1 | Will | null | +| 2 | Jane | null | +| 3 | Alex | 2 | +| 4 | Bill | null | +| 5 | Zack | 1 | +| 6 | Mark | 2 | ++----+------+------------+ +``` + +### **Output:** +``` ++------+ +| name | ++------+ +| Will | +| Jane | +| Bill | +| Zack | ++------+ +``` + +### **Explanation:** +- **Alex** (id = 3) and **Mark** (id = 6) are referred by the customer with `id = 2` and are excluded. +- The remaining customers (**Will**, **Jane**, **Bill**, **Zack**) are not referred by the customer with `id = 2`. 
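+The subtle part of this problem is `NULL` handling: in SQL, `referee_id != 2` evaluates to *unknown* (not true) when `referee_id` is `NULL`, so those rows are silently filtered out unless `OR referee_id IS NULL` is added. A quick way to see this locally (an illustration only, using Python's built-in `sqlite3`; MySQL applies the same three-valued logic):
+
+```python
+import sqlite3
+
+conn = sqlite3.connect(":memory:")
+conn.execute("CREATE TABLE Customer (id INT, name TEXT, referee_id INT)")
+conn.executemany(
+    "INSERT INTO Customer VALUES (?, ?, ?)",
+    [(1, "Will", None), (2, "Jane", None), (3, "Alex", 2),
+     (4, "Bill", None), (5, "Zack", 1), (6, "Mark", 2)],
+)
+
+# Without the IS NULL clause, the NULL rows disappear
+print(conn.execute("SELECT name FROM Customer WHERE referee_id != 2").fetchall())
+# [('Zack',)]
+
+# With the IS NULL clause, all four expected customers are returned
+print(conn.execute(
+    "SELECT name FROM Customer WHERE referee_id != 2 OR referee_id IS NULL"
+).fetchall())
+# [('Will',), ('Jane',), ('Bill',), ('Zack',)]
+```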
+ +--- + +## **Solution Approaches** + +### **SQL Solution (Using WHERE Clause)** +```sql +SELECT name +FROM Customer +WHERE referee_id != 2 OR referee_id IS NULL; +``` +**Explanation:** +- The query selects customer names where `referee_id` is either not equal to `2` or is `NULL`. +- This effectively filters out customers referred by the customer with `id = 2`. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame: + # Filter rows where referee_id is not equal to 2 or is null + result = customer[(customer['referee_id'] != 2) | (customer['referee_id'].isnull())][['name']] + return result +``` +**Explanation:** +- The Pandas solution filters the DataFrame for rows where `referee_id` is not 2 or is `NaN`. +- It then returns the `name` column containing the desired customer names. + +--- + +## **File Structure** +``` +LeetCode584/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 584](https://leetcode.com/problems/find-customer-referee/) +- [SQL WHERE Clause Documentation](https://www.w3schools.com/sql/sql_where.asp) +- [Pandas Filtering DataFrames](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) + diff --git a/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.py b/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.py new file mode 100644 index 0000000..54c7c50 --- /dev/null +++ b/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.py @@ -0,0 +1,19 @@ +import pandas as pd + +def investments_in_2016(insurance: pd.DataFrame) -> pd.DataFrame: + # Count the number of occurrences for each tiv_2015 value + insurance['tiv_2015_count'] = insurance.groupby('tiv_2015')['tiv_2015'].transform('count') + + # Count the number of occurrences for each (lat, lon) pair + insurance['city_count'] = insurance.groupby(['lat', 'lon'])['lat'].transform('count') + + # Filter rows that meet both criteria: + # 1. tiv_2015 appears more than once. + # 2. The location (lat, lon) is unique (appears only once). + valid_rows = insurance[(insurance['tiv_2015_count'] > 1) & (insurance['city_count'] == 1)] + + # Calculate the sum of tiv_2016 and round to 2 decimal places + total_tiv_2016 = round(valid_rows['tiv_2016'].sum(), 2) + + # Return result as a DataFrame + return pd.DataFrame({'tiv_2016': [total_tiv_2016]}) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.sql b/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.sql new file mode 100644 index 0000000..abad2d8 --- /dev/null +++ b/LeetCode SQL 50 Solution/585. Investments in 2016/585. Investments in 2016.sql @@ -0,0 +1,70 @@ +585. Investments in 2016 +""" +Table: Insurance + ++-------------+-------+ +| Column Name | Type | ++-------------+-------+ +| pid | int | +| tiv_2015 | float | +| tiv_2016 | float | +| lat | float | +| lon | float | ++-------------+-------+ +pid is the primary key (column with unique values) for this table. +Each row of this table contains information about one policy where: +pid is the policyholder's policy ID. +tiv_2015 is the total investment value in 2015 and tiv_2016 is the total investment value in 2016. 
+lat is the latitude of the policy holder's city. It's guaranteed that lat is not NULL. +lon is the longitude of the policy holder's city. It's guaranteed that lon is not NULL. + + +Write a solution to report the sum of all total investment values in 2016 tiv_2016, for all policyholders who: + +have the same tiv_2015 value as one or more other policyholders, and +are not located in the same city as any other policyholder (i.e., the (lat, lon) attribute pairs must be unique). +Round tiv_2016 to two decimal places. + +The result format is in the following example. + + + +Example 1: + +Input: +Insurance table: ++-----+----------+----------+-----+-----+ +| pid | tiv_2015 | tiv_2016 | lat | lon | ++-----+----------+----------+-----+-----+ +| 1 | 10 | 5 | 10 | 10 | +| 2 | 20 | 20 | 20 | 20 | +| 3 | 10 | 30 | 20 | 20 | +| 4 | 10 | 40 | 40 | 40 | ++-----+----------+----------+-----+-----+ +Output: ++----------+ +| tiv_2016 | ++----------+ +| 45.00 | ++----------+ +Explanation: +The first record in the table, like the last record, meets both of the two criteria. +The tiv_2015 value 10 is the same as the third and fourth records, and its location is unique. + +The second record does not meet any of the two criteria. Its tiv_2015 is not like any other policyholders and its location is the same as the third record, which makes the third record fail, too. +So, the result is the sum of tiv_2016 of the first and last record, which is 45. + +""" + +WITH + InsuranceWithCounts AS ( + SELECT + tiv_2016, + COUNT(*) OVER(PARTITION by tiv_2015) AS tiv_2015_count, + COUNT(*) OVER(PARTITION by lat, lon) AS city_count + FROM Insurance + ) +SELECT ROUND(SUM(tiv_2016), 2) AS tiv_2016 +FROM InsuranceWithCounts +WHERE tiv_2015_count > 1 + AND city_count = 1; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/585. Investments in 2016/readme.md b/LeetCode SQL 50 Solution/585. Investments in 2016/readme.md new file mode 100644 index 0000000..4314981 --- /dev/null +++ b/LeetCode SQL 50 Solution/585. Investments in 2016/readme.md @@ -0,0 +1,141 @@ + + +# **585. Investments in 2016** + +## **Problem Statement** +You are given a table `Insurance` that contains details about policy investments. + +### **Insurance Table** +``` ++-------------+-------+ +| Column Name | Type | ++-------------+-------+ +| pid | int | +| tiv_2015 | float | +| tiv_2016 | float | +| lat | float | +| lon | float | ++-------------+-------+ +``` +- `pid` is the **primary key**. +- `tiv_2015` is the total investment value in **2015**. +- `tiv_2016` is the total investment value in **2016**. +- `lat` and `lon` represent the **latitude** and **longitude** of the policyholder's city. Both values are guaranteed not to be `NULL`. + +### **Task:** +Report the **sum** of all `tiv_2016` values (rounded to two decimal places) for all policyholders who: +1. Have the **same `tiv_2015`** value as one or more other policyholders. +2. Are **not located** in the same city as any other policyholder (i.e., the `(lat, lon)` attribute pair is **unique**). 
+ +--- + +## **Example 1:** + +### **Input:** +#### **Insurance Table** +``` ++-----+----------+----------+-----+-----+ +| pid | tiv_2015 | tiv_2016 | lat | lon | ++-----+----------+----------+-----+-----+ +| 1 | 10 | 5 | 10 | 10 | +| 2 | 20 | 20 | 20 | 20 | +| 3 | 10 | 30 | 20 | 20 | +| 4 | 10 | 40 | 40 | 40 | ++-----+----------+----------+-----+-----+ +``` + +### **Output:** +``` ++----------+ +| tiv_2016 | ++----------+ +| 45.00 | ++----------+ +``` + +### **Explanation:** +- The policyholders with `tiv_2015 = 10` appear in multiple rows. +- Among these, only the records with **unique locations** count. +- Policy `pid = 1` and `pid = 4` meet both criteria, so the result is the sum of their `tiv_2016`: **5 + 40 = 45.00**. + +--- + +## **Solution Approaches** + +### **SQL Solution (Using Window Functions)** +```sql +WITH InsuranceWithCounts AS ( + SELECT + tiv_2016, + COUNT(*) OVER(PARTITION BY tiv_2015) AS tiv_2015_count, + COUNT(*) OVER(PARTITION BY lat, lon) AS city_count + FROM Insurance +) +SELECT ROUND(SUM(tiv_2016), 2) AS tiv_2016 +FROM InsuranceWithCounts +WHERE tiv_2015_count > 1 + AND city_count = 1; +``` +**Explanation:** +- The CTE `InsuranceWithCounts` computes: + - `tiv_2015_count`: Number of records with the same `tiv_2015`. + - `city_count`: Number of records with the same `(lat, lon)` pair. +- The outer query filters rows where: + - `tiv_2015_count > 1` (i.e., policyholders share their 2015 investment value with others). + - `city_count = 1` (i.e., their location is unique). +- Finally, it sums `tiv_2016` and rounds the result to 2 decimal places. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def investments_in_2016(insurance: pd.DataFrame) -> pd.DataFrame: + # Count the number of occurrences for each tiv_2015 value + insurance['tiv_2015_count'] = insurance.groupby('tiv_2015')['tiv_2015'].transform('count') + + # Count the number of occurrences for each (lat, lon) pair + insurance['city_count'] = insurance.groupby(['lat', 'lon'])['lat'].transform('count') + + # Filter rows that meet both criteria: + # 1. tiv_2015 appears more than once. + # 2. The location (lat, lon) is unique (appears only once). + valid_rows = insurance[(insurance['tiv_2015_count'] > 1) & (insurance['city_count'] == 1)] + + # Calculate the sum of tiv_2016 and round to 2 decimal places + total_tiv_2016 = round(valid_rows['tiv_2016'].sum(), 2) + + # Return result as a DataFrame + return pd.DataFrame({'tiv_2016': [total_tiv_2016]}) + +# Example usage: +# df = pd.read_csv('insurance.csv') +# print(investments_in_2016(df)) +``` +**Explanation:** +- The code computes two new columns: + - `tiv_2015_count` for the number of policyholders with the same 2015 investment. + - `city_count` for the count of policyholders in each unique city (using `(lat, lon)`). +- Rows that meet the conditions are filtered. +- The `tiv_2016` values of the valid rows are summed and rounded to 2 decimal places. +- The result is returned as a DataFrame. + +--- + +## **File Structure** +``` +LeetCode585/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # SQL solution using window functions. +β”œβ”€β”€ pandas_solution.py # Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. 
+``` + +--- + +## **Useful Links** +- [LeetCode Problem 585](https://leetcode.com/problems/investments-in-2016/) +- [SQL Window Functions](https://www.w3schools.com/sql/sql_window.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) +- [Pandas Transform Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.transform.html) diff --git a/LeetCode SQL 50 Solution/595. Big Countries/595. Big Countries.py b/LeetCode SQL 50 Solution/595. Big Countries/595. Big Countries.py new file mode 100644 index 0000000..83c6cd5 --- /dev/null +++ b/LeetCode SQL 50 Solution/595. Big Countries/595. Big Countries.py @@ -0,0 +1,6 @@ +import pandas as pd + +def big_countries(world: pd.DataFrame) -> pd.DataFrame: + # Filter countries that are considered big by either area or population + result = world[(world['area'] >= 3000000) | (world['population'] >= 25000000)][['name', 'population', 'area']] + return result \ No newline at end of file diff --git a/595. Big Countries.sql b/LeetCode SQL 50 Solution/595. Big Countries/595. Big Countries.sql similarity index 100% rename from 595. Big Countries.sql rename to LeetCode SQL 50 Solution/595. Big Countries/595. Big Countries.sql diff --git a/LeetCode SQL 50 Solution/595. Big Countries/readme.md b/LeetCode SQL 50 Solution/595. Big Countries/readme.md new file mode 100644 index 0000000..932d777 --- /dev/null +++ b/LeetCode SQL 50 Solution/595. Big Countries/readme.md @@ -0,0 +1,135 @@ +# **595. Big Countries** + +## **Problem Statement** +You are given a table `World` that contains information about countries. + +### **World Table** +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| name | varchar | +| continent | varchar | +| area | int | +| population | int | +| gdp | bigint | ++-------------+---------+ +``` +- `name` is the **primary key**. +- Each row contains: + - `name`: Name of the country. + - `continent`: Continent the country belongs to. + - `area`: Area of the country (in kmΒ²). + - `population`: Population of the country. + - `gdp`: GDP of the country. + +### **Task:** +A country is considered **big** if: +- It has an **area** of at least **3,000,000 kmΒ²**, **or** +- It has a **population** of at least **25,000,000**. + +Write a solution to find the **name**, **population**, and **area** of the big countries. + +Return the result table in **any order**. + +--- + +## **Example 1:** + +### **Input:** +#### **World Table** +``` ++-------------+-----------+---------+------------+--------------+ +| name | continent | area | population | gdp | ++-------------+-----------+---------+------------+--------------+ +| Afghanistan | Asia | 652230 | 25500100 | 20343000000 | +| Albania | Europe | 28748 | 2831741 | 12960000000 | +| Algeria | Africa | 2381741 | 37100000 | 188681000000 | +| Andorra | Europe | 468 | 78115 | 3712000000 | +| Angola | Africa | 1246700 | 20609294 | 100990000000 | ++-------------+-----------+---------+------------+--------------+ +``` + +### **Output:** +``` ++-------------+------------+---------+ +| name | population | area | ++-------------+------------+---------+ +| Afghanistan | 25500100 | 652230 | +| Algeria | 37100000 | 2381741 | ++-------------+------------+---------+ +``` + +### **Explanation:** +- **Afghanistan** is not big by area (652,230 < 3,000,000) but is big by population (25,500,100 β‰₯ 25,000,000). 
+- **Algeria** is big by population (37,100,000 β‰₯ 25,000,000), even though its area (2,381,741) is less than 3,000,000. +- The other countries do not meet either condition. + +--- + +## **Solution Approaches** + +### **SQL Solution (Using UNION)** +```sql +SELECT name, population, area +FROM World +WHERE area >= 3000000 +UNION +SELECT name, population, area +FROM World +WHERE population >= 25000000; +``` +**Explanation:** +- The first `SELECT` returns countries with an area of at least 3,000,000 kmΒ². +- The second `SELECT` returns countries with a population of at least 25,000,000. +- `UNION` combines these two result sets, ensuring unique rows. + +--- + +### **SQL Alternative (Using OR)** +```sql +SELECT name, population, area +FROM World +WHERE area >= 3000000 OR population >= 25000000; +``` +**Explanation:** +- This query uses a single `SELECT` statement with an `OR` condition to capture countries that meet either criterion. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def big_countries(world: pd.DataFrame) -> pd.DataFrame: + # Filter countries that are considered big by either area or population + result = world[(world['area'] >= 3000000) | (world['population'] >= 25000000)][['name', 'population', 'area']] + return result + +# Example usage: +# world_df = pd.read_csv('world.csv') +# print(big_countries(world_df)) +``` +**Explanation:** +- The Pandas solution filters the DataFrame based on the condition that `area` is at least 3,000,000 or `population` is at least 25,000,000. +- It then returns the columns `name`, `population`, and `area`. + +--- + +## **File Structure** +``` +LeetCode595/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_union_solution.sql # SQL solution using UNION. +β”œβ”€β”€ sql_or_solution.sql # SQL alternative solution using OR. +β”œβ”€β”€ pandas_solution.py # Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 595](https://leetcode.com/problems/big-countries/) +- [SQL WHERE Clause](https://www.w3schools.com/sql/sql_where.asp) +- [Pandas Documentation](https://pandas.pydata.org/docs/) + diff --git a/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.py b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.py new file mode 100644 index 0000000..e93ee24 --- /dev/null +++ b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.py @@ -0,0 +1,7 @@ +import pandas as pd + +def classes_with_five_or_more_students(courses: pd.DataFrame) -> pd.DataFrame: + # Group by 'class' and count the number of students + result = courses.groupby('class').filter(lambda x: len(x) >= 5) + # Return only the distinct class names + return result[['class']].drop_duplicates().reset_index(drop=True) \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.sql b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.sql new file mode 100644 index 0000000..9a61ac4 --- /dev/null +++ b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/596. Classes More Than 5 Students.sql @@ -0,0 +1,55 @@ +596. 
Classes More Than 5 Students + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| student | varchar | +| class | varchar | ++-------------+---------+ +(student, class) is the primary key (combination of columns with unique values) for this table. +Each row of this table indicates the name of a student and the class in which they are enrolled. + + +Write a solution to find all the classes that have at least five students. + +Return the result table in any order. + +The result format is in the following example. + + + +Example 1: + +Input: +Courses table: ++---------+----------+ +| student | class | ++---------+----------+ +| A | Math | +| B | English | +| C | Math | +| D | Biology | +| E | Math | +| F | Computer | +| G | Math | +| H | Math | +| I | Math | ++---------+----------+ +Output: ++---------+ +| class | ++---------+ +| Math | ++---------+ +Explanation: +- Math has 6 students, so we include it. +- English has 1 student, so we do not include it. +- Biology has 1 student, so we do not include it. +- Computer has 1 student, so we do not include it. + + +# Write your MySQL query statement below +SELECT class +FROM Courses +GROUP BY class +HAVING COUNT(student) >= 5; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/readme.md b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/readme.md new file mode 100644 index 0000000..3f667e2 --- /dev/null +++ b/LeetCode SQL 50 Solution/596. Classes More Than 5 Students/readme.md @@ -0,0 +1,110 @@ +# **596. Classes More Than 5 Students** + +## **Problem Statement** +You are given a table `Courses` that contains the names of students and the class in which they are enrolled. + +### **Courses Table** +``` ++---------+---------+ +| Column | Type | ++---------+---------+ +| student | varchar | +| class | varchar | ++---------+---------+ +``` +- The combination of `(student, class)` is the **primary key**. +- Each row indicates the name of a student and the class they are enrolled in. + +### **Task:** +Write a solution to find all the classes that have **at least five students**. + +Return the result table in **any order**. + +--- + +## **Example 1:** + +### **Input:** +``` +Courses table: ++---------+----------+ +| student | class | ++---------+----------+ +| A | Math | +| B | English | +| C | Math | +| D | Biology | +| E | Math | +| F | Computer | +| G | Math | +| H | Math | +| I | Math | ++---------+----------+ +``` + +### **Output:** +``` ++---------+ +| class | ++---------+ +| Math | ++---------+ +``` + +### **Explanation:** +- **Math** has 6 students, so it is included. +- **English**, **Biology**, and **Computer** have fewer than 5 students, so they are excluded. + +--- + +## **Solution Approaches** + +### **SQL Solution** +```sql +SELECT class +FROM Courses +GROUP BY class +HAVING COUNT(student) >= 5; +``` +**Explanation:** +- The query groups records by `class`. +- The `HAVING` clause filters out groups with fewer than 5 students. 
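+
+As a side note, `(student, class)` is the primary key here, so plain `COUNT(student)` is safe; if duplicate rows were possible, a slightly more defensive sketch would count distinct students instead:
+
+```sql
+SELECT class
+FROM Courses
+GROUP BY class
+HAVING COUNT(DISTINCT student) >= 5;
+```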
+ +--- + +### **Pandas Solution** +```python +import pandas as pd + +def classes_with_five_or_more_students(courses: pd.DataFrame) -> pd.DataFrame: + # Group by 'class' and count the number of students + result = courses.groupby('class').filter(lambda x: len(x) >= 5) + # Return only the distinct class names + return result[['class']].drop_duplicates().reset_index(drop=True) + +# Example usage: +# courses_df = pd.read_csv('courses.csv') +# print(classes_with_five_or_more_students(courses_df)) +``` +**Explanation:** +- The Pandas solution groups the DataFrame by `class` and filters groups with 5 or more students. +- It then extracts and returns the distinct class names. + +--- + +## **File Structure** +``` +LeetCode596/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 596](https://leetcode.com/problems/classes-more-than-5-students/) +- [SQL GROUP BY and HAVING Clause](https://www.w3schools.com/sql/sql_groupby.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) + diff --git a/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.py b/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.py new file mode 100644 index 0000000..1981f41 --- /dev/null +++ b/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.py @@ -0,0 +1,16 @@ +import pandas as pd + +def most_friends(requests: pd.DataFrame) -> pd.DataFrame: + # Create a DataFrame that contains all friend relationships in both directions + friend_df = pd.concat([ + requests[['requester_id', 'accepter_id']].rename(columns={'requester_id': 'id', 'accepter_id': 'friend'}), + requests[['accepter_id', 'requester_id']].rename(columns={'accepter_id': 'id', 'requester_id': 'friend'}) + ]) + + # Count number of friends for each user + friend_counts = friend_df.groupby('id').size().reset_index(name='num') + + # Get the user with the most friends + max_friends = friend_counts.loc[friend_counts['num'].idxmax()] + + return pd.DataFrame({'id': [max_friends['id']], 'num': [max_friends['num']]}) diff --git a/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.sql b/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.sql new file mode 100644 index 0000000..99c5e2c --- /dev/null +++ b/LeetCode SQL 50 Solution/602. Friend Requests II/602. Friend Requests II.sql @@ -0,0 +1,67 @@ +602. Friend Requests II: Who Has the Most Friends +Solved +Medium +Topics +Companies +Hint +SQL Schema +Pandas Schema +Table: RequestAccepted + ++----------------+---------+ +| Column Name | Type | ++----------------+---------+ +| requester_id | int | +| accepter_id | int | +| accept_date | date | ++----------------+---------+ +(requester_id, accepter_id) is the primary key (combination of columns with unique values) for this table. +This table contains the ID of the user who sent the request, the ID of the user who received the request, and the date when the request was accepted. + + +Write a solution to find the people who have the most friends and the most friends number. + +The test cases are generated so that only one person has the most friends. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +RequestAccepted table: ++--------------+-------------+-------------+ +| requester_id | accepter_id | accept_date | ++--------------+-------------+-------------+ +| 1 | 2 | 2016/06/03 | +| 1 | 3 | 2016/06/08 | +| 2 | 3 | 2016/06/08 | +| 3 | 4 | 2016/06/09 | ++--------------+-------------+-------------+ +Output: ++----+-----+ +| id | num | ++----+-----+ +| 3 | 3 | ++----+-----+ +Explanation: +The person with id 3 is a friend of people 1, 2, and 4, so he has three friends in total, which is the most number than any others. + + +Follow up: In the real world, multiple people could have the same most number of friends. Could you find all these people in this case? + + + +# Write your MySQL query statement below +WITH + T AS ( + SELECT requester_id, accepter_id FROM RequestAccepted + UNION ALL + SELECT accepter_id, requester_id FROM RequestAccepted + ) +SELECT requester_id AS id, COUNT(1) AS num +FROM T +GROUP BY 1 +ORDER BY 2 DESC +LIMIT 1; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/602. Friend Requests II/readme.md b/LeetCode SQL 50 Solution/602. Friend Requests II/readme.md new file mode 100644 index 0000000..8be2c91 --- /dev/null +++ b/LeetCode SQL 50 Solution/602. Friend Requests II/readme.md @@ -0,0 +1,121 @@ +# **602. Friend Requests II: Who Has the Most Friends** + +## **Problem Statement** +You are given a table `RequestAccepted` that records friend request acceptances between users. + +### **RequestAccepted Table** +``` ++--------------+-------------+-------------+ +| Column Name | Type | ++--------------+-------------+ +| requester_id | int | +| accepter_id | int | +| accept_date | date | ++--------------+-------------+ +``` +- **(requester_id, accepter_id)** is the **primary key**. +- Each row indicates the user who sent a friend request (`requester_id`), the user who accepted it (`accepter_id`), and the date when the request was accepted. + +### **Task:** +Find the person who has the **most friends** along with the number of friends they have. +*Note:* The test cases are generated so that only one person has the most friends. + +--- + +## **Example 1:** + +### **Input:** +#### **RequestAccepted Table** +``` ++--------------+-------------+-------------+ +| requester_id | accepter_id | accept_date | ++--------------+-------------+-------------+ +| 1 | 2 | 2016/06/03 | +| 1 | 3 | 2016/06/08 | +| 2 | 3 | 2016/06/08 | +| 3 | 4 | 2016/06/09 | ++--------------+-------------+-------------+ +``` + +### **Output:** +``` ++----+-----+ +| id | num | ++----+-----+ +| 3 | 3 | ++----+-----+ +``` + +### **Explanation:** +- User with `id = 3` is friends with users 1, 2, and 4, making a total of **3 friends**β€”the highest among all users. + +--- + +## **Solution Approaches** + +### **SQL Solution (Using UNION ALL)** +```sql +WITH T AS ( + SELECT requester_id, accepter_id FROM RequestAccepted + UNION ALL + SELECT accepter_id, requester_id FROM RequestAccepted +) +SELECT requester_id AS id, COUNT(*) AS num +FROM T +GROUP BY requester_id +ORDER BY num DESC +LIMIT 1; +``` +**Explanation:** +- The CTE `T` creates a complete friendship list by combining both directions of the friend relationship. +- The outer query groups by `requester_id` (which now represents a user) and counts the number of occurrences (i.e., friends). +- The result is ordered by the friend count in descending order and limited to one row, returning the user with the most friends. 
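+
+The follow-up in the problem asks how to report **all** users tied for the most friends, since the single-answer guarantee no longer holds. A hedged sketch (assuming MySQL 8+ CTE support, and not part of the accepted solution above):
+
+```sql
+WITH T AS (
+    SELECT requester_id, accepter_id FROM RequestAccepted
+    UNION ALL
+    SELECT accepter_id, requester_id FROM RequestAccepted
+),
+Counts AS (
+    SELECT requester_id AS id, COUNT(*) AS num
+    FROM T
+    GROUP BY requester_id
+)
+SELECT id, num
+FROM Counts
+WHERE num = (SELECT MAX(num) FROM Counts);
+```
+
+Replacing `LIMIT 1` with a comparison against the maximum count keeps every user whose friend count equals that maximum.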
+ +--- + +### **Pandas Solution** +```python +import pandas as pd + +def most_friends(requests: pd.DataFrame) -> pd.DataFrame: + # Create a DataFrame that contains all friend relationships in both directions + friend_df = pd.concat([ + requests[['requester_id', 'accepter_id']].rename(columns={'requester_id': 'id', 'accepter_id': 'friend'}), + requests[['accepter_id', 'requester_id']].rename(columns={'accepter_id': 'id', 'requester_id': 'friend'}) + ]) + + # Count number of friends for each user + friend_counts = friend_df.groupby('id').size().reset_index(name='num') + + # Get the user with the most friends + max_friends = friend_counts.loc[friend_counts['num'].idxmax()] + + return pd.DataFrame({'id': [max_friends['id']], 'num': [max_friends['num']]}) + +# Example usage: +# requests_df = pd.read_csv('request_accepted.csv') +# print(most_friends(requests_df)) +``` +**Explanation:** +- The solution concatenates two DataFrames to consider friend relationships in both directions. +- It then groups by user `id` and counts the number of friends. +- The user with the maximum friend count is selected and returned. + +--- + +## **File Structure** +``` +LeetCode602/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution using UNION ALL. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 602](https://leetcode.com/problems/friend-requests-ii-who-has-the-most-friends/) +- [SQL UNION ALL Documentation](https://www.w3schools.com/sql/sql_union.asp) +- [Pandas concat Documentation](https://pandas.pydata.org/docs/reference/api/pandas.concat.html) +- [Pandas groupby Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) diff --git a/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.py b/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.py new file mode 100644 index 0000000..ea00949 --- /dev/null +++ b/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.py @@ -0,0 +1,11 @@ +import pandas as pd + +def triangle_judgement(triangle: pd.DataFrame) -> pd.DataFrame: + # Create a new column 'triangle' based on the triangle inequality conditions + triangle['triangle'] = triangle.apply( + lambda row: 'Yes' if (row['x'] + row['y'] > row['z'] and + row['x'] + row['z'] > row['y'] and + row['y'] + row['z'] > row['x']) else 'No', + axis=1 + ) + return triangle \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.sql b/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.sql new file mode 100644 index 0000000..3e353f0 --- /dev/null +++ b/LeetCode SQL 50 Solution/610. Triangle Judgement/610. Triangle Judgement.sql @@ -0,0 +1,52 @@ +610. Triangle Judgement +Solved +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Triangle + ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| x | int | +| y | int | +| z | int | ++-------------+------+ +In SQL, (x, y, z) is the primary key column for this table. +Each row of this table contains the lengths of three line segments. + + +Report for every three line segments whether they can form a triangle. + +Return the result table in any order. + +The result format is in the following example. 
+ + + +Example 1: + +Input: +Triangle table: ++----+----+----+ +| x | y | z | ++----+----+----+ +| 13 | 15 | 30 | +| 10 | 20 | 15 | ++----+----+----+ +Output: ++----+----+----+----------+ +| x | y | z | triangle | ++----+----+----+----------+ +| 13 | 15 | 30 | No | +| 10 | 20 | 15 | Yes | ++----+----+----+----------+ + + +# Write your MySQL query statement below +SELECT + *, + IF(x + y > z AND x + z > y AND y + z > x, 'Yes', 'No') AS triangle +FROM Triangle; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/610. Triangle Judgement/readme.md b/LeetCode SQL 50 Solution/610. Triangle Judgement/readme.md new file mode 100644 index 0000000..ed31c6e --- /dev/null +++ b/LeetCode SQL 50 Solution/610. Triangle Judgement/readme.md @@ -0,0 +1,109 @@ + +# **610. Triangle Judgement** + +## **Problem Statement** +You are given a table `Triangle` that contains three integer values representing the lengths of three line segments. + +### **Triangle Table** +``` ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| x | int | +| y | int | +| z | int | ++-------------+------+ +``` +- **(x, y, z)** is the **primary key**. +- Each row represents the lengths of three line segments. + +### **Task:** +Report for each row whether the three line segments can form a triangle. A triangle can be formed if and only if the sum of any two sides is greater than the third side. + +--- + +## **Example 1:** + +### **Input:** +#### **Triangle Table** +``` ++----+----+----+ +| x | y | z | ++----+----+----+ +| 13 | 15 | 30 | +| 10 | 20 | 15 | ++----+----+----+ +``` + +### **Output:** +``` ++----+----+----+----------+ +| x | y | z | triangle | ++----+----+----+----------+ +| 13 | 15 | 30 | No | +| 10 | 20 | 15 | Yes | ++----+----+----+----------+ +``` + +### **Explanation:** +- For the first row: `13 + 15` is not greater than `30`, so the segments cannot form a triangle. +- For the second row: All conditions are met (`10+20 > 15`, `10+15 > 20`, `20+15 > 10`), so they form a triangle. + +--- + +## **Solution Approaches** + +### **SQL Solution** +```sql +SELECT + x, + y, + z, + IF(x + y > z AND x + z > y AND y + z > x, 'Yes', 'No') AS triangle +FROM Triangle; +``` +**Explanation:** +- The query checks if the sum of any two sides is greater than the third side. +- If all conditions are true, it returns `'Yes'`; otherwise, it returns `'No'`. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def triangle_judgement(triangle: pd.DataFrame) -> pd.DataFrame: + # Create a new column 'triangle' based on the triangle inequality conditions + triangle['triangle'] = triangle.apply( + lambda row: 'Yes' if (row['x'] + row['y'] > row['z'] and + row['x'] + row['z'] > row['y'] and + row['y'] + row['z'] > row['x']) else 'No', + axis=1 + ) + return triangle + +# Example usage: +# df = pd.DataFrame({'x': [13, 10], 'y': [15, 20], 'z': [30, 15]}) +# print(triangle_judgement(df)) +``` +**Explanation:** +- The Pandas solution uses `apply()` with a lambda function to evaluate the triangle inequality for each row. +- It then creates a new column `triangle` with the result `'Yes'` or `'No'`. + +--- + +## **File Structure** +``` +LeetCode610/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. 
+``` + +--- + +## **Useful Links** +- [LeetCode Problem 610](https://leetcode.com/problems/triangle-judgement/) +- [SQL IF Function](https://www.w3schools.com/sql/func_mysql_if.asp) +- [Pandas apply() Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) diff --git a/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.py b/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.py new file mode 100644 index 0000000..1c92778 --- /dev/null +++ b/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.py @@ -0,0 +1,13 @@ +import pandas as pd + +def biggest_single_number(my_numbers: pd.DataFrame) -> pd.DataFrame: + # Group by 'num' and filter those numbers that appear exactly once + unique_numbers = my_numbers.groupby('num').filter(lambda group: len(group) == 1) + + # Determine the largest single number, if any + if unique_numbers.empty: + result = None + else: + result = unique_numbers['num'].max() + + return pd.DataFrame({'num': [result]}) diff --git a/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.sql b/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.sql new file mode 100644 index 0000000..0d98df2 --- /dev/null +++ b/LeetCode SQL 50 Solution/619. Biggest Single Number/619. Biggest Single Number.sql @@ -0,0 +1,77 @@ +619. Biggest Single Number + +Table: MyNumbers + ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| num | int | ++-------------+------+ +This table may contain duplicates (In other words, there is no primary key for this table in SQL). +Each row of this table contains an integer. + + +A single number is a number that appeared only once in the MyNumbers table. + +Find the largest single number. If there is no single number, report null. + +The result format is in the following example. + + + +Example 1: + +Input: +MyNumbers table: ++-----+ +| num | ++-----+ +| 8 | +| 8 | +| 3 | +| 3 | +| 1 | +| 4 | +| 5 | +| 6 | ++-----+ +Output: ++-----+ +| num | ++-----+ +| 6 | ++-----+ +Explanation: The single numbers are 1, 4, 5, and 6. +Since 6 is the largest single number, we return it. +Example 2: + +Input: +MyNumbers table: ++-----+ +| num | ++-----+ +| 8 | +| 8 | +| 7 | +| 7 | +| 3 | +| 3 | +| 3 | ++-----+ +Output: ++------+ +| num | ++------+ +| null | ++------+ +Explanation: There are no single numbers in the input table so we return null. + + +# Write your MySQL query statement below +SELECT MAX(num) AS num +FROM ( + SELECT num + FROM MyNumbers + GROUP BY num + HAVING COUNT(num) = 1 +) AS unique_numbers; \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/619. Biggest Single Number/readme.md b/LeetCode SQL 50 Solution/619. Biggest Single Number/readme.md new file mode 100644 index 0000000..048f76d --- /dev/null +++ b/LeetCode SQL 50 Solution/619. Biggest Single Number/readme.md @@ -0,0 +1,151 @@ +# **619. Biggest Single Number** + +## **Problem Statement** +You are given a table `MyNumbers` that contains integers, which may include duplicates. + +### **MyNumbers Table** +```rb ++-------------+------+ +| Column Name | Type | ++-------------+------+ +| num | int | ++-------------+------+ +``` +- There is **no primary key** for this table. +- Each row contains an integer. + +### **Task:** +A **single number** is a number that appears **only once** in the `MyNumbers` table. +Find the **largest single number**. If there is no single number, report `null`. 
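+
+In SQL this `null` case needs no special handling, because an aggregate such as `MAX()` over zero rows yields `NULL`; a minimal illustration:
+
+```sql
+-- An aggregate query without GROUP BY always returns one row,
+-- and MAX() over zero matching rows is NULL.
+SELECT MAX(num) AS num
+FROM MyNumbers
+WHERE 1 = 0;
+```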
+ +--- + +## **Example 1:** + +### **Input:** +```rb +MyNumbers table: ++-----+ +| num | ++-----+ +| 8 | +| 8 | +| 3 | +| 3 | +| 1 | +| 4 | +| 5 | +| 6 | ++-----+ +``` + +### **Output:** +```rb ++-----+ +| num | ++-----+ +| 6 | ++-----+ +``` + +### **Explanation:** +- The single numbers (appear exactly once) are: **1, 4, 5, 6**. +- The largest among these is **6**. + +--- + +## **Example 2:** + +### **Input:** +```rb +MyNumbers table: ++-----+ +| num | ++-----+ +| 8 | +| 8 | +| 7 | +| 7 | +| 3 | +| 3 | +| 3 | ++-----+ +``` + +### **Output:** +``` ++------+ +| num | ++------+ +| null | ++------+ +``` + +### **Explanation:** +- There are no single numbers (all numbers appear more than once), so the result is `null`. + +--- + +## **Solution Approaches** + +### **SQL Solution** +```sql +SELECT MAX(num) AS num +FROM ( + SELECT num + FROM MyNumbers + GROUP BY num + HAVING COUNT(num) = 1 +) AS unique_numbers; +``` +**Explanation:** +- The subquery groups by `num` and filters to include only those numbers that appear exactly once (`HAVING COUNT(num) = 1`). +- The outer query returns the maximum value from these unique numbers. +- If no unique number exists, `MAX(num)` returns `null`. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def biggest_single_number(my_numbers: pd.DataFrame) -> pd.DataFrame: + # Group by 'num' and filter those numbers that appear exactly once + unique_numbers = my_numbers.groupby('num').filter(lambda group: len(group) == 1) + + # Determine the largest single number, if any + if unique_numbers.empty: + result = None + else: + result = unique_numbers['num'].max() + + return pd.DataFrame({'num': [result]}) + +# Example usage: +# df = pd.DataFrame({'num': [8, 8, 3, 3, 1, 4, 5, 6]}) +# print(biggest_single_number(df)) +``` +**Explanation:** +- The solution groups the DataFrame by `num` and filters groups where the number appears exactly once. +- It then calculates the maximum from the filtered DataFrame. +- If there are no unique numbers, it returns `None`. + +--- + +## **File Structure** +``` +LeetCode619/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 619](https://leetcode.com/problems/biggest-single-number/) +- [SQL GROUP BY and HAVING Clause](https://www.w3schools.com/sql/sql_groupby.asp) +- [Pandas GroupBy Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) +- [Pandas filter() Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.filter.html) + diff --git a/LeetCode SQL 50 Solution/620. Not Boring Movies/620. Not Boring Movies.py b/LeetCode SQL 50 Solution/620. Not Boring Movies/620. Not Boring Movies.py new file mode 100644 index 0000000..b1df1fa --- /dev/null +++ b/LeetCode SQL 50 Solution/620. Not Boring Movies/620. Not Boring Movies.py @@ -0,0 +1,7 @@ +import pandas as pd + +def not_boring_movies(cinema: pd.DataFrame) -> pd.DataFrame: + # Filter movies with odd-numbered id and description not equal to 'boring' + result = cinema[(cinema['id'] % 2 == 1) & (cinema['description'] != 'boring')] + # Sort the result by rating in descending order + return result.sort_values(by='rating', ascending=False) diff --git a/LeetCode SQL 50 Solution/620. Not Boring Movies/620. 
Not Boring Movies.sql b/LeetCode SQL 50 Solution/620. Not Boring Movies/620. Not Boring Movies.sql new file mode 100644 index 0000000..d194174 --- /dev/null +++ b/LeetCode SQL 50 Solution/620. Not Boring Movies/620. Not Boring Movies.sql @@ -0,0 +1,56 @@ +620. Not Boring Movies +Easy +Topics +Companies +SQL Schema +Pandas Schema +Table: Cinema + ++----------------+----------+ +| Column Name | Type | ++----------------+----------+ +| id | int | +| movie | varchar | +| description | varchar | +| rating | float | ++----------------+----------+ +id is the primary key (column with unique values) for this table. +Each row contains information about the name of a movie, its genre, and its rating. +rating is a 2 decimal places float in the range [0, 10] + + +Write a solution to report the movies with an odd-numbered ID and a description that is not "boring". + +Return the result table ordered by rating in descending order. + +The result format is in the following example. + + + +Example 1: + +Input: +Cinema table: ++----+------------+-------------+--------+ +| id | movie | description | rating | ++----+------------+-------------+--------+ +| 1 | War | great 3D | 8.9 | +| 2 | Science | fiction | 8.5 | +| 3 | irish | boring | 6.2 | +| 4 | Ice song | Fantacy | 8.6 | +| 5 | House card | Interesting | 9.1 | ++----+------------+-------------+--------+ +Output: ++----+------------+-------------+--------+ +| id | movie | description | rating | ++----+------------+-------------+--------+ +| 5 | House card | Interesting | 9.1 | +| 1 | War | great 3D | 8.9 | ++----+------------+-------------+--------+ +Explanation: +We have three movies with odd-numbered IDs: 1, 3, and 5. The movie with ID = 3 is boring so we do not include it in the answer. + +We sort the remaining movies by rating in descending order. + +# Write your MySQL query statement below +SELECT * FROM Cinema WHERE id % 2 = 1 AND description != 'boring' ORDER BY rating DESC diff --git a/LeetCode SQL 50 Solution/620. Not Boring Movies/readme.md b/LeetCode SQL 50 Solution/620. Not Boring Movies/readme.md new file mode 100644 index 0000000..9a6d9ef --- /dev/null +++ b/LeetCode SQL 50 Solution/620. Not Boring Movies/readme.md @@ -0,0 +1,117 @@ +# **620. Not Boring Movies** + +## **Problem Statement** +You are given a table `Cinema` that contains information about movies, their descriptions, and ratings. + +### **Cinema Table** +``` ++----------------+----------+ +| Column Name | Type | ++----------------+----------+ +| id | int | +| movie | varchar | +| description | varchar | +| rating | float | ++----------------+----------+ +``` +- `id` is the **primary key**. +- Each row provides details about a movie: + - `id`: The movie's unique identifier. + - `movie`: The name of the movie. + - `description`: The description or genre of the movie. + - `rating`: A float representing the movie's rating (in the range [0, 10] with 2 decimal places). + +### **Task:** +Write a solution to report the movies that have: +- An **odd-numbered `id`**. +- A `description` that is **not "boring"**. + +Return the result table **ordered by rating in descending order**. 
+ +--- + +## **Example 1:** + +### **Input:** +#### **Cinema Table** +``` ++----+------------+-------------+--------+ +| id | movie | description | rating | ++----+------------+-------------+--------+ +| 1 | War | great 3D | 8.9 | +| 2 | Science | fiction | 8.5 | +| 3 | irish | boring | 6.2 | +| 4 | Ice song | Fantacy | 8.6 | +| 5 | House card | Interesting | 9.1 | ++----+------------+-------------+--------+ +``` + +### **Output:** +``` ++----+------------+-------------+--------+ +| id | movie | description | rating | ++----+------------+-------------+--------+ +| 5 | House card | Interesting | 9.1 | +| 1 | War | great 3D | 8.9 | ++----+------------+-------------+--------+ +``` + +### **Explanation:** +- Movies with **odd-numbered IDs**: `1`, `3`, and `5`. +- Excluding movie with `id = 3` because its description is `"boring"`. +- Sorting the remaining movies by `rating` in descending order gives the result. + +--- + +## **Solution Approaches** + +### **SQL Solution** +```sql +SELECT * +FROM Cinema +WHERE id % 2 = 1 + AND description != 'boring' +ORDER BY rating DESC; +``` +**Explanation:** +- The query filters movies where the `id` is odd (`id % 2 = 1`) and the `description` is not `"boring"`. +- The results are ordered by `rating` in descending order. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def not_boring_movies(cinema: pd.DataFrame) -> pd.DataFrame: + # Filter movies with odd-numbered id and description not equal to 'boring' + result = cinema[(cinema['id'] % 2 == 1) & (cinema['description'] != 'boring')] + # Sort the result by rating in descending order + return result.sort_values(by='rating', ascending=False) + +# Example usage: +# cinema_df = pd.read_csv('cinema.csv') +# print(not_boring_movies(cinema_df)) +``` +**Explanation:** +- The Pandas solution filters the DataFrame to include only rows where the `id` is odd and the `description` is not `"boring"`. +- It then sorts the filtered results by `rating` in descending order. + +--- + +## **File Structure** +``` +LeetCode620/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution for Python users. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 620](https://leetcode.com/problems/not-boring-movies/) +- [SQL WHERE Clause Documentation](https://www.w3schools.com/sql/sql_where.asp) +- [Pandas Documentation](https://pandas.pydata.org/docs/) + diff --git a/LeetCode SQL 50 Solution/626. Exchange Seats/626. Exchange Seats.py b/LeetCode SQL 50 Solution/626. Exchange Seats/626. Exchange Seats.py new file mode 100644 index 0000000..512b00b --- /dev/null +++ b/LeetCode SQL 50 Solution/626. Exchange Seats/626. Exchange Seats.py @@ -0,0 +1,18 @@ +import pandas as pd + +def exchange_seats(seat: pd.DataFrame) -> pd.DataFrame: + # Total number of students + total = seat.shape[0] + + # Function to compute the new seat id + def new_id(row): + # For odd id values: + if row['id'] % 2 != 0: + # If it's the last row in an odd-length list, do not change the id. + if row['id'] == total: + return row['id'] + else: + return row['id'] + 1 + # For even id values, swap with previous odd id + else: + return row['id'] - 1 \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/626. Exchange Seats/626. Exchange Seats.sql b/LeetCode SQL 50 Solution/626. Exchange Seats/626. 
Exchange Seats.sql new file mode 100644 index 0000000..d4ba56e --- /dev/null +++ b/LeetCode SQL 50 Solution/626. Exchange Seats/626. Exchange Seats.sql @@ -0,0 +1,65 @@ +626. Exchange Seats +Solved +Medium +Topics +Companies +SQL Schema +Pandas Schema +Table: Seat + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| student | varchar | ++-------------+---------+ +id is the primary key (unique value) column for this table. +Each row of this table indicates the name and the ID of a student. +The ID sequence always starts from 1 and increments continuously. + + +Write a solution to swap the seat id of every two consecutive students. If the number of students is odd, the id of the last student is not swapped. + +Return the result table ordered by id in ascending order. + +The result format is in the following example. + + + +Example 1: + +Input: +Seat table: ++----+---------+ +| id | student | ++----+---------+ +| 1 | Abbot | +| 2 | Doris | +| 3 | Emerson | +| 4 | Green | +| 5 | Jeames | ++----+---------+ +Output: ++----+---------+ +| id | student | ++----+---------+ +| 1 | Doris | +| 2 | Abbot | +| 3 | Green | +| 4 | Emerson | +| 5 | Jeames | ++----+---------+ +Explanation: +Note that if the number of students is odd, there is no need to change the last one's seat. +''' + +# MYSQL Query Accepted + +SELECT ( CASE + WHEN id%2 != 0 AND id != counts THEN id+1 + WHEN id%2 != 0 AND id = counts THEN id + ELSE id-1 + END) AS id, student +FROM seat, (select count(*) as counts from seat) +AS seat_counts +ORDER BY id ASC \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/626. Exchange Seats/readme.md b/LeetCode SQL 50 Solution/626. Exchange Seats/readme.md new file mode 100644 index 0000000..7bc6b79 --- /dev/null +++ b/LeetCode SQL 50 Solution/626. Exchange Seats/readme.md @@ -0,0 +1,147 @@ +# **626. Exchange Seats** + +## **Problem Statement** +You are given a table `Seat` that contains the seat IDs and names of students. The seat IDs are assigned consecutively starting from 1. + +### **Seat Table** +``` ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| id | int | +| student | varchar | ++-------------+---------+ +``` +- `id` is the **primary key** (unique value). +- Each row represents a student and their assigned seat. +- The `id` sequence always starts from 1 and increments continuously. + +### **Task:** +Swap the seat `id` of every two consecutive students. +- If the number of students is odd, the `id` of the last student remains unchanged. +- Return the result table ordered by `id` in ascending order. + +--- + +## **Example 1:** + +### **Input:** +``` +Seat table: ++----+---------+ +| id | student | ++----+---------+ +| 1 | Abbot | +| 2 | Doris | +| 3 | Emerson | +| 4 | Green | +| 5 | Jeames | ++----+---------+ +``` + +### **Output:** +``` ++----+---------+ +| id | student | ++----+---------+ +| 1 | Doris | +| 2 | Abbot | +| 3 | Green | +| 4 | Emerson | +| 5 | Jeames | ++----+---------+ +``` + +### **Explanation:** +- Swap the seat assignments of every two consecutive students: + - Seats 1 and 2: **Abbot** and **Doris** swap positions. + - Seats 3 and 4: **Emerson** and **Green** swap positions. + - Since the number of students is odd, **Jeames** (seat 5) remains in the same seat. 
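+
+Besides the approaches below, the same swap can be expressed with window functions on MySQL 8+. This is only a hedged alternative sketch, not the solution used in this folder: for an odd `id`, `LEAD` takes the next student (`NULL` for a trailing odd row, so `COALESCE` keeps the original), and for an even `id`, `LAG` takes the previous one.
+
+```sql
+SELECT
+    id,
+    COALESCE(
+        CASE
+            WHEN id % 2 = 1 THEN LEAD(student) OVER (ORDER BY id)
+            ELSE LAG(student) OVER (ORDER BY id)
+        END,
+        student
+    ) AS student
+FROM Seat
+ORDER BY id;
+```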
+ +--- + +## **Solution Approaches** + +### **SQL Solution** +```sql +SELECT + CASE + WHEN id % 2 != 0 AND id != counts THEN id + 1 + WHEN id % 2 != 0 AND id = counts THEN id + ELSE id - 1 + END AS id, + student +FROM Seat, (SELECT COUNT(*) AS counts FROM Seat) AS seat_counts +ORDER BY id ASC; +``` +**Explanation:** +- The subquery `(SELECT COUNT(*) AS counts FROM Seat)` computes the total number of students. +- The `CASE` statement swaps IDs: + - For odd `id` (except the last one if the count is odd), we add 1. + - For even `id`, we subtract 1. + - For the last student in an odd-length list, we leave the `id` unchanged. +- The results are then ordered by the new `id` in ascending order. + +--- + +### **Pandas Solution** +```python +import pandas as pd + +def exchange_seats(seat: pd.DataFrame) -> pd.DataFrame: + # Total number of students + total = seat.shape[0] + + # Function to compute the new seat id + def new_id(row): + # For odd id values: + if row['id'] % 2 != 0: + # If it's the last row in an odd-length list, do not change the id. + if row['id'] == total: + return row['id'] + else: + return row['id'] + 1 + # For even id values, swap with previous odd id + else: + return row['id'] - 1 + + # Apply the new_id function to each row + seat['new_id'] = seat.apply(new_id, axis=1) + + # Sort by the new seat id and select the desired columns + result = seat.sort_values('new_id')[['new_id', 'student']].rename(columns={'new_id': 'id'}) + + return result.reset_index(drop=True) + +# Example usage: +# data = {'id': [1, 2, 3, 4, 5], 'student': ['Abbot', 'Doris', 'Emerson', 'Green', 'Jeames']} +# df = pd.DataFrame(data) +# print(exchange_seats(df)) +``` +**Explanation:** +- The solution calculates the total number of rows. +- A helper function `new_id` computes the new seat id: + - For odd `id`s (except the last one), add 1. + - For even `id`s, subtract 1. + - Leave the last seat unchanged if the count is odd. +- The DataFrame is sorted by the new `id`, and the result is returned. + +--- + +## **File Structure** +``` +LeetCode626/ +β”œβ”€β”€ problem_statement.md # Contains the problem description and constraints. +β”œβ”€β”€ sql_solution.sql # Contains the SQL solution. +β”œβ”€β”€ pandas_solution.py # Contains the Pandas solution. +β”œβ”€β”€ README.md # Overview of the problem and available solutions. +``` + +--- + +## **Useful Links** +- [LeetCode Problem 626](https://leetcode.com/problems/exchange-seats/) +- [SQL CASE Statement Documentation](https://www.w3schools.com/sql/sql_case.asp) +- [Pandas apply() Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) +- [Pandas DataFrame Sorting](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html) + diff --git a/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.py b/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.py new file mode 100644 index 0000000..96e3009 --- /dev/null +++ b/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.py @@ -0,0 +1,141 @@ +Below is a complete Python solution using a class with methods to solve the problem with Pandas. This code creates the data (mimicking the SQL tables), then provides methods to run each query. 
+ +```python +import pandas as pd + +class ProsnalDatabase: + def __init__(self): + # Create tables (as Pandas DataFrames) + self.create_tables() + + def create_tables(self): + # Employees Table + self.employees = pd.DataFrame({ + 'Id': [1, 2, 3, 4, 5], + 'Name': ['John Doe', 'Jane Smith', 'Mike Johnson', 'Sarah Black', 'David White'], + 'Salary': [50000, 60000, 70000, 60000, 70000], + 'Department': ['HR', 'Finance', 'IT', 'Finance', 'IT'] + }) + + # Projects Table + self.projects = pd.DataFrame({ + 'Id': [1, 2, 3], + 'Name': ['Project A', 'Project B', 'Project C'], + 'Department': ['IT', 'Finance', 'IT'] + }) + + # EmployeeProjects Table + self.employee_projects = pd.DataFrame({ + 'EmployeeId': [1, 1, 2, 3, 3, 4, 5], + 'ProjectId': [1, 2, 1, 1, 3, 2, 1] + }) + + # Users Table + self.users = pd.DataFrame({ + 'id': [1, 2], + 'name': ['John Doe', 'Jane Smith'], + 'email': ['john.doe@example.com', 'antima@example.com'] + }) + + # Sessions Table + self.sessions = pd.DataFrame({ + 'id': [1, 2, 3], + 'user_id': [1, 1, 2], + 'session_date': pd.to_datetime(['2022-01-01', '2022-01-02', '2022-01-03']) + }) + + # Products Table + self.products = pd.DataFrame({ + 'id': [1, 2, 3], + 'name': ['Product A', 'Product B', 'Product C'], + 'price': [10.99, 20.99, 30.99] + }) + + # Orders Table + self.orders = pd.DataFrame({ + 'id': [1, 2, 3, 4, 5], + 'product_id': [1, 2, 3, 1, 2], + 'quantity': [5, 3, 1, 2, 4] + }) + + def query_employee_projects(self): + """ + Returns a DataFrame that lists each employee's project details. + This joins the Employees, EmployeeProjects, and Projects DataFrames. + """ + # Merge Employees with EmployeeProjects + emp_proj = self.employees.merge( + self.employee_projects, left_on='Id', right_on='EmployeeId' + ) + # Merge with Projects to get project details + emp_proj = emp_proj.merge( + self.projects, left_on='ProjectId', right_on='Id', suffixes=('_Employee', '_Project') + ) + # Select and rename desired columns + result = emp_proj[['Name_Employee', 'Department_Employee', 'Name_Project', 'Department_Project']] + result = result.rename(columns={ + 'Name_Employee': 'EmployeeName', + 'Department_Employee': 'EmployeeDepartment', + 'Name_Project': 'ProjectName', + 'Department_Project': 'ProjectDepartment' + }) + return result + + def query_user_sessions(self): + """ + Returns a DataFrame that lists the session dates for each user. + This joins the Users and Sessions DataFrames. + """ + user_sessions = self.users.merge( + self.sessions, left_on='id', right_on='user_id' + ) + result = user_sessions[['name', 'session_date']].rename(columns={'name': 'user_name'}) + return result + + def query_order_totals(self): + """ + Returns a DataFrame that calculates the total price for each order. + This joins the Products and Orders DataFrames and computes total price = price * quantity. 
+ """ + orders_merged = self.orders.merge( + self.products, left_on='product_id', right_on='id' + ) + orders_merged['total_price'] = orders_merged['price'] * orders_merged['quantity'] + result = orders_merged[['name', 'quantity', 'total_price']].rename(columns={'name': 'product_name'}) + return result + +# Example usage: +if __name__ == '__main__': + db = ProsnalDatabase() + + print("Employee Projects:") + print(db.query_employee_projects(), "\n") + + print("User Sessions:") + print(db.query_user_sessions(), "\n") + + print("Order Totals:") + print(db.query_order_totals()) +``` + +### Explanation: +- **Class Initialization (`__init__`)**: + The `ProsnalDatabase` class initializes by calling `create_tables()`, which creates sample DataFrames for all tables. + +- **`create_tables()`**: + This method creates DataFrames for `Employees`, `Projects`, `EmployeeProjects`, `Users`, `Sessions`, `Products`, and `Orders` with sample data. + +- **Query Methods**: + - **`query_employee_projects()`**: + Merges the `employees`, `employee_projects`, and `projects` DataFrames to show which employee works on which project. Columns are renamed for clarity. + + - **`query_user_sessions()`**: + Merges the `users` and `sessions` DataFrames to list session dates for each user. + + - **`query_order_totals()`**: + Merges the `orders` DataFrame with the `products` DataFrame and calculates the total price for each order. + +- **Main Block**: + An instance of `ProsnalDatabase` is created, and the query methods are executed to display results. + +This modular, class-based approach using Pandas helps encapsulate data and query logic in a single, easy-to-manage unit. Let me know if you need any further modifications! \ No newline at end of file diff --git a/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.sql b/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.sql new file mode 100644 index 0000000..4e75df7 --- /dev/null +++ b/LeetCode SQL 50 Solution/prosnal_database/prosnal_database.sql @@ -0,0 +1,104 @@ +CREATE DATABASE IF NOT EXISTS prosnal_database; +USE prosnal_database; + +CREATE TABLE IF NOT EXISTS Employees ( + Id INT, + Name VARCHAR(255), + Salary INT, + Department VARCHAR(255) +); + +INSERT INTO Employees (Id, Name, Salary, Department) +VALUES (1, 'John Doe', 50000, 'HR'), + (2, 'Jane Smith', 60000, 'Finance'), + (3, 'Mike Johnson', 70000, 'IT'), + (4, 'Sarah Black', 60000, 'Finance'), + (5, 'David White', 70000, 'IT'); + +CREATE TABLE IF NOT EXISTS Projects ( + Id INT, + Name VARCHAR(255), + Department VARCHAR(255) + ); + + INSERT INTO Projects (Id, Name, Department) + VALUES (1, 'Project A', 'IT'), + (2, 'Project B', 'Finance'), + (3, 'Project C', 'IT'); + +CREATE TABLE IF NOT EXISTS EmployeeProjects ( + EmployeeId INT, + ProjectId INT + ); + + INSERT INTO EmployeeProjects (EmployeeId, ProjectId) + VALUES (1, 1), + (1, 2), + (2, 1), + (3, 1), + (3, 3), + (4, 2), + (5, 1); + +SELECT e.Name AS EmployeeName, e.Department, p.Name AS ProjectName, p.Department +FROM Employees e +JOIN EmployeeProjects ep +ON e.Id = ep.EmployeeId +JOIN Projects p +ON ep.ProjectId = p.Id; + +CREATE TABLE users ( + id INT PRIMARY KEY, + name VARCHAR(50), + email VARCHAR(100) +); + +INSERT INTO users (id, name, email) +VALUES (1, 'John Doe', 'john.doe@example.com'), + (2, 'Jane Smith', 'antima@example.com'); + +CREATE TABLE sessions ( + id INT PRIMARY KEY, + user_id INT, + session_date DATE +); + +INSERT INTO sessions (id, user_id, session_date) +VALUES (1, 1, '2022-01-01'), + (2, 1, '2022-01-02'), + (3, 2, 
'2022-01-03'); + +SELECT u.name AS user_name, s.session_date +FROM users u +JOIN sessions s +ON u.id = s.user_id; + +CREATE TABLE IF NOT EXISTS products ( + id INT PRIMARY KEY, + name VARCHAR(50), + price DECIMAL(10, 2) +); + +INSERT INTO products (id, name, price) +VALUES (1, 'Product A', 10.99), + (2, 'Product B', 20.99), + (3, 'Product C', 30.99); + +CREATE TABLE IF NOT EXISTS orders ( + id INT PRIMARY KEY, + product_id INT, + quantity INT +); + +INSERT INTO orders (id, product_id, quantity) +VALUES (1, 1, 5), + (2, 2, 3), + (3, 3, 1), + (4, 1, 2), + (5, 2, 4); + +SELECT p.name AS product_name, o.quantity, p.price * o.quantity AS total_price +FROM products p +JOIN orders o +ON p.id = o.product_id; +``` diff --git a/LeetCode SQL 50 Solution/prosnal_database/readme.md b/LeetCode SQL 50 Solution/prosnal_database/readme.md new file mode 100644 index 0000000..2fed417 --- /dev/null +++ b/LeetCode SQL 50 Solution/prosnal_database/readme.md @@ -0,0 +1,221 @@ + +# Prosnal Database Example + +This repository contains SQL scripts for creating and populating a sample database called `prosnal_database`. The database includes multiple tables and queries demonstrating joins and calculations. The following sections describe the database schema, sample data insertion, and example queries. + +--- + +## Database and Tables + +### 1. Create and Use Database +```sql +CREATE DATABASE IF NOT EXISTS prosnal_database; +USE prosnal_database; +``` +- This command creates the database `prosnal_database` if it does not exist and sets it as the current working database. + +--- + +### 2. Employees Table +```sql +CREATE TABLE IF NOT EXISTS Employees ( + Id INT, + Name VARCHAR(255), + Salary INT, + Department VARCHAR(255) +); +``` +- **Description:** + Contains employee information such as `Id`, `Name`, `Salary`, and `Department`. + +#### Sample Data: +```sql +INSERT INTO Employees (Id, Name, Salary, Department) +VALUES (1, 'John Doe', 50000, 'HR'), + (2, 'Jane Smith', 60000, 'Finance'), + (3, 'Mike Johnson', 70000, 'IT'), + (4, 'Sarah Black', 60000, 'Finance'), + (5, 'David White', 70000, 'IT'); +``` + +--- + +### 3. Projects Table +```sql +CREATE TABLE IF NOT EXISTS Projects ( + Id INT, + Name VARCHAR(255), + Department VARCHAR(255) +); +``` +- **Description:** + Contains project details including project `Id`, `Name`, and the corresponding `Department`. + +#### Sample Data: +```sql +INSERT INTO Projects (Id, Name, Department) +VALUES (1, 'Project A', 'IT'), + (2, 'Project B', 'Finance'), + (3, 'Project C', 'IT'); +``` + +--- + +### 4. EmployeeProjects Table +```sql +CREATE TABLE IF NOT EXISTS EmployeeProjects ( + EmployeeId INT, + ProjectId INT +); +``` +- **Description:** + Associates employees with projects. + +#### Sample Data: +```sql +INSERT INTO EmployeeProjects (EmployeeId, ProjectId) +VALUES (1, 1), + (1, 2), + (2, 1), + (3, 1), + (3, 3), + (4, 2), + (5, 1); +``` + +--- + +### 5. Users and Sessions Tables +#### Users Table +```sql +CREATE TABLE users ( + id INT PRIMARY KEY, + name VARCHAR(50), + email VARCHAR(100) +); +``` +- **Description:** + Contains user information with unique `id`, `name`, and `email`. + +#### Sample Data: +```sql +INSERT INTO users (id, name, email) +VALUES (1, 'John Doe', 'john.doe@example.com'), + (2, 'Jane Smith', 'antima@example.com'); +``` + +#### Sessions Table +```sql +CREATE TABLE sessions ( + id INT PRIMARY KEY, + user_id INT, + session_date DATE +); +``` +- **Description:** + Records session data with session `id`, associated `user_id`, and the `session_date`. 
+
+#### Sample Data:
+```sql
+INSERT INTO sessions (id, user_id, session_date)
+VALUES (1, 1, '2022-01-01'),
+       (2, 1, '2022-01-02'),
+       (3, 2, '2022-01-03');
+```
+
+---
+
+### 6. Products and Orders Tables
+#### Products Table
+```sql
+CREATE TABLE IF NOT EXISTS products (
+    id INT PRIMARY KEY,
+    name VARCHAR(50),
+    price DECIMAL(10, 2)
+);
+```
+- **Description:**
+  Contains product details such as `id`, `name`, and `price`.
+
+#### Sample Data:
+```sql
+INSERT INTO products (id, name, price)
+VALUES (1, 'Product A', 10.99),
+       (2, 'Product B', 20.99),
+       (3, 'Product C', 30.99);
+```
+
+#### Orders Table
+```sql
+CREATE TABLE IF NOT EXISTS orders (
+    id INT PRIMARY KEY,
+    product_id INT,
+    quantity INT
+);
+```
+- **Description:**
+  Stores order details including order `id`, associated `product_id`, and order `quantity`.
+
+#### Sample Data:
+```sql
+INSERT INTO orders (id, product_id, quantity)
+VALUES (1, 1, 5),
+       (2, 2, 3),
+       (3, 3, 1),
+       (4, 1, 2),
+       (5, 2, 4);
+```
+
+---
+
+## Example Queries
+
+### Query 1: List Employee Projects
+Join the **Employees**, **EmployeeProjects**, and **Projects** tables to list each employee's project details.
+```sql
+SELECT e.Name AS EmployeeName, e.Department AS EmployeeDepartment, p.Name AS ProjectName, p.Department AS ProjectDepartment
+FROM Employees e
+JOIN EmployeeProjects ep ON e.Id = ep.EmployeeId
+JOIN Projects p ON ep.ProjectId = p.Id;
+```
+
+### Query 2: List User Sessions
+Join the **users** and **sessions** tables to list the session dates for each user.
+```sql
+SELECT u.name AS user_name, s.session_date
+FROM users u
+JOIN sessions s ON u.id = s.user_id;
+```
+
+### Query 3: Calculate Order Totals
+Join the **products** and **orders** tables to calculate the total price for each order.
+```sql
+SELECT p.name AS product_name, o.quantity, p.price * o.quantity AS total_price
+FROM products p
+JOIN orders o ON p.id = o.product_id;
+```
+
+---
+
+## Suggested File Structure
+```
+Prosnal_Database/
+│── README.md
+│── schema.sql -- Contains all CREATE TABLE and INSERT statements.
+│── queries.sql -- Contains sample SELECT queries.
+│── sample_data.csv -- (Optional) CSV files for sample data.
+```
+
+---
+
+## Useful Links
+- [MySQL Documentation](https://dev.mysql.com/doc/)
+- [W3Schools SQL Tutorial](https://www.w3schools.com/sql/)
+- [Pandas Documentation](https://pandas.pydata.org/docs/)
+
+---
+
+This `README.md` provides a comprehensive overview of the database creation, data insertion, and query examples.
\ No newline at end of file
diff --git a/README.md b/README.md
index 61ebe5e..42d93d3 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,54 @@
+
+# LeetCode_SQL_Database
+This repo contains my SQL solutions for LeetCode problems. Each file includes a problem description, an optimized SQL query, and explanations. The goal is to improve SQL skills, share knowledge, and collaborate. Feel free to explore, suggest improvements, or contribute! πŸš€
+
+
+## LeetCode SQL Solutions
-LeetCode SQL Solutions:-
+This repository contains my solutions to SQL problems from LeetCode. Each solution is implemented using MySQL and aims to demonstrate efficient and optimal approaches to solving common database-related challenges.
+
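+
+For illustration only (this snippet is an example, not one of the solution files in this repo), a typical solution file pairs a short problem note with a commented MySQL query, shown here with LeetCode 595 (Big Countries):
+
+```sql
+-- 595. Big Countries
+-- A country is considered big if its area is at least 3,000,000 km^2
+-- or its population is at least 25,000,000.
+SELECT name, population, area
+FROM World
+WHERE area >= 3000000
+   OR population >= 25000000;
+```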
+
+
+
+## Repository Structure
-Repository Structure:-
Problem Name: Each SQL file is named after the LeetCode problem it solves.
Solution Explanation: Each SQL file includes comments explaining the logic and approach used to solve the problem.
Testing: Sample test cases or scenarios may be included in the comments or README where applicable.
Why This Repository?
-Learning and Practice:-
-This repository serves as a resource for learning SQL techniques for solving algorithmic problems.
-Collaboration: I welcome feedback and suggestions for improving solutions or alternative approaches.
+
+## Learning and Practice
+
+
+
+**This repository serves as a resource for learning SQL techniques for solving algorithmic problems.**
+
+### Collaboration
+
+I welcome feedback and suggestions for improving solutions or alternative approaches.
Community: By sharing these solutions, I hope to contribute to the SQL programming community and help others learn and grow.
-Contents:
+
+### Contents
+
Problem 1: Description of the problem and the SQL solution.
Problem 2: Description of the problem and the SQL solution.
Problem 3: Description of the problem and the SQL solution.
-Contributing:
+
+
+
+### Contributing
+
Feel free to fork this repository, suggest improvements, or submit your own solutions via pull requests. Let's learn and grow together!
+## Solved LeetCode SQL 50 Questions
+
+
+
+
+### Progress
+🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩🟩 100%
+
diff --git a/SVG/README.MD b/SVG/README.MD
new file mode 100644
index 0000000..941fc48
--- /dev/null
+++ b/SVG/README.MD
@@ -0,0 +1 @@
+# SVG
diff --git a/img/SQL.jpeg b/img/SQL.jpeg
new file mode 100644
index 0000000..f09d7c4
Binary files /dev/null and b/img/SQL.jpeg differ
diff --git a/img/leetcode me.png b/img/leetcode me.png
new file mode 100644
index 0000000..4fb0181
Binary files /dev/null and b/img/leetcode me.png differ
diff --git a/img/leetcode salution.png b/img/leetcode salution.png
new file mode 100644
index 0000000..8546367
Binary files /dev/null and b/img/leetcode salution.png differ
diff --git a/img/leetcode-sql.png b/img/leetcode-sql.png
new file mode 100644
index 0000000..4a82d93
Binary files /dev/null and b/img/leetcode-sql.png differ
diff --git a/img/leetcode.png b/img/leetcode.png
new file mode 100644
index 0000000..da86bd1
Binary files /dev/null and b/img/leetcode.png differ
diff --git a/img/salution.png b/img/salution.png
new file mode 100644
index 0000000..1e257b8
Binary files /dev/null and b/img/salution.png differ
diff --git a/img/sql 50.gif b/img/sql 50.gif
new file mode 100644
index 0000000..f225cd8
Binary files /dev/null and b/img/sql 50.gif differ
diff --git a/img/sql50.png b/img/sql50.png
new file mode 100644
index 0000000..e41216c
Binary files /dev/null and b/img/sql50.png differ
diff --git a/img/work.jpeg b/img/work.jpeg
new file mode 100644
index 0000000..5d294ad
Binary files /dev/null and b/img/work.jpeg differ