Python Record Manual
Python Record Manual
1. Write programs to parse text files, CSV, HTML, XML and JSON documents and
extract relevant data. After retrieving the data, check it for anomalies,
missing values, etc.
def check_missing_values(data):
    """Return the rows of *data* that contain at least one missing value.

    A value counts as missing when it is ``None``, a string that is empty or
    only whitespace, or a float NaN. Legitimate falsy values such as ``0``
    and ``False`` are not treated as missing.

    Args:
        data: Iterable of rows; each row is an iterable of cell values.

    Returns:
        A list of the rows that have a missing value, in their original
        order. Each row appears at most once.
    """
    def _is_missing(value):
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        # NaN is the only float that does not compare equal to itself.
        return isinstance(value, float) and value != value

    return [row for row in data if any(_is_missing(value) for value in row)]
# Parse the CSV file and report every row that has at least one missing value.
# NOTE(review): parse_csv_file is not defined in the visible part of this
# manual; it is presumably expected to return (header, rows) — confirm.
csv_file_path = 'data.csv'
header, csv_data = parse_csv_file(csv_file_path)
missing_values = check_missing_values(csv_data)
print("Missing values in CSV:", missing_values)
# import module
import requests
import pandas as pd
from bs4 import BeautifulSoup


def getdata(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded body of the HTTP response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A timeout keeps the script from hanging on an unresponsive host.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.text


# The URL has to be one unbroken string literal.
htmldata = getdata(
    "https://www.geeksforgeeks.org/how-to-automate-an-excel-sheet-in-python/?ref=feed"
)
soup = BeautifulSoup(htmldata, 'html.parser')

# Print the text of every <p> element found on the page.
for data in soup.find_all("p"):
    print(data.get_text())
Parsing XML Files Using ElementTree:
import xml.etree.ElementTree as ET
def parse_xml_file(file_path):
    """Parse an XML document and return its root element.

    Args:
        file_path: File name or file object, passed straight to ``ET.parse``.

    Returns:
        The root element of the parsed tree.
    """
    return ET.parse(file_path).getroot()
# Parse the sample document and dump it back out as text.
xml_file_path = 'data.xml'
parsed_xml = parse_xml_file(xml_file_path)
# Print parsed XML content (tostring returns bytes here, hence the decode).
print(ET.tostring(parsed_xml, encoding='utf-8').decode('utf-8'))
import json


def parse_json_file(file_path):
    """Load the JSON document stored at *file_path*.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded Python object (dict, list, str, number, bool or None).

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by convention; do not rely on the platform default.
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)
def check_anomalies(data):
    """Return descriptions of the missing values found in parsed JSON data.

    The structure is walked recursively. Every ``None`` (JSON ``null``) and
    every empty or whitespace-only string is reported with its location.

    Args:
        data: A parsed JSON value (dict, list or scalar).

    Returns:
        A list of strings such as ``"$.tags[1]: missing value"``. The list
        is empty when nothing is missing.
    """
    anomalies = []

    def _walk(node, path):
        if isinstance(node, dict):
            for key, value in node.items():
                _walk(value, f"{path}.{key}")
        elif isinstance(node, list):
            for index, item in enumerate(node):
                _walk(item, f"{path}[{index}]")
        elif node is None or (isinstance(node, str) and not node.strip()):
            anomalies.append(f"{path}: missing value")

    _walk(data, "$")
    # Add further domain-specific checks (e.g. unexpected keys) to _walk.
    return anomalies
# Load the sample JSON document, run the anomaly check on it and print
# whatever the check returned.
json_file_path = 'data.json'
parsed_json = parse_json_file(json_file_path)
anomalies = check_anomalies(parsed_json)
print("Anomalies in JSON:", anomalies)
Write programs for searching, splitting, and replacing strings based on pattern
matching using regular expressions
import re


def search_pattern(pattern, text):
    """Print and return every non-overlapping match of *pattern* in *text*."""
    matches = re.findall(pattern, text)
    if matches:
        print("Pattern found:", matches)
    else:
        print("Pattern not found.")
    return matches


def split_string(pattern, text):
    """Print and return *text* split at every match of *pattern*."""
    parts = re.split(pattern, text)
    print("Split result:", parts)
    return parts


def replace_pattern(pattern, replacement, text):
    """Print and return *text* with every match of *pattern* replaced."""
    replaced = re.sub(pattern, replacement, text)
    print("Replaced text:", replaced)
    return replaced


# Example usage
# Adjacent string literals are concatenated, so the sentence stays one string.
text = ("Hello there! My email is example@example.com "
        "and my phone number is 123-456-7890.")
pattern_email = r'\S+@\S+'  # Matches email addresses
pattern_phone = r'\d{3}-\d{3}-\d{4}'  # Matches phone numbers
search_pattern(pattern_email, text)
split_string(pattern_email, text)
replace_pattern(pattern_phone, '[PHONE]', text)
Design a relational database for a small application and populate the database.
Using SQL do the CRUD (create, read, update and delete) operations
CRUD:
Design a simple relational database schema and perform CRUD operations using SQL.
Let's assume we're creating a small application to manage a library's book
collection. The database will need tables to store information about books and
authors. Here's how the schema could look:
Tables:
Authors
author_id (Primary Key)
first_name
last_name
Books
book_id (Primary Key)
title
publication_year
author_id (Foreign Key referencing Authors)
Creating the Database:
-- Create the database for the library application and select it for the
-- statements that follow.
CREATE DATABASE LibraryApp;
USE LibraryApp;
CRUD Operations:
1. Create (INSERT):
-- Add a new author row.
-- NOTE(review): assumes the Authors table already exists; no CREATE TABLE
-- statement is visible in this section.
INSERT INTO Authors (author_id, first_name, last_name)
VALUES (3, 'Michael', 'Johnson');
3. Update (UPDATE):
-- Update book title
-- The WHERE clause limits the change to the row whose book_id is 2.
UPDATE Books
SET title = 'Updated Title'
WHERE book_id = 2;
4. Delete (DELETE):
-- Delete a book
-- The WHERE clause limits the deletion to the row whose book_id is 3.
DELETE FROM Books
WHERE book_id = 3;
Create a Python MongoDB client using the pymongo module and practice various
functions on a collection:
Reasons to opt for MongoDB :
1. It supports hierarchical data structures (please refer to the docs for details).
2. It supports associative arrays, like dictionaries in Python.
3. Python drivers are available to connect a Python application to the database,
for example PyMongo.
4. It is designed for Big Data.
5. Deployment of MongoDB is very easy.
Create a connection: The very first step after importing the module is to create a
MongoClient.
from pymongo import MongoClient

# The connection string must use plain ASCII quotes; typographic quotes are
# not valid Python string delimiters.
client = MongoClient("mongodb://localhost:27017/")
EXAMPLE
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['mydatabase']  # Replace 'mydatabase' with your database name
collection = db['mycollection']  # Replace 'mycollection' with your collection name

# Insert documents
data_to_insert = [
    {"name": "Alice", "age": 30, "city": "New York"},
    {"name": "Bob", "age": 25, "city": "San Francisco"},
    {"name": "Charlie", "age": 40, "city": "Los Angeles"},
]
collection.insert_many(data_to_insert)

# Find documents
result = collection.find({"city": "New York"})
for doc in result:
    print(doc)

# Update documents
collection.update_one({"name": "Alice"}, {"$set": {"age": 31}})

# Replace document
# The second argument is the complete new document, so every field that
# should survive is listed again.
collection.replace_one(
    {"name": "Bob"},
    {"name": "Bobby", "age": 26, "city": "San Francisco"},
)

# Remove documents
collection.delete_one({"name": "Charlie"})

# Aggregation: count the documents per city.
pipeline = [
    {"$group": {"_id": "$city", "count": {"$sum": 1}}},
]
aggregation_result = collection.aggregate(pipeline)
for doc in aggregation_result:
    print(doc)

# Create an index
collection.create_index("name")

# Close the connection
client.close()
Write programs to create numpy arrays of different shapes and from different
sources, reshape and slice arrays, add array indexes, and apply arithmetic, logic,
and aggregation functions to some or all array elements
What is NumPy?
NumPy is a Python library used for working with arrays.
It also has functions for working in the domains of linear algebra, Fourier
transforms, and matrices.
NumPy was created in 2005 by Travis Oliphant. It is an open source project and you
can use it freely.
NumPy stands for Numerical Python.
import numpy as np
# --- Construction: arrays of several shapes, built from different sources ---
array1d = np.array([1, 2, 3, 4, 5])            # from a flat list
array2d = np.array([[1, 2, 3], [4, 5, 6]])     # from nested lists (2 rows, 3 columns)
array_zeros = np.zeros((3, 4))                 # all zeros, 3 rows by 4 columns
array_ones = np.ones((2, 2))                   # all ones, 2 rows by 2 columns
array_range = np.arange(0, 10, 2)              # 0, 2, 4, 6, 8

# --- Reshaping and slicing ---
reshaped_array = array2d.reshape((3, 2))       # same six values, now 3 rows by 2 columns
sliced_array = array1d[1:4]                    # positions 1, 2 and 3
sliced_2d_array = array2d[:, 1]                # every row, second column

# --- Element-wise work ---
indexed_array = array1d + np.arange(5)         # each element plus its own position
addition_result = array1d + 10
multiplication_result = array2d * 2
boolean_array = array1d > 3                    # True where the element exceeds 3

# --- Aggregations ---
sum_array = np.sum(array1d)
mean_array = np.mean(array2d)
max_value = np.max(array1d)

# --- Report: one caption/value pair per line, in a fixed order ---
for _caption, _value in (
    ("1D Array:", array1d),
    ("2D Array:", array2d),
    ("Reshaped Array:", reshaped_array),
    ("Sliced Array:", sliced_array),
    ("Sliced 2D Array:", sliced_2d_array),
    ("Indexed Array:", indexed_array),
    ("Addition Result:", addition_result),
    ("Multiplication Result:", multiplication_result),
    ("Boolean Array:", boolean_array),
    ("Sum of Array:", sum_array),
    ("Mean of Array:", mean_array),
    ("Maximum Value:", max_value),
):
    print(_caption, _value)
Write programs to use the pandas data structures: Frames and series as storage
containers and for a variety of data-wrangling operations, such as:
Single-Level Indexing:
Single-level indexing is the basic way of indexing in Pandas, where you have a
single index to access rows and columns of a DataFrame or Series.
DataFrame Single-Level Indexing:
import pandas as pd

data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
# The row labels have to exist before they can be passed as the index.
index = ['row1', 'row2', 'row3']
df = pd.DataFrame(data, index=index)

# Single-level access: a row by its label, a column by its name.
print(df.loc['row1'])
print(df['A'])
import pandas as pd
Dropping Missing Data: If the missing data is not critical and doesn't affect your
analysis, you can drop rows or columns containing missing values using the dropna()
method:
Filling Missing Data: You can fill missing values with specific values using the
fillna() method:
# importing numpy as np
import numpy as np

# dictionary of lists
# Named `scores` so that the built-in `dict` type is not shadowed.
scores = {'First Score': [100, 90, np.nan, 95],
          'Second Score': [30, 45, 56, np.nan],
          'Third Score': [np.nan, 40, 80, 98]}

# creating a dataframe from dictionary
df = pd.DataFrame(scores)

# filling missing value using fillna()
print(df.fillna(0))
(OR)
# importing pandas as pd
import pandas as pd

# importing numpy as np
import numpy as np

# dictionary of lists
# Named `scores` so that the built-in `dict` type is not shadowed.
scores = {'First Score': [100, 90, np.nan, 95],
          'Second Score': [30, 45, 56, np.nan],
          'Third Score': [np.nan, 40, 80, 98]}

# creating a dataframe from dictionary
df = pd.DataFrame(scores)

# filling a missing value with
# previous ones
# ffill() is the supported spelling; fillna(method='pad') is deprecated.
# A gap in the first row stays NaN because nothing precedes it.
print(df.ffill())
# df.bfill() fills each gap with the next valid value instead.
In pandas, you can perform arithmetic and Boolean operations on entire columns and
tables using various built-in functions and operators. Pandas is a powerful library
in Python for data manipulation and analysis. Here's how you can perform these
operations:
1. Arithmetic Operations on Columns: You can perform arithmetic operations
(addition, subtraction, multiplication, division, etc.) on entire columns in a
Pandas DataFrame.
import pandas as pd

# Create a DataFrame
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# Arithmetic on whole columns is applied element by element.
df['C'] = df['A'] + df['B']  # column plus column
df['D'] = df['A'] * 2        # column times a scalar
print(df)
import pandas as pd

# Sample table with two integer columns.
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# A comparison on a column gives one True/False per row; keep it as column C.
_a_above_one = df['A'] > 1
df['C'] = _a_above_one

# The same kind of mask, used as a row selector, keeps the rows where B > 4.
_b_above_four = df['B'] > 4
filtered_df = df[_b_above_four]

# Show the full table first, then the filtered one.
print(df, filtered_df, sep="\n")
Applying Functions to Columns: You can also apply custom functions to columns using
the apply method.
import pandas as pd


def custom_function(x):
    """Return *x* doubled; called by ``apply`` once for each value."""
    return x * 2


# Create a DataFrame
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# apply() calls custom_function on each value of column A.
df['C'] = df['A'].apply(custom_function)
print(df)
import pandas as pd

# Create DataFrames
data1 = {'A': [1, 2, 3], 'B': [4, 5, 6]}
data2 = {'A': [2, 3, 4], 'B': [7, 8, 9]}
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Arithmetic between two frames lines values up by row and column label.
result_df = df1 + df2
print(result_df)
In pandas, a popular Python library for data manipulation and analysis, you can
perform various database-type operations such as merging and aggregation on
DataFrame and Series data structures. These operations are essential for working
with structured data and conducting data analysis tasks. Here's an overview of how
to perform merging and aggregation using pandas:
Merging DataFrames:
Merging is the process of combining two or more DataFrames based on common columns
or indices. The primary function for merging DataFrames in pandas is the merge()
function. Commonly used types of merges include:
1. Inner Merge: Retains only the rows with matching keys in both DataFrames.
2. Outer Merge: Includes all rows from both DataFrames, filling in missing
values with NaN where necessary.
3. Left Merge: Keeps all rows from the left DataFrame and only matching rows
from the right DataFrame.
4. Right Merge: Keeps all rows from the right DataFrame and only matching rows
from the left DataFrame.
Example of performing an inner merge:
import pandas as pd
Aggregation:
Aggregation involves summarizing data using functions like sum, mean, count, etc.
Pandas provides the groupby() function to group data by one or more columns and
then perform aggregation operations on the grouped data.
import pandas as pd

# Sample data: groupby below needs the 'Category' and 'Value' columns.
df = pd.DataFrame({
    'Category': ['A', 'B', 'A', 'B', 'A'],
    'Value': [10, 20, 30, 40, 50],
})

# Group the rows by category, then total the values inside each group.
grouped = df.groupby('Category')
aggregated_result = grouped['Value'].sum()
print(aggregated_result)
In pandas, you can easily plot individual columns and whole tables using the
built-in plotting capabilities. Pandas provides a .plot() method that can be applied
to both Series (individual columns) and DataFrames (whole tables). This method uses
Matplotlib for visualization by default.
Here's how you can plot individual columns and whole tables using pandas:
python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Pandas is a powerful Python library for data manipulation and analysis. It provides
various functions and methods for reading and writing data from/to files using
different data structures like DataFrames and Series. Here's how you can read and
write data using pandas:
Reading Data from Files:
1. CSV Files: CSV (Comma-Separated Values) files are one of the most common file
formats for storing tabular data.
import pandas as pd
Excel Files: Pandas can also read data from Excel files (.xls or .xlsx).
import pandas as pd
Other Formats: Pandas supports reading data from various other formats such as
JSON, SQL databases, HTML tables, and more.
import pandas as pd
import pandas as pd

# Column contents are kept in separate lists and combined into one table.
_names = ['Alice', 'Bob', 'Charlie']
_ages = [25, 30, 28]
data = pd.DataFrame({'Name': _names, 'Age': _ages})

# Save the table as CSV; index=False leaves the row labels out of the file.
data.to_csv('output.csv', index=False)
Excel Files:
import pandas as pd

# Build the sample table from one list per column.
# NOTE(review): no statement that writes this table to an Excel file is
# present in this snippet.
_names = ['Alice', 'Bob', 'Charlie']
_ages = [25, 30, 28]
data = pd.DataFrame({'Name': _names, 'Age': _ages})
Other Formats:
import pandas as pd

# Build the sample table from a mapping of column name to values.
# NOTE(review): no statement that writes this table to another format is
# present in this snippet.
_columns = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 28],
}
data = pd.DataFrame(_columns)
def raise_indentation_error():
    """Demonstrate an IndentationError without breaking this file.

    A function whose body is not indented cannot be written here directly,
    because the interpreter would reject the whole file. The faulty source
    is therefore kept in a string and compiled only when this function runs.

    Raises:
        IndentationError: Always, when the faulty snippet is compiled.
    """
    bad_source = (
        'def demo():\n'
        'print("This will raise an IndentationError")\n'
        'print("Because there\'s no indentation before the \'print\' statements")\n'
    )
    compile(bad_source, "<indentation-demo>", "exec")
def correct_indentation_error():
    """Print two lines from a correctly indented function body."""
    # The statements below are indented, so they belong to the function.
    print("This corrects the IndentationError")
    print("By adding proper indentation before the 'print' statements")
# raise_indentation_error()
# correct_indentation_error()
Write a program to compute distance between two points taking input from the user
import math


def distance(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2).

    ``math.hypot`` is used instead of squaring by hand so that very large
    coordinates do not overflow in the intermediate squares.
    """
    return math.hypot(x2 - x1, y2 - y1)


if __name__ == "__main__":
    # Reading co-ordinates
    x1 = float(input('Enter x1: '))
    y1 = float(input('Enter y1: '))
    x2 = float(input('Enter x2: '))
    y2 = float(input('Enter y2: '))
    # Calculating distance
    d = distance(x1, y1, x2, y2)
    # Displaying result
    print('Distance = %f' % (d))
Program to display the following information: Your name, Full Address, Mobile
Number, College Name, Course Subjects in python
def personal_details():
    """Print a hard-coded name, age and address, one per line.

    NOTE(review): the task statement for this program also asks for mobile
    number, college name and course subjects; they are not printed here.
    """
    name, age = "Simon", 19
    address = "Bangalore, Karnataka, India"
    print("Name: {}\nAge: {}\nAddress: {}".format(name, age, address))


personal_details()
Write a program for checking whether the given number is an even number or not.
# Python program to find the largest number among the three input numbers
# NOTE(review): only this loop is visible; it prints each item of `result`,
# which this fragment does not define, and it does not compare three numbers.
for r in result:
    print(r)
import math

# prints 12
# end="" keeps the number on the same line as the label.
print("The gcd of 60 and 48 is : ", end="")
print(math.gcd(60, 48))
# NOTE(review): isolated initialisation; the rest of the factorial program is
# not visible in this section.
factorial = 1
import random
#cleaning
# Lower-case the text, split on whitespace, trim surrounding punctuation and
# drop possessive "'s" so that variants of a word compare equal.
text = text.lower()
words = [word.strip('.,!;()[]').replace("'s", '') for word in text.split()]

#finding unique and sort
# A set removes duplicates in one pass; sorted() returns them as an ordered list.
unique = sorted(set(words))

#print
print(unique)
Python Inheritance
class Animal:
    """Base class used in the inheritance example."""

    def eat(self):
        """Print a message saying that the animal can eat."""
        print("I can eat")
Polymorphism
class Cat:
    """A cat with a name and an age, used in the polymorphism example."""

    def __init__(self, name, age):
        """Store the cat's *name* and *age* on the instance."""
        self.name = name
        self.age = age

    def info(self):
        """Print a one-line description of this cat."""
        print(f"I am a cat. My name is {self.name}. I am {self.age} years old.")

    def make_sound(self):
        """Print the sound a cat makes."""
        print("Meow")
class Dog:
    """A dog with a name and an age, used in the polymorphism example."""

    def __init__(self, name, age):
        """Store the dog's *name* and *age* on the instance."""
        self.name = name
        self.age = age

    def info(self):
        """Print a one-line description of this dog."""
        print(f"I am a dog. My name is {self.name}. I am {self.age} years old.")

    def make_sound(self):
        """Print the sound a dog makes."""
        print("Bark")
Python program for data visualization with Seaborn for program 9 above.
# Importing libraries
import numpy as np
import seaborn as sns
import numpy as np

# `array` is rebound three times; each version is printed under its caption.
# sep="\n" puts the caption and the array on separate lines.
array = np.zeros(10)
print("An array of 10 zeros:", array, sep="\n")

array = np.ones(10)
print("An array of 10 ones:", array, sep="\n")

# Scaling an all-ones array gives ten fives.
array = 5 * np.ones(10)
print("An array of 10 fives:", array, sep="\n")
import numpy as np

# Draw 15 samples from the standard normal distribution (mean 0, standard
# deviation 1), matching the count announced in the message below.
rand_num = np.random.normal(0, 1, 15)
print("15 random numbers from a standard normal distribution:")
print(rand_num)