# Python practice notes: examples are mostly commented out; uncomment a section to run it.
# sum=0
# for i in range(5):
# x=sum+i
# print(f"sum of {number-1} number is",x)
# if 5 > 2:
# print("Five is greater than two!")
#x=5
# y = "John"
# print(x)
# print(y)
# x = y = z = "Orange"
# print(x)
# print(y)
# print(z)
# name=["mansoor","afreen","aaqil"]
# x,y,z=name
# print(x)
# print(y)
# print(z)
# print(x,y,z)
# # Output variables
# x = "Python"
# y = "is"
# z = "awesome"
# print(x, y, z)
# x = "Python "
# y = "is "
# z = "awesome"
# print(x + y + z)
#x=5
# y = 10
# print(x + y)
#x=5
# y = "John"
# print(x, y)
#Global Variables
# x = "awesome"
#
# def myfunc():
# print("Python is " + x)
#
# myfunc()
# x = "awesome"
#
# def myfunc():
# x = "fantastic"
# print("Python is " + x)
#
# myfunc()
#
# print("Python is " + x)
# x="mansoor"
# def func():
# print("who is a hero",x)
# func()
#x=5
# print(type(x))
# x = "Hello World"
# print(type(x))
# x = 20
# print(type(x))
# x = 20.5 float
# x = 1j
# print(type(x))
# x = ["apple", "banana", "cherry"] list
# x = ("apple", "banana", "cherry") tuple
# x = range(6) range
# x = {"name" : "John", "age" : 36} dict
# x = {"apple", "banana", "cherry"} set
# x = frozenset({"apple", "banana", "cherry"}) frozenset
# x = True bool
# x = b"Hello" bytes
# x = bytearray(5) bytearray
# x = memoryview(bytes(5)) memoryview
# x = None NoneType
# x = 1 # int
# y = 2.8 # float
# z = 1j # complex
#
# print("value",{x+1},type(x))
# print(type(y))
# print(type(z))
# x = 35e3
# y = 12E4
# z = -87.7e100
#
# print(type(x))
# print(type(y))
# print(type(z))
# x = 3+5j
# y = 5j
# z = -5j
#
# print(type(x))
# print(type(y))
# print(type(z))
# x = 1 # int
# y = 2.8 # float
# z = 1j # complex
#
# #convert from int to float:
# a = float(x)
#
# #convert from float to int:
# b = int(y)
#
# #convert from int to complex:
# c = complex(x)
#
# print(a)
# print(b)
# print(c)
#
# print(type(a))
# print(type(b))
# print(type(c))
# Import the random module, and display a random number between 1 and 9:
# import random
#
# print(random.randrange(1, 10))
#casting
# x = float(1) # x will be 1.0
# y = float(2.8) # y will be 2.8
# z = float("3") # z will be 3.0
# w = float("4.2")
#
# print(type(x))
# print(type(y))
# print(type(z))
# Arithmetic operators
# + Addition x + y
# - Subtraction x - y
# * Multiplication x * y
# / Division x / y
# % Modulus x % y
# ** Exponentiation x ** y
# // Floor division x // y
# # Assignment operators
# = x = 5 x = 5
# += x += 3 x = x + 3
# -= x -= 3 x = x - 3
# *= x *= 3 x = x * 3
# /= x /= 3 x = x / 3
# %= x %= 3 x = x % 3
# //= x //= 3 x = x // 3
# **= x **= 3 x = x ** 3
# &= x &= 3 x = x & 3
# |= x |= 3 x = x | 3
# ^= x ^= 3 x = x ^ 3
# >>= x >>= 3 x = x >> 3
# <<= x <<= 3 x = x << 3
# := print(x := 3) x = 3
# print(x)
# Comparison operators
# == Equal x == y
# != Not equal x != y
# > Greater than x > y
# < Less than x < y
# >= Greater than or equal to x >= y
# <= Less than or equal to x <= y
# a=40
# def cal(a=10,b=20):
# print("value of a",a)
# print("value of b", b)
# print(f"sum of {a} and {b} is",a+b)
# cal()
# print("value of a",a)
# b = "Hello, World!"
# print(b[2:5])
# print(b[-5:-2])
#DAY 5
# split - splits a string on the given separator (default: whitespace)
# and returns a list of the pieces
# splitting proceeds from left to right
#
# strip - removes leading/trailing
# whitespace or the given characters (by default it removes whitespace)
#
# Use case : Data quality check:
##
# # Phone numbers
# # firstname, middle, lastname, fullname, suffix, prefix
#
# phone_number
#0
#1
#2
#3
#4
#5
#
# join method will be used to convert list data to string data.
# import pandas as pd
# str1 = 'ETL Automation'
# str2 = 'P Automation'
#
# print("str1 is", str1)
# print("str1 type is ", type(str1))
# print("VALUE OF STRING IS",str2.split())
#
# print("split method on str1", str1.split())
#
# str_split = str1.split()
#
# print("type of str_split", type(str_split))
# Hyphen-delimited sample string for the split()/rsplit() notes below.
s = "-".join("abcdef")
# s1 = 'a b c d e f'
# print("s is", type(s))
# print("split method on str1", s.split())
#
# print("split method on str1", s.split(sep='-'))
# print("split method on str1", s.split(sep='-', maxsplit=2))
##
# # print("rsplit method on str1", s.rsplit('-'))
# # print("rsplit method on str1", s.rsplit('-', maxsplit=3))
##
# pkeys_cols = "col1,col2,col3"
# unique_cols = "uni_col1,uni_col2"
# null_cols = "null_col1,null_col2,null_col3"
#
# print("unique_cols list", unique_cols.split(',',maxsplit=2))
# print("null_cols list", null_cols.split(','))
#
# s = ' ETL Automation '
# print("s is ", s, len(s)) # len is python function used to check length of any datatype data
# print("after strip", s.strip(), len(s.strip()))
#
# s1 = '123ETL 123Automation123'
# print("s is ", s1) # len is python function used to check length of any datatype data
# print("after lstrip", s1.lstrip('123'))
# print("after rstrip", s1.rstrip('123'))
#
# cols =['col1', 'col2', 'col3']
# # print("cols", cols)
# # print("type of cols", type(cols))
# cols1 =['MANSOOR', 'VETTIYAN', 'AAQIL']
# str5=" ".join(cols1)
# print("value after join for list value is",str5)
# print("value after join for list value is",type(str5))
#
# str4 = "-".join(cols)
#
# print("str4", str4)
# print("type of str4", type(str4))
#
# list1 = [1,'2','3','4','a','b']
#
# str5 = "-".join(list1)
# print("str5", str5)
# print("type of str5", type(str5))
#
#
# cols =['col1', 'col2', 'col3']
# print("cols", cols)
# print("type of cols", type(cols))
#
#
# df = pd.read_csv(r"C:\Users\A4952\PycharmProjects\June_automation_batch1\Contact_info.csv")
# print("df columns",df.columns)
# print("df columns",type(df.columns))
# columns = df.columns
# cols = ",".join(columns) # cols variable
#
# print("after join", cols)
#
# sql = "select {d} from tablename".format(d=cols)
#
# print(sql)
#
# print("name is {name} and age {age}".format(name='Balu', age=20))
# name='Srini'
# age=21
# print(f"name is {name} and age {age}")
# str2 = "straße"
# print("str2 after lower",str2.lower()) # lower() leaves 'ß' as-is: it lowercases but does not case-fold
# print("str2 after casefold",str2.casefold()) # casefold() maps 'ß' to 'ss' for caseless comparison
#
# print(str1.center(40))
# str2 = 'ETLAUTOMATION'
#
# print("str2 is endswith ION", str2.endswith('IONN'))
# print("str2 is endswith etl", str2.startswith('etl'))
#
# print("str2 is isapl", str2.isalnum())
# split - splits a string on the given separator (default: whitespace) and returns a list of the pieces
# split will happen from left to right
#
# strip - removes leading/trailing whitespace or the given characters (by default it removes whitespace)
#
# Use case : Data quality check:
#
# Phone numbers
# firstname, middle, lastname, fullname, suffix, prefix
# phone_number
#0
#1
#2
#3
#4
#5
# import pandas as pd
# Sample text used by the slicing examples that follow.
str1 = "ETL Automation"

# Slice syntax: str1[start:end:step]
#   start - optional; defaults to the first character (the last one when step < 0)
#   end   - optional; exclusive bound, defaults to running to the far end
#   step  - optional; defaults to 1
print(f"str1 {str1}")
# print("str1[4:12]", str1[4:12])
#
# print("str1[5:14]", str1[5:14])
#
# print("str1[4:14]", str1[4:14])
#
# print("str1[16:20]", str1[16:20], len(str1[16:20]))
#
#
# print("str1[0:7:1]", str1[0:7:1])
#
# print("str1[0:7:2]", str1[0:7:2])
#
# print("str1[0:14:3]", str1[0:14:3])
# print("str1[0:14:2]", str1[0:14:2])
# print("str1[-1:-5]", str1[-1:-5:-1])
# print("str1[:10]", str1[:10])
# print("str1[4:]", str1[4:])
# print("str1[:]", str1[:])
# print("str1[::]", str1[::])
# print("str1[::]", str1[::-1])
# print("str1[:10]", str1[-1:-10:-1])
# str2 ='madam1'
#
# str3 = 'ETL'
#
# print("str2", str2)
# print("str2[::-1]", str2[::-1])
#
# if str2 == str2[::-1]:
# print("palinderome string")
# else:
# print("not a palindrome string")
#
# number = 1221
# number = str(number)
# if number == number[::-1]:
# print("palinderome number", number)
# else:
# print("not a palindrome number", number)
#
# # Ctrl+/ comments and uncomments the selected lines
#
#
# print("str1[-1:-5:-1]", str1[-1:-5:-1])
# print("str1[-1:-5:-2]", str1[-1:-5:-2])
#
# print("str1[-1:-10:-2]", str1[-1:-10:2])
#
# # if step value is +ve then end index is end-1
##
# # if step value is -ve then end index is end+1
#
# print("str1[-12:-7:1]",str1[-12:-7:1])
# print("str1[-12:-14:-1]", str1[-12:-14:-1])
#
# print("str1[-12:-14:1]", str1[-12:-14:1])
#
#
# print("str1[-12:100:1]",str1[-12:100:1])
#
# print("str1[-12:-2:2]",str1[-12:-2:2])
#
# print("str1[3:-2:1]",str1[3:-2:1])
#
# print("last two characters",str1[-1:-3:-1])
#
# print("last two characters",str1[-2::])
#
# print("str1[5:-1:-1]",str1[5:-1:-1])
# from numpy import array
# str = "automation@yahoo.in"
#
# atloca = str.index('@')
# str[0:atloca]
#
# user_name ='automation'
# provider = 'gmail'
# domain ='com'
# print("t is ", t)
# print("type of t is ", type(t))
# Bind two names to one list object: ls2 is an alias of ls, not a copy.
ls = ls2 = [1, 2, 3]
# a = 10
# b = 10
#
# print("values of ls", ls)
# print("id of ls", id(ls))
# print("values of ls2", ls2)
# print("id of ls2", id(ls2))
#
# ls2.append(4)
#
# print("after append values of ls", ls)
# print("after append id of ls", id(ls))
# print("after append values of ls2", ls2)
# print("after append id of ls2", id(ls2))
#
# ls3 = ls.copy()
# print("values of ls3", ls3)
# print("id of ls3", id(ls3))
#
# ls.append(5)
# print("after append ls", ls)
# print("after append values of ls2", ls2)
# print("after append values of ls3", ls3)
# print("after append id of ls3", id(ls3))
# a=10
# b=10.4
# d = {"one":"ETL","one":"ETL", 'two':'AUTOMATION', 'three':'TESTING'}
#
# print("d is", d)
# print("type of d", type(d))
# print("method available on dict type", dir(d))
#
# print("d keys", d.keys())
# print("d values", d.values())
# print("d items", d.items())
#
# print(d["one"]) # 1 is key not index
# print(d['two']) # 2 is key not index
#
# print('d.get("one")', d.get('one'))
#
# print('d.get("two")', d.get('two'))
#
# # print("d['one':'three']",d['one':'three']) # slicing can't possible
#
# d.pop('one')
#
# print("d after pop ", d)
#
# d.popitem()
# print("d after popitem ", d)
#
# d.update({"two":"ETL AUTOMATION","three":"TESTING", "three":"MANUAL TESTING", 1:'BIG DATA'})
# print("d after update",d)
#
# d[1] = 'BIG DATA TESTING'
# # d['two'] = 'DATA TESTING'
# print("d after update",d)
# ls = []
# t = ()
# d2 = {}
# print("type of ls", type(ls))
# print("type of t", type(t))
# print("type of d2", type(d2))
# b=10
# ls4 = [1,2,3]
# ls5 = [1,2,3]
# print("id of ls4", id(ls4))
# print("id of ls5", id(ls5))
#
# t1 = (1,2,3)
# t2 = (1,2,3)
# print("id of t1", id(t1))
# print("id of t2", id(t2))
# Set literal with repeated members on purpose: duplicates collapse on creation.
s = {23, 1, 2, 3, 1, 4, 5, 1, 3, 19, 17, 21}
s2 = {27, 28, 35}

# In-place union: merge every member of s2 into s.
s |= s2

print(s)
print(f"s is {s}")
print(f"type of s {type(s)}")
# # print("id of s", id(s))
# # # print("methods available in s", dir(s))
#
# s.add(25)
# print("s after add", s)
# s1 = {26,27}
# print("s1.union(s)", s1.union(s))
# # print("s[1]",s[1])
# ls = [21,1,1,2,2,2,2,3,3,5,5,8,9]
# ls1 = list(set(ls))
# print("ls is", ls)
# print("ls1 is", ls1)
#
# fs = frozenset(s)
#
# print("fs is", fs)
# print("type of fs", type(fs))
# print("id of fs", id(fs))
# # print("methods available in fs", dir(fs))
#logical operators
# Two truthy integers used by the logical-operator (and / or / not) notes below.
a, b = 100, 20
# print("bool(a)", bool(a))
# print("bool(b)", bool(b))
#
# print(" a and b is", a and b) # when both are truthy, output is the right-side value
#
# print(" a or b is", a or b) # when both are truthy, output is the left-side value
# c= 0
# d= 34
# print("bool(c)", bool(c))
# print("bool(d)", bool(d))
# print(" c and d is", c and d) # c is falsy, so output is the left-side value (0)
#
#
#e=0
#f=5
#
# print("bool(e)", bool(e))
# print("bool(f)", bool(f))
# print(" e or f is", e or f)
# # "or" output
# # if both values are truthy, the output is the left value
# # if one value is truthy and the other falsy, the output is the truthy value
# # if both values are falsy, the output is the right value (0 when both are 0)
#
#h=0
#
# print("h value and bool(h)", h, bool(h))
# print(" h not ", not h)
#
# a,b,c = 20,30,10
#
# if a>b and a>c: # False and True--> False
# print("a is maximum value")
# elif b>a and b>c:# True and True--> True
# print("b is maximum value")
# else:
# print("c is maximu")
# a=range(10)
# print("value of range is:",type(a),id(a))
#
# # for i in range(10):
# # print("i value is",i)
# for i in range(2,5):
# print("i value is",i)
#positional argument
# def calc(*arg):
# print("arg",arg,type(arg))
# sum=0
# for i in arg:
# sum=sum+i
# print("sum",sum)
# calc(10,20)
#keyword argument
# def calc(**arg):
# print("arg",arg,type(arg))
# sum=0
# for value in arg.values():
# sum=sum+value
# print("sum",sum)
# calc(a=20,b=30)
# def calc(*arg):
# print("arg",arg,type(arg))
# sum=0
# for i in arg:
# sum=sum+i
# print("sum",sum)
# calc(10,20)
# import math
#
# print("math.sqrt(16)",math.sqrt(16))
import numpy as np
# print(np.array([1,"2",3]),type(np.array))
# print(type(np.array([1,"2",3])))
import pandas as pd
# Display settings for printed DataFrames: no cap on the number of columns
# shown, and a 2000-character-wide console so wide frames are not wrapped.
pd.options.display.max_columns = None
pd.options.display.width = 2000
# df=pd.DataFrame({"a":[4,5,6],"b":[7,8,9],"c":[16,11,18]})
# print("df",df)
# print("type of df",type(df))
# df=pd.read_csv(R'C:\Users\DELL\PycharmProjects\pythonProject\files\Contact_info.csv')
# df2=pd.read_csv('C:\\Users\\DELL\\PycharmProjects\\pythonProject\\files\\Contact_info.csv')
# print(df2)
# df3=pd.read_excel(R'C:\Users\DELL\PycharmProjects\pythonProject\files\Contact_info_excel.xlsx')
# print(df3)
# Load Contact_info.csv into a DataFrame. The raw-string prefix keeps the
# Windows backslashes literal instead of treating them as escape sequences.
contact_csv_path = r'C:\Users\DELL\PycharmProjects\pythonProject\files\Contact_info.csv'
df = pd.read_csv(contact_csv_path)
# print("*"*50)
# print(df.head(5))
# print("*"*50)
# print(df.tail(5))
# print("*"*50)
# print(df.sample(10))
# print("*"*50)
# print("sample fraction")
# print(df.sample(frac=0.5))
# print("*"*50)
# print("df shape")
# print(df.shape)
# print("returns no of rows",df.shape[0])
# print("returns no of columns",df.shape[1])
# print("*"*50)
# print("df describe")
# print(df.dtypes)
# print(df.describe())
# Separator line, then the full DataFrame.
print("*" * 50)
print(df)

# DataFrame.query() takes a boolean expression *string*; the bare slice `4:6`
# passed before was a SyntaxError. Filter on the index instead to pick the rows
# labelled 4 and 5 (with the default integer index this matches df.iloc[4:6]).
# NOTE(review): inside query(), `index` means the frame's index only when no
# column is itself named "index" - confirm against the CSV header.
print("df query")
print(df.query("index >= 4 and index < 6"))
# print("*"*50)
# print(df.nlargest(5,'salary'))
# print(df.nsmallest(5,'salary'))
# **********************************************************
# import pandas as pd
# df1 = pd.DataFrame({'ID': [1, 2, 3, 4],
# 'Name': ['Alice', 'Bob', 'Charlie', 'David']})
# df2 = pd.DataFrame({ 'ID': [3, 4, 5, 6],
# 'Age': [25, 30, 35, 40],'Name': ['Alice', 'Bob', 'Charlie', 'David']})
# # print("df1 is")
# # print(df1)
# # print("df2 is")
# # print(df2)
# # df_concat=pd.concat([df1,df2],axis=0)
# # print("df_concat")
# # print(df_concat)
##
# # df_concat_h=pd.concat([df1,df2],axis=1)
# # print("df_concat_h")
# # print(df_concat_h)
#
#
# #drop duplicates
# import pandas as pd
# df3 = pd.DataFrame({'ID': [1, 2, 3, 4],
# 'Name': ['Alice', 'Bob', 'Charlie', 'David']})
# df4 = pd.DataFrame({'ID': [1, 2, 3, 4],
# 'Name': ['Alice', 'Bob', 'Charlie', 'David']})
#
# df_concat_h1=pd.concat([df3,df4]) #union all (duplicates kept)
# print("df_concat_h1")
# print(df_concat_h1)
# df_concat_h2=pd.concat([df3,df4]).drop_duplicates() #union (duplicates removed)
# print("df_concat_h2")
# print(df_concat_h2)
#JOINS
import pandas as pd
# Left table: ID -> Name.
df1 = pd.DataFrame(
    {
        "ID": [1, 2, 3, 4],
        "Name": ["Alice", "Bob", "Charlie", "David"],
    }
)
# Right table: ID -> Age; only IDs 3 and 4 also appear in df1.
df2 = pd.DataFrame(
    {
        "ID": [3, 4, 5, 6],
        "Age": [25, 30, 35, 40],
    }
)

print("df1 is")
print(df1)

# Inner join: keep only the rows whose ID is present in both frames.
pd_inner = df1.merge(df2, how="inner", on="ID")
print("pd_inner")
print(pd_inner)