Data Loading- Jupyter Notebook
Data Loading- Jupyter Notebook
In [1]:
import pandas as pd
In [3]:
# Read the Excel workbook into a DataFrame (pandas reads the first sheet unless told otherwise).
# NOTE(review): absolute, user-specific Windows path - this cell only runs on the author's machine.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
In [4]:
# read_excel already returns a DataFrame; this re-wraps `d` under the name `df` that the later cells use.
df = pd.DataFrame(d)
In [8]:
# Plain-text preview; for a long frame pandas abbreviates the output and elides the middle rows.
print(df)
Calories
0 409.0
1 445.0
2 451.0
3 431.0
4 421.0
.. ...
164 NaN
165 NaN
166 NaN
167 NaN
168 NaN
In [9]:
# to_string() renders every row instead of the abbreviated preview.
print(df.to_string())
# index is left at its default (True), so the row labels are written as an extra first column.
df.to_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.xlsx")
In [13]:
# Write the workbook again (same target path), this time without the row-label column.
df.to_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.xlsx",index= False)
In [14]:
# Export the same frame as comma-separated text, without the row-label column.
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.csv",index= False)
In [15]:
# Still to_csv: the .txt file receives the same comma-separated content as the .csv export.
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.txt",index= False)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 1/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [17]:
In [18]:
# Reload the original workbook so `df` starts again from the unmodified data.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
# read_excel already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
print(df)
In [21]:
# Label-based lookup: the row whose index label is 3, returned as a Series keyed by column name.
df.loc[3]
Out[21]:
Duration 65.0
Pulse 104.0
Maxpulse 145.0
Calories 431.0
Name: 3, dtype: float64
In [24]:
# Row label plus column label selects a single cell and returns the scalar value.
df.loc[3,'Duration']
Out[24]:
65
In [25]:
# .loc slices by label and includes BOTH endpoints: index labels 0 through 4.
df.loc[0:4]
Out[25]:
In [27]:
# Rows labelled 0 through 4 (label slices include the end), restricted to two named columns.
wanted_columns = ["Duration", "Calories"]
df.loc[0:4, wanted_columns]
Out[27]:
Duration Calories
0 60 409.0
1 40 445.0
2 60 451.0
3 65 431.0
4 60 421.0
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 2/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [30]:
# Column slices in .loc are label-based and inclusive too: every column from Duration through Calories.
df.loc[0:4,"Duration":"Calories"]
Out[30]:
In [32]:
# .iloc is positional and end-exclusive: the first 4 rows and (up to) the first 5 columns.
# A slice stop beyond the last column is clipped rather than raising.
df.iloc[:4, :5]
Out[32]:
In [120]:
# First four rows by position (positions 0-3; the stop position is excluded).
df.iloc[:4]
Out[120]:
In [38]:
# A list of positions picks exactly those rows: the 1st and the 10th.
row_positions = [0, 9]
df.iloc[row_positions]
Out[38]:
In [122]:
# Rows at positions 0-10 and the first three columns (both stops are excluded).
df.iloc[0:11, :3]
Out[122]:
0 60 100.0 145.0
1 60 100.0 145.0
2 60 103.0 134.0
3 65 104.0 145.0
4 60 105.0 132.0
5 70 106.0 133.0
6 60 107.0 134.0
7 75 108.0 132.0
8 60 109.0 131.0
9 80 100.0 1125.0
10 60 110.0 124.0
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 3/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [43]:
# Abbreviated text preview of the frame before the missing-value cells below.
print(df)
In [44]:
# Returns a copy without the rows that contain a missing value; df itself is left untouched.
df.dropna()
Out[44]:
In [45]:
# inplace=True mutates df and returns None, so this cell displays nothing.
# The dropped rows are gone from df until the source file is loaded again.
df.dropna(inplace = True)
In [47]:
# fillna() returns a NEW frame; without capturing it the print below would show
# the unfilled data. The result gets its own name so df stays as it was for the
# cells that follow.
df_filled = df.fillna("missing")
print(df_filled.to_string())
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 4/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [48]:
# Remove rows with missing values from df itself, then show the result twice:
# first every row (to_string), then pandas' default text preview.
df.dropna(inplace = True)
print(df.to_string())
print(df)
In [50]:
# Show every row, then export the frame as JSON.
print(df.to_string())
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
df.to_json("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.json")
print(df)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 5/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [54]:
# Full (non-abbreviated) dump of the current frame.
print(df.to_string())
# JSON-shaped data that already lives in a Python dict (column -> {row label -> value})
# can be passed straight to the DataFrame constructor; no file is needed.
row_labels = ["0", "1", "2", "3", "4", "5"]
column_values = {
    "Duration": [60, 60, 60, 45, 45, 60],
    "Pulse": [110, 117, 103, 109, 117, 102],
    "Maxpulse": [130, 145, 135, 175, 148, 127],
    "Calories": [409, 479, 340, 282, 406, 300],
}
# Rebuild the nested mapping: each column maps every row label to its value.
data = {
    column: dict(zip(row_labels, values))
    for column, values in column_values.items()
}
df = pd.DataFrame(data)
print(df)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 6/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [56]:
# DataFrame.info() summarises the data set: index range, columns, non-null counts, dtypes, memory.
# It prints that summary itself and returns None, so wrapping it in print() only
# appended a stray "None" line to the output; call it directly instead.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 0 to 5
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Duration 6 non-null int64
1 Pulse 6 non-null int64
2 Maxpulse 6 non-null int64
3 Calories 6 non-null int64
dtypes: int64(4)
memory usage: 240.0+ bytes
None
In [57]:
# Typical kinds of bad data that need cleaning:
#   - empty cells
#   - data in the wrong format
#   - wrong data
#   - duplicates
# Empty cells can silently distort the result of an analysis.
# Save the current frame as CSV (no row-label column).
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.csv",index= False)
# dropna() returns a new frame without the rows that contain empty cells; df is unchanged.
new_df = df.dropna()
print(new_df.to_string())
In [65]:
# Load the CSV and actually use it: the original read the file into `d` but kept
# operating on whatever `df` held before, so the freshly read data was ignored.
d = pd.read_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.csv")
df = pd.DataFrame(d)
# Non-destructive variant: a cleaned copy, df untouched.
new_df = df.dropna()
print(new_df.to_string())
# Destructive variant: remove the rows with missing values from df itself.
df.dropna(inplace = True)
print(df.to_string())
In [60]:
# Default text preview of the current frame.
print(df)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 7/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [61]:
# Read the previously exported CSV back in and rebind `df` to it.
d = pd.read_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.csv")
# read_csv already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
print(df)
In [62]:
# Write the frame back to the same CSV file (no row-label column), then preview it.
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata1.csv",index= False)
print(df)
In [64]:
In [66]:
# Boolean Series, one entry per row: True marks a row that repeats an earlier row in every column.
df.duplicated()
Out[66]:
0 False
1 False
2 False
3 False
4 False
5 False
dtype: bool
In [67]:
# Switch back to the Excel data set for the duplicate checks that follow.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
# read_excel already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
print(df)
In [77]:
# Export the current frame to a second CSV file, without the row-label column.
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata2.csv",index= False)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 8/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [72]:
# Default (abbreviated) text preview of the current frame.
print(df)
In [73]:
# Per-row duplicate flag: True where the whole row repeats an earlier one.
df.duplicated()
Out[73]:
0 False
1 False
2 False
3 False
4 False
...
123 False
124 False
125 False
126 False
127 False
Length: 128, dtype: bool
In [78]:
# Before: every row, including those with missing values.
print(df.to_string())
# dropna() returns a new frame, so the result must be captured; the original
# discarded it and printed the unchanged df a second time.
df_without_nan = df.dropna()
# After: only the complete rows. df itself is not modified by this cell.
print(df_without_nan.to_string())
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 9/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [90]:
# Start again from the unmodified workbook.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
# read_excel already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
print(df)
In [82]:
# Drop the rows with missing values from df itself BEFORE printing. The original
# first called dropna() without keeping its result (a no-op), printed the
# uncleaned frame, and only then dropped in place.
df.dropna(inplace = True)
print(df.to_string())
In [119]:
# Displays a copy in which each missing value is replaced by the text "missing"; df is not modified.
df.fillna("missing")
Out[119]:
In [115]:
# Reload the workbook and immediately discard the rows that contain missing values.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
# read_excel already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
# Mutates df and returns None.
df.dropna(inplace = True)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 10/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [116]:
# Save the current frame as CSV, without the row-label column.
df.to_csv("C:\\Users\\Nisha\\OneDrive\\Desktop\\Newdata2.csv",index= False)
# NOTE(review): this fill runs after the file was written, so the saved CSV never sees it;
# and if df was already cleaned with dropna(inplace=True), nothing is left to fill - confirm intent.
df.fillna(130, inplace = True)
# Disabled previews kept from the original cell:
#print(df)
# print(df.to_string())
In [98]:
# Flags repeated rows; the first occurrence stays False and later identical rows are True.
df.duplicated()
Out[98]:
0 False
1 True
2 False
3 False
4 False
...
123 False
124 False
125 False
126 False
127 False
Length: 122, dtype: bool
In [99]:
# Displays a de-duplicated copy; df itself is not changed by this cell.
df.drop_duplicates()
Out[99]:
In [100]:
# Remove the repeated rows from df itself; returns None, hence no cell output.
df.drop_duplicates(inplace = True)
In [101]:
# Preview df after the in-place de-duplication.
print(df)
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 11/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [108]:
# Reload the full workbook for the boolean-filter examples below.
d = pd.read_excel("C:\\Users\\Nisha\\OneDrive\\Desktop\\data1.xlsx")
# read_excel already returns a DataFrame; the wrap only provides the `df` name.
df = pd.DataFrame(d)
print(df)
In [111]:
# Boolean mask: keep only the rows whose Maxpulse is below 135.
low_maxpulse = df["Maxpulse"] < 135
df.loc[low_maxpulse]
Out[111]:
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 12/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
In [126]:
# Rows where Maxpulse is below 135 OR Pulse is below 125 (either condition is enough).
low_maxpulse = df["Maxpulse"] < 135
low_pulse = df["Pulse"] < 125
df.loc[low_maxpulse | low_pulse]
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 13/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
Out[126]:
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 14/15
2/4/23, 1:43 PM Untitled7 - Jupyter Notebook
Duration Pulse Maxpulse Calories
127 60 164.0 131.0 421.0
In [124]:
# Rows where Maxpulse is above 135 AND Pulse is above 145 (both conditions must hold).
high_maxpulse = df["Maxpulse"] > 135
high_pulse = df["Pulse"] > 145
df.loc[high_maxpulse & high_pulse]
Out[124]:
In [ ]:
# Not yet executed (empty prompt): repeats the earlier Maxpulse-below-135 filter.
df.loc[df["Maxpulse"]<135]
localhost:8892/notebooks/Untitled7.ipynb?kernel_name=python3 15/15