#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
import dataframe as df
# **Data Loading**
# **Fill in the Command to load your CSV dataset "weather.csv" with pandas**
# In[2]:
# Load the dataset named in the instruction above; every later cell reads
# from this frame under the name `weather`.
weather = pd.read_csv("weather.csv")
# **Data Analysis**
#
# - Get the shape of the dataset and print it.
#
# - Get the column names in list and print it.
#
# - Describe the dataset to understand the basic statistics of the dataset.
#
# - Print the first three rows of the dataset
# In[5]:
# The task asks for the shape, i.e. the (rows, columns) tuple.
# DataFrame.size would instead return the total number of cells.
data_size = weather.shape
print(data_size)

# The task asks for the names "in list", so convert the column Index to a
# plain Python list before printing.
weather_col_names = list(weather.columns)
print(weather_col_names)

# Basic statistics of the dataset.
print(weather.describe())

# First three rows of the dataset.
print(weather.iloc[:3])
# **Target Identification**
#
# Execute the below cell to identify the target variable. If yes, it will rain
# tomorrow; otherwise it will not rain.
# In[6]:
# Select the label column from the loaded frame and show it.
weather_target = weather.loc[:, 'RainTomorrow']
print(weather_target)
# **Feature Identification**
#
# In our case by analyzing the dataset, we can understand that the columns like
# **Date** might be irrelevant as they do not influence whether it rains the
# next day.
#
# Since **RainTomorrow** is our target variable, we will be removing it from the
# feature set.
#
# - Perform appropriate operation to drop the columns **Date** and
# **RainTomorrow**
# In[10]:
# Build the feature frame: a copy of `weather` without the Date column and
# without the RainTomorrow label column.
cols_to_drop = ['Date', 'RainTomorrow']
weather_feature = weather.drop(cols_to_drop, axis=1)
print(weather_feature.head())
# **Categorical Data**
#
# In order to identify the categorical variables in the data, use the following
# command in the below cell,
# In[11]:
# Keep only the object-dtype columns of `weather` and show the first 15 rows
# so the categorical variables can be inspected.
weather_categorical = weather.select_dtypes(include=[object])
print(weather_categorical.head(15))
# **Convert to boolean**
#
# Assign the column **RainToday** for the variable **yes_no_cols** and run the
# cell below.
#
# In[14]:
yes_no_cols = ["RainToday"]

# Turn the listed columns into booleans by comparing against 'Yes'.
# Any other value, including a missing one, compares unequal and becomes False.
# NOTE(review): assumes the column holds the strings 'Yes'/'No' - confirm
# against weather.csv.
weather_feature[yes_no_cols] = weather_feature[yes_no_cols] == 'Yes'

print(weather_feature.head(5))
# In[15]:
# One-hot encode the four categorical columns listed below; each generated
# column name is prefixed with the name of the column it came from.
weather_dumm = pd.get_dummies(
    weather_feature,
    columns=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
    prefix=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
)

# Convert the encoded frame to a float array. `np.float` was only an alias of
# the builtin float and no longer exists in current NumPy, so use np.float64.
weather_matrix = weather_dumm.values.astype(np.float64)
# **Imputing-Missing Values**
#
# Do the Imputing-Missing Values by using the following parameters
#
# - missing_values=np.nan
# - strategy=mean
# - fill_value=None
# - verbose=0
# - copy=True
#
# In[16]:
from sklearn.impute import SimpleImputer

# Build the imputer from the parameters listed in the instructions above.
# `verbose` is deliberately left out: it had no effect on the result and has
# been dropped from recent scikit-learn releases, where passing it raises a
# TypeError.
imp = SimpleImputer(
    missing_values=np.nan,
    strategy="mean",
    fill_value=None,
    copy=True,
)

# Replace every NaN in the feature matrix with the mean of its column.
weather_matrix = imp.fit_transform(weather_matrix)
# **Standardization**
#
# Run the below cell to perform standardization
# In[17]:
from sklearn.preprocessing import StandardScaler

# Standardize the data by removing the mean and scaling to unit variance
scaler = StandardScaler()
weather_matrix = scaler.fit_transform(weather_matrix)
# Split the dataset into 90% train data and 10% test data, and set
# random_state as seed.
# In[20]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split is the same on every run.
seed = 5000

# 90% / 10% split of the feature matrix and the labels. The call is wrapped in
# parentheses so the statement can legally span several lines.
train_data, test_data, train_label, test_label = train_test_split(
    weather_matrix,
    weather_target,
    train_size=.9,
    test_size=0.1,
    random_state=seed,
)
#
# - Evaluate the classifier with score from test_data and test_label
#
# - Print the predicted score
#
#
# In[24]:
# NOTE(review): `classifier` must already be created and fitted on
# train_data/train_label before this cell runs. Its definition is not present
# in the visible part of this file - confirm which model was intended.
churn_predicted_target = classifier.predict(test_data)

# Evaluate the classifier on the held-out split and print the score, as the
# instructions above ask.
score = classifier.score(test_data, test_label)
print(score)

# Persist the score as text; the write must sit inside the `with` block so the
# file is open when written and closed afterwards.
with open('output.txt', 'w') as output_file:
    output_file.write(str(np.mean(score)))
# **Random Forest Classifier**
#
# - Do the **Random Forest** Classifier of the Dataset using the following
# parameters.
# - max_depth=5
# - n_estimators=10
# - max_features=10
# - random_state=seed
#
# - Train the model with train_data and train_label.
#
# - Now predict the output with test_data.
#
# In[26]:
from sklearn.ensemble import RandomForestClassifier

# Build the Random Forest with the parameters given in the instructions above.
classifier = RandomForestClassifier(
    max_depth=5,
    n_estimators=10,
    max_features=10,
    random_state=seed,
)

# Train on the training split, then predict labels for the test split.
classifier.fit(train_data, train_label)
churn_predicted_target = classifier.predict(test_data)
# In[ ]: