Titanic Classification

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
df = pd.read_csv("titanic.csv")
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
df.head()
   PassengerId  Survived  Pclass  \
0            1         0       3
1            2         1       1
2            3         1       3
3            4         1       1
4            5         0       3

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1
2                             Heikkinen, Miss. Laina  female  26.0      0
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1
4                           Allen, Mr. William Henry    male  35.0      0

   Parch            Ticket     Fare Cabin Embarked
0      0         A/5 21171   7.2500   NaN        S
1      0          PC 17599  71.2833   C85        C
2      0  STON/O2. 3101282   7.9250   NaN        S
3      0            113803  53.1000  C123        S
4      0            373450   8.0500   NaN        S
df.isnull().sum()
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
plt.boxplot(df[['Fare']].fillna(0))
{'whiskers': [<matplotlib.lines.Line2D at 0x7cc99da9aad0>,

<matplotlib.lines.Line2D at 0x7cc99da9ad70>],
'caps': [<matplotlib.lines.Line2D at 0x7cc99da9b010>,
<matplotlib.lines.Line2D at 0x7cc99da9b2b0>],
'boxes': [<matplotlib.lines.Line2D at 0x7cc99da9a830>],
'medians': [<matplotlib.lines.Line2D at 0x7cc99da9b550>],
'fliers': [<matplotlib.lines.Line2D at 0x7cc99da9b7f0>],
'means': []}
plt.boxplot(df[['Age']].fillna(df['Age'].median()))
{'whiskers': [<matplotlib.lines.Line2D at 0x7cc99db2d510>,

<matplotlib.lines.Line2D at 0x7cc99db2d7b0>],
'caps': [<matplotlib.lines.Line2D at 0x7cc99db2da50>,
<matplotlib.lines.Line2D at 0x7cc99db2dcf0>],
'boxes': [<matplotlib.lines.Line2D at 0x7cc99db2d270>],
'medians': [<matplotlib.lines.Line2D at 0x7cc99db2df90>],
'fliers': [<matplotlib.lines.Line2D at 0x7cc99db2e230>],
'means': []}
sns.heatmap(df[['Age','Fare','Pclass','Survived']].corr())
<ipython-input-20-9d1bb16c3506>:1: FutureWarning: The default value of
numeric_only in DataFrame.corr is deprecated. In a future version, it
will default to False. Select only valid columns or specify the value
of numeric_only to silence this warning.
sns.heatmap(df[['Age','Fare','Sex','Pclass','Survived']].corr())
<Axes: >
sns.pairplot(df[['Age','Fare','Pclass','Survived']],hue='Survived')
<seaborn.axisgrid.PairGrid at 0x7cc9a265d6f0>
# Fill missing Age values with the column median before modelling.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Hold out 30% of the rows for evaluation. No random seed is set, so the
# split (and therefore every score below) changes from run to run.
train, test = train_test_split(
    df[['Age', 'Fare', 'Pclass', 'Survived']],
    test_size=0.3,
)

rf = RandomForestClassifier()
# Pass the target as a 1-D Series (train['Survived']) rather than a
# one-column DataFrame (train[['Survived']]): scikit-learn expects y with
# shape (n_samples,) and otherwise emits a DataConversionWarning.
rf.fit(train[['Age', 'Fare', 'Pclass']], train['Survived'])
<ipython-input-37-e5cca9f45d48>:1: DataConversionWarning: A column-vector
y was passed when a 1d array was expected. Please change the
shape of y to (n_samples,), for example using ravel().
rf.fit(train[['Age','Fare','Pclass']], train[['Survived']])
RandomForestClassifier()
print("Train Accuracy: ",
round(rf.score(train[['Age','Fare','Pclass']], train[['Survived']]),
2))
Train Accuracy: 0.96
result = rf.predict(test[['Age','Fare','Pclass']])
np.mean( np.array(result) == np.array(test['Survived']))
0.6753731343283582
# Linear-kernel support vector classifier trained on the same three features.
svm = SVC(kernel='linear')
# Pass the target as a 1-D Series (train['Survived']) rather than a
# one-column DataFrame (train[['Survived']]): scikit-learn expects y with
# shape (n_samples,) and otherwise emits a DataConversionWarning.
svm.fit(train[['Age', 'Fare', 'Pclass']], train['Survived'])
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py:1143:
DataConversionWarning: A column-vector y was passed when a 1d array was
expected. Please change the shape of y to (n_samples, ), for example
using ravel().
y = column_or_1d(y, warn=True)
SVC(kernel='linear')
print(svm.score(test[['Age','Fare','Pclass']], test[['Survived']]))
0.6940298507462687

Titanic Classification

Uploaded by

Copyright:

Available Formats

Titanic Classification

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Titanic Classification

Uploaded by

Copyright:

Available Formats

import pandas as pd

PassengerId Survived Pclass \

Name Sex Age

Parch Ticket Fare Cabin Embarked

{'whiskers': [<matplotlib.lines.Line2D at 0x7cc99da9aad0>,

{'whiskers': [<matplotlib.lines.Line2D at 0x7cc99db2d510>,

<ipython-input-20-9d1bb16c3506>:1: FutureWarning: The default value of

train, test = train_test_split(df[['Age','Fare','Pclass','Survived']],

<ipython-input-37-e5cca9f45d48>:1: DataConversionWarning: A column-

Train Accuracy: 0.96

np.mean( np.array(result) == np.array(test['Survived']))

You might also like