Titanic Classification
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Read the passenger table from disk, then print a summary of its columns
# (non-null counts and dtypes).
csv_path = "titanic.csv"
df = pd.read_csv(csv_path)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
# Preview the first rows of the table.
df.head()

# Count the missing values in every column; the bare name on the last line
# keeps the result displayed as the cell output.
missing_per_column = df.isnull().sum()
missing_per_column
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
# Box plot of the Fare column; any missing fare is plotted as 0.
fare_column = df[['Fare']].fillna(0)
plt.boxplot(fare_column)
<Axes: >
# Pairwise scatter/distribution plots of the numeric features, with points
# coloured by the Survived label.
pairplot_columns = ['Age', 'Fare', 'Pclass', 'Survived']
sns.pairplot(df[pairplot_columns], hue='Survived')
<seaborn.axisgrid.PairGrid at 0x7cc9a265d6f0>
# Age contains missing values; impute them with the column median so the
# models below receive no NaNs.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Split into training and held-out rows. `train` and `test` are used by the
# cells below but were never defined, and the imported train_test_split was
# never called.
# NOTE(review): test_size=0.3 is inferred from the recorded scores (a 268-row
# test set out of 891 rows) -- confirm against the original notebook.
train, test = train_test_split(df, test_size=0.3)

# Fit a random forest on three numeric features. The target is passed as a
# 1-D Series (not a one-column DataFrame) so scikit-learn does not emit a
# DataConversionWarning.
rf = RandomForestClassifier()
rf.fit(train[['Age', 'Fare', 'Pclass']], train['Survived'])
RandomForestClassifier()
# Feature columns the random forest is scored and queried on.
rf_features = ['Age', 'Fare', 'Pclass']

# Accuracy on the rows the forest was fitted on (1-D target).
print("Train Accuracy: ",
      round(rf.score(train[rf_features], train['Survived']), 2))

# Predict the held-out rows and report the share of correct predictions.
# Previously `result` was computed but never used, so no statement produced
# the held-out accuracy.
result = rf.predict(test[rf_features])
print("Test Accuracy: ", round((result == test['Survived']).mean(), 2))
0.6753731343283582
# Fit a linear-kernel support vector classifier on the same three features.
# The target is passed as a 1-D Series: passing the one-column DataFrame
# train[['Survived']] makes scikit-learn emit a DataConversionWarning
# ("A column-vector y was passed when a 1d array was expected").
svm = SVC(kernel='linear')
svm.fit(train[['Age', 'Fare', 'Pclass']], train['Survived'])
/usr/local/lib/python3.10/dist-packages/sklearn/utils/
validation.py:1143: DataConversionWarning: A column-vector y was
passed when a 1d array was expected. Please change the shape of y to
(n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
SVC(kernel='linear')
# Score the fitted SVM on the held-out rows and print the accuracy.
svm_test_features = test[['Age','Fare','Pclass']]
svm_test_target = test[['Survived']]
svm_test_accuracy = svm.score(svm_test_features, svm_test_target)
print(svm_test_accuracy)
0.6940298507462687