Import As Import As From Import Import As Matplotlib Import Import
Import As Import As From Import Import As Matplotlib Import Import
Import As Import As From Import Import As Matplotlib Import Import
import warnings
warnings.filterwarnings('ignore')
# Load the dataset into `df` from a hard-coded absolute path.
# The commented-out line below is an alternative input file.
#df = pd.read_csv('/Users/sangeetha/consolidated_output.csv')
df = pd.read_csv('/Users/sangeetha/outputweo1.csv')
to_drop = ['match_id','match_num',
'duration',
'totalsets',
'P1_setswon',
'P2_setswon','P1_gameswon','P2_gameswon',
'P1_pointswon','P2_pointswon','winner','player1_rating','player2_
'prob',
'weo_prob',
'win_play_2',
'updated_win_match_1','updated_win_match_2',
'updated_total_match_1', 'updated_total_match_2',
'player1_fg','player2_fg',
'current_rating_player1','current_rating_player2',
'updated_rating_player1','updated_rating_player2',
#'current_weo_rating_player1','current_weo_rating_player2']
'updated_elo_rating_player1','updated_elo_rating_player2']
# Derive each player's win ratio (wins / matches played) while the raw
# counters are still present; a 0/0 division yields NaN here.
df['win_ratio_player1'] = (
    df['win_match_1'] / df['total_match_1']
)
df['win_ratio_player2'] = (
    df['win_match_2'] / df['total_match_2']
)

# First pass: remove every column named in the to_drop list built above.
df.drop(columns=to_drop, inplace=True)

# Second pass: the raw counters have served their purpose once the ratios
# exist, so remove them as well.
to_drop = [
    'total_match_1',
    'total_match_2',
    'win_match_1',
    'win_match_2',
]
df.drop(columns=to_drop, inplace=True)

# Show which columns remain.
print(df.columns)
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 1/5
23/07/2023, 23:52 2b
Index(['year', 'player1', 'player2', 'win_play_1',
'current_weo_rating_player1', 'current_weo_rating_player2',
'win_ratio_player1', 'win_ratio_player2'],
dtype='object')
# Handle missing values in the numeric *feature* columns only.
# The split key ('year') and the target ('win_play_1') are deliberately
# excluded: mean-imputing them would fabricate years and class labels.
imputer = SimpleImputer(strategy='mean')
numeric_cols = df.select_dtypes(include=[float, int]).columns
feature_cols = [
    col for col in numeric_cols if col not in ('year', 'win_play_1')
]

# Learn the column means from the pre-2019 (training) rows only, so that no
# statistic of the 2019 test rows leaks into training; then fill the gaps in
# every row with those training means.
is_train_row = df['year'] < 2019
imputer.fit(df.loc[is_train_row, feature_cols])
df[feature_cols] = imputer.transform(df[feature_cols])

# Split the data chronologically: everything before 2019 is used for
# training, 2019 itself is the test set. Rows whose year is missing match
# neither condition and are left out of both sets.
train_data = df[is_train_row]
test_data = df[df['year'] == 2019]

# Drop the split key ('year') and the target ('win_play_1') from the
# training features; keep the target separately.
X_train = train_data.drop(columns=['year', 'win_play_1'])
y_train = train_data['win_play_1']

# Same separation of features and target for the test set.
X_test = test_data.drop(columns=['year', 'win_play_1'])
y_test = test_data['win_play_1']
# Combine player1 and player2 names from both training and test sets for label encoding
combined_players = pd.concat([X_train['player1'], X_train['player2'], X_test
label_encoder = LabelEncoder()
label_encoder.fit(combined_players)
# ---------------- feature scaling ----------------
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# -------------------------------------------------

# Train the classifier on the scaled training features.
rf_classifier.fit(X_train, y_train)
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 2/5
23/07/2023, 23:52 2b
# Score the SVM predictions against the test labels.
svm_accuracy = accuracy_score(
    y_test,
    svm_predictions,
)
svm_recall = recall_score(
    y_test,
    svm_predictions,
)

# Indices of (up to) the ten most important features, most important first:
# take the tail of the ascending argsort, then reverse it.
top_feature_indices = np.argsort(feature_importances)[-10:][::-1]
top_features = [(f"Feature {index}", importance) for index, importance in zi
Feature Importance
3 Feature_3 0.223542
2 Feature_2 0.213274
0 Feature_0 0.173855
1 Feature_1 0.162643
5 Feature_5 0.113348
4 Feature_4 0.113338
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 3/5
23/07/2023, 23:52 2b
# Draw the confusion matrix `cm` as an annotated seaborn heatmap.
# NOTE(review): `cm` must already exist when this runs; its definition is
# not visible directly above in this export.
plt.figure(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,   # write the count inside every cell
    fmt="d",      # format the counts as integers
    cmap="Blues",
    cbar=False,
    square=True,
    xticklabels=["Negative", "Positive"],
    yticklabels=["Negative", "Positive"],
)
plt.show()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
# Confusion matrix of the SVM predictions against the test labels.
cm = confusion_matrix(y_true=y_test, y_pred=svm_predictions)
# NOTE(review): no plotting call is visible between the line above and this
# show(); presumably the heatmap call was lost in the export.
plt.show()
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 4/5
23/07/2023, 23:52 2b
In [ ]:
In [ ]:
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 5/5