Better Data Science - Make Synthetic Datasets With Python
Better Data Science - Make Synthetic Datasets With Python
Make Synthetic Datasets with Python
● Library imports
● rcParams is imported only to style the plots
In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from matplotlib import rcParams
# Plot styling: turn off the top and right axes spines via matplotlib's rcParams.
rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
})
Adding noise
# NOTE(review): neither `plot` nor `df` is defined in the visible part of this
# file; presumably `plot` is a scatter-plot helper and `df` a DataFrame with
# 'x1', 'x2' and 'y' columns created in earlier notebook cells -- confirm.
# NOTE(review): the three calls below receive the same `df` and differ only in
# their title; presumably `df` was regenerated between them in the original
# notebook (class imbalance towards y = 1, towards y = 0, then an easier
# classification problem) -- verify against the full notebook.
plot(df=df, x1='x1', x2='x2', y='y', title='Dataset with 2 classes - Class imbalance (y = 1)')
plot(df=df, x1='x1', x2='x2', y='y', title='Dataset with 2 classes - Class imbalance (y = 0)')
plot(df=df, x1='x1', x2='x2', y='y', title='Dataset with 2 classes - Make classification easier')