Program
Demonstrate the working of the decision-tree-based ID3 algorithm. Use an appropriate data set for
building the decision tree and apply this knowledge to classify a new sample.
import math
from collections import Counter

import pandas as pd
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# a utility function for checking purity and impurity of a child
# -----------------------------------------------------------------------
def counter(target, attribute, i):
    """Count the class labels of the rows whose attribute value equals ``i``.

    The class labels are the distinct values of ``target``, ordered by their
    string form so that each count keeps the same meaning on every run.

    Args:
        target: Sequence of class labels, one per row.
        attribute: Sequence of attribute values aligned with ``target``.
        i: Attribute value that selects the rows to count.

    Returns:
        A tuple ``(p, n)``: ``p`` is the number of selected rows labelled
        with the first class, ``n`` the number labelled with the second.
        ``n`` is 0 when ``target`` holds a single class, and both are 0 when
        no row matches (including empty input).
    """
    # A bare set of strings iterates in hash order, which changes between
    # interpreter runs; sorting pins which class p and n refer to.
    classes = sorted(set(target), key=str)
    # With one class there is no second label to compare against.
    has_second = len(classes) > 1

    p = 0
    n = 0
    for label, value in zip(target, attribute):
        if value == i:
            # classes is non-empty whenever this loop body runs.
            if label == classes[0]:
                p += 1
            elif has_second and label == classes[1]:
                n += 1
    return p, n
# -----------------------------------------------------------------------
# function that calculates the information gain
# -----------------------------------------------------------------------
def Information_Gain(dataset, feature):
    """Return the information gain obtained by splitting on ``feature``.

    The result is ``base_entropy(dataset)`` minus the weighted sum of
    ``entropy(dataset, feature, value)`` over every distinct value, where
    each weight is the fraction of entries in ``feature`` equal to that value.

    Args:
        dataset: Passed unchanged to ``base_entropy`` and ``entropy``.
        feature: Sequence of hashable attribute values.

    Returns:
        The base entropy minus the weighted entropy of the split. For an
        empty ``feature`` this is just ``base_entropy(dataset)``.
    """
    total = len(feature)
    weighted_entropy = 0
    # Counter tallies every value in a single pass (instead of one
    # feature.count() scan per distinct value) and iterates in first-seen
    # order, so the floating-point summation order is the same on every run.
    for value, occurrences in Counter(feature).items():
        weighted_entropy += occurrences / total * entropy(dataset, feature, value)
    return base_entropy(dataset) - weighted_entropy
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# ---------------------------------------------------------------------------
# main function
# -----------------------------------------------------------------------
def main():
    """Load ``filename.csv``, pass it to ``construct_tree`` and print the result.

    Each key/value pair of the mapping returned by ``construct_tree`` is
    printed on its own line as ``key  =>  value``.
    """
    frame = pd.read_csv("filename.csv")
    # construct_tree is handed a fresh, empty dict as its second argument.
    built = construct_tree(frame, {})
    for node, outcome in built.items():
        print(node, " => ", outcome)


# -----------------------------------------------------------------------
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
OUTPUT
outlook => {'sunny': (3, 2), 'overcast': (0, 4), 'rainy': (2, 3)}
overcast => yes
temp => {'mild': (1, 2), 'cool': (1, 1)}
hot => no
cool => yes
humidity => {'normal': (1, 1)}
high => no
normal => yes
windy => {'Weak': (0, 1), 'Strong': (1, 0)}
Weak => yes
Strong => no