Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
27 views · 6 pages

MLExp 3

Download as docx, pdf, or txt
You are on page 1/ 6

Exp. No. 3.

Write a program to demonstrate the working of the decision-tree-based ID3
algorithm. Use an appropriate dataset for building the decision tree, and apply this knowledge
to classify a new sample.

Dataset:

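The PlayTennis dataset is saved as a .csv (comma-separated values) file in the current working
directory; otherwise, use the complete path of the dataset in the program. Note that the program
below loads "3-dataset.csv", treats the column named "answer" as the class label, and counts only
the exact string "yes" as the positive class, so the CSV header and label values must match that
(the table below shows them as "PlayTennis" and "Yes"/"No"):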

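| Day | Outlook  | Temperature | Humidity | Wind   | PlayTennis |
|-----|----------|-------------|----------|--------|------------|
| D1  | Sunny    | Hot         | High     | Weak   | No         |
| D2  | Sunny    | Hot         | High     | Strong | No         |
| D3  | Overcast | Hot         | High     | Weak   | Yes        |
| D4  | Rain     | Mild        | High     | Weak   | Yes        |
| D5  | Rain     | Cool        | Normal   | Weak   | Yes        |
| D6  | Rain     | Cool        | Normal   | Strong | No         |
| D7  | Overcast | Cool        | Normal   | Strong | Yes        |
| D8  | Sunny    | Mild        | High     | Weak   | No         |
| D9  | Sunny    | Cool        | Normal   | Weak   | Yes        |
| D10 | Rain     | Mild        | Normal   | Weak   | Yes        |
| D11 | Sunny    | Mild        | Normal   | Strong | Yes        |
| D12 | Overcast | Mild        | High     | Strong | Yes        |
| D13 | Overcast | Hot         | Normal   | Weak   | Yes        |
| D14 | Rain     | Mild        | High     | Strong | No         |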

import pandas as pd

import math

import numpy as np

# Load the training examples. The path is relative to the current working
# directory; substitute the full path if the CSV is stored elsewhere.
data = pd.read_csv("3-dataset.csv")

# Every column except the class label "answer" is a candidate split attribute.
# list.remove raises ValueError if the CSV has no "answer" column.
features = list(data.columns)
features.remove("answer")

class Node:
    """A node of the decision tree built by ID3.

    ID3 creates two sorts of nodes: attribute nodes, whose ``value`` is the
    name of the attribute being tested, and attribute-value nodes, whose
    ``value`` is one observed value of the parent's attribute.  A node with
    ``isLeaf`` set carries the predicted class label(s) in ``pred``.
    """

    def __init__(self):
        # Child nodes, in the order they were appended.  A fresh list per
        # instance, so nodes never share children.
        self.children = []
        # Attribute name or attribute value this node represents.
        self.value = ""
        # True when the node terminates a branch and ``pred`` is meaningful.
        self.isLeaf = False
        # Prediction stored on leaf nodes; left as "" on internal nodes.
        self.pred = ""

    def __repr__(self):
        return (
            f"Node(value={self.value!r}, isLeaf={self.isLeaf!r}, "
            f"pred={self.pred!r}, children={len(self.children)})"
        )

def entropy(examples):
    """Return the binary entropy (in bits) of the "answer" column.

    Rows whose "answer" equals the exact string "yes" count as positive;
    every other row counts as negative.

    Args:
        examples: DataFrame containing an "answer" column.

    Returns:
        A float in [0.0, 1.0].  The result is 0.0 when there are no positive
        rows or no negative rows, which includes an empty DataFrame.
    """
    total = len(examples)
    # Vectorised count instead of iterating over rows one at a time.
    pos = int((examples["answer"] == "yes").sum())
    neg = total - pos

    # A subset with a single class (or no rows) has no uncertainty; returning
    # early also avoids evaluating log(0).
    if pos == 0 or neg == 0:
        return 0.0

    p = pos / total
    n = neg / total
    return -(p * math.log2(p) + n * math.log2(n))

def info_gain(examples, attr):
    """Return the information gain obtained by splitting on ``attr``.

    The gain is the entropy of ``examples`` minus the size-weighted entropy
    of each subset of rows sharing one distinct value of ``attr``.

    Args:
        examples: DataFrame containing ``attr`` and an "answer" column.
        attr: Name of the column to split on.

    Returns:
        The information gain as a float.  For an empty DataFrame there are
        no distinct values to split on, so the result is the entropy of the
        empty set, 0.0.
    """
    total = float(len(examples))  # loop-invariant, computed once
    gain = entropy(examples)
    for u in np.unique(examples[attr]):
        subdata = examples[examples[attr] == u]
        # Weight each subset's entropy by its share of the rows.
        gain -= (len(subdata) / total) * entropy(subdata)
    return gain
def ID3(examples, attrs):
    """Recursively build an ID3 decision tree.

    Args:
        examples: DataFrame with the columns named in ``attrs`` and an
            "answer" column holding the class label.
        attrs: Names of the attributes still available for splitting.  The
            sequence itself is never modified.

    Returns:
        A Node.  Normally its ``value`` is the attribute with the highest
        information gain and it has one child per distinct value of that
        attribute.  Each such child is either a leaf (``pred`` holds the
        label array for that subset) or has a single child: the subtree
        built from the matching rows and the remaining attributes.

        If no attribute has a strictly positive gain (``attrs`` is empty,
        the rows are already pure or empty, or rows with identical
        attribute values carry different labels), the returned node is
        itself a leaf with ``value == ""``.  Its ``pred`` is a one-element
        array holding the most frequent label, with ties going to the label
        that sorts first, or an empty array when there are no rows.
    """
    root = Node()

    # Pick the attribute with the highest strictly positive information gain.
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature

    if max_feat == "":
        # Nothing separates the classes any further.  Indexing examples[""]
        # here would raise KeyError, so stop with a majority-class leaf.
        labels, counts = np.unique(examples["answer"], return_counts=True)
        root.isLeaf = True
        # Keep an array (not a scalar) so it prints like the pure-leaf case.
        root.pred = labels[[counts.argmax()]] if labels.size else labels
        return root

    root.value = max_feat

    # The remaining attributes are identical for every branch; build the list
    # once.  Only the first occurrence of max_feat is removed.
    new_attrs = list(attrs)
    new_attrs.remove(max_feat)

    for u in np.unique(examples[max_feat]):
        subdata = examples[examples[max_feat] == u]
        branch = Node()
        branch.value = u
        if entropy(subdata) == 0.0:
            # Pure subset: this branch ends in a leaf.
            branch.isLeaf = True
            branch.pred = np.unique(subdata["answer"])
        else:
            subtree = ID3(subdata, new_attrs)
            if subtree.isLeaf:
                # The recursion could not split further; fold its majority
                # prediction into this branch instead of hanging an unnamed
                # leaf below it.
                branch.isLeaf = True
                branch.pred = subtree.pred
            else:
                branch.children.append(subtree)
        root.children.append(branch)

    return root

def printTree(root: "Node", depth=0):
    """Print the tree rooted at ``root`` to standard output, one node per line.

    Each node is indented with one tab per level of depth.  A leaf prints
    its value, then " -> ", a space, and its prediction, and is followed by
    an empty line; any other node prints just its value.  Children are
    printed recursively at ``depth + 1``.

    Args:
        root: Node to print; its ``value``, ``isLeaf``, ``pred`` and
            ``children`` attributes are read.
        depth: Indentation level of ``root``.
    """
    print("\t" * depth + str(root.value), end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    # Ends the line for internal nodes; for leaves (whose line is already
    # terminated above) this emits the blank separator line.
    print()
    for child in root.children:
        printTree(child, depth + 1)

# Build the decision tree from the loaded examples and candidate attributes,
# then print it to standard output.
root = ID3(examples=data, attrs=features)

printTree(root=root)

You might also like