


AI Assignment

Asad Nasir - 37
Muhammad Usman Ali - 29
Momin - 49
Python Code:

import math
import pandas as pd
import numpy as np

# documents for TF-IDF
document1 = "I want to start learning to charge something in life"
document2 = "reading something about life no one else knows"
document3 = "Never stop learning"

# query
query = "life learning"

(Done By 18-SE-30)
Calculating Term Frequency:
# computing term frequency
def compute_tf(docs_list):
    for doc in docs_list:
        doc1_lst = doc.split(" ")
        wordDict_1 = dict.fromkeys(set(doc1_lst), 0)
        for token in doc1_lst:
            wordDict_1[token] += 1
        df = pd.DataFrame([wordDict_1])
        idx = 0
        new_col = ["Term Frequency"]
        df.insert(loc=idx, column='Document', value=new_col)
        print(df)

compute_tf([document1, document2, document3])
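As a quick hand check on compute_tf, using nothing beyond the sentences defined above: the token "to" occurs twice in document1 and every other token once, so the printed row for document1 should show 2 under "to" and 1 elsewhere.

# hand check of the raw counts for document1
print(document1.split(" ").count("to"))    # expected: 2
print(document1.split(" ").count("life"))  # expected: 1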

Output:

Normalized Term Frequency:


# Normalized Term Frequency
def termFrequency(term, document):
    normalizeDocument = document.lower().split()
    return normalizeDocument.count(term.lower()) / float(len(normalizeDocument))

def compute_normalizedtf(documents):
    tf_doc = []
    for txt in documents:
        sentence = txt.split()
        norm_tf = dict.fromkeys(set(sentence), 0)
        for word in sentence:
            norm_tf[word] = termFrequency(word, txt)
        tf_doc.append(norm_tf)
        df = pd.DataFrame([norm_tf])
        idx = 0
        new_col = ["Normalized TF"]
        df.insert(loc=idx, column='Document', value=new_col)
        print(df)
    return tf_doc

tf_doc = compute_normalizedtf([document1, document2, document3])
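To sanity-check the normalization (count divided by document length): document1 has 10 tokens, so the normalized TF of "life" should be 1/10 = 0.1, while document3 has 3 tokens, so "learning" should get 1/3 ≈ 0.333. A minimal check using only the function above:

# hand check: count / document length
print(termFrequency("life", document1))      # 1/10 = 0.1
print(termFrequency("learning", document3))  # 1/3 ≈ 0.3333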

Output:

(Done by 18-SE-37)

Inverse Document Frequency:


def inverseDocumentFrequency(term, allDocuments):
    numDocumentsWithThisTerm = 0
    for doc in range(0, len(allDocuments)):
        if term.lower() in allDocuments[doc].lower().split():
            numDocumentsWithThisTerm = numDocumentsWithThisTerm + 1

    if numDocumentsWithThisTerm > 0:
        return 1.0 + math.log(float(len(allDocuments)) / numDocumentsWithThisTerm)
    else:
        return 1.0

def compute_idf(documents):
    idf_dict = {}
    for doc in documents:
        sentence = doc.split()
        for word in sentence:
            idf_dict[word] = inverseDocumentFrequency(word, documents)
    return idf_dict

idf_dict = compute_idf([document1, document2, document3])
print(idf_dict)
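The IDF used here is 1 + ln(N / df) with N = 3 documents. "life" appears in document1 and document2, and "learning" in document1 and document3, so both should come out to 1 + ln(3/2) ≈ 1.4055; a word that appears in only one document, such as "stop", should get 1 + ln(3) ≈ 2.0986. Checking against the dictionary built above:

# hand check: 1 + ln(N / document frequency), with N = 3
print(idf_dict["life"])      # 1 + ln(3/2) ≈ 1.4055
print(idf_dict["learning"])  # 1 + ln(3/2) ≈ 1.4055
print(idf_dict["stop"])      # 1 + ln(3/1) ≈ 2.0986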

Output:

TF*IDF:
def compute_tfidf_with_alldocs(documents, query):
    tf_idf = []
    index = 0
    query_tokens = query.split()
    df = pd.DataFrame(columns=['doc'] + query_tokens)
    for doc in documents:
        df['doc'] = np.arange(0, len(documents))
        doc_num = tf_doc[index]
        sentence = doc.split()
        for word in sentence:
            for text in query_tokens:
                if text == word:
                    idx = sentence.index(word)
                    tf_idf_score = doc_num[word] * idf_dict[word]
                    tf_idf.append(tf_idf_score)
                    df.iloc[index, df.columns.get_loc(word)] = tf_idf_score
        index += 1
    df.fillna(0, axis=1, inplace=True)
    return tf_idf, df

documents = [document1, document2, document3]
tf_idf, df = compute_tfidf_with_alldocs(documents, query)
print("\n\nTF*IDF Output:\n")
print(df)
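Hand-checking the combined scores before looking at the printed table (these follow from the normalized TF and IDF checks above, not from any external data): for document3, tf-idf("learning") should be (1/3) × 1.4055 ≈ 0.4685, and for document1, tf-idf("life") should be 0.1 × 1.4055 ≈ 0.1405.

# hand check: normalized tf × idf for the query words
print(df.loc[2, "learning"])  # (1/3) * 1.4055 ≈ 0.4685
print(df.loc[0, "life"])      # 0.1 * 1.4055 ≈ 0.1405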

Output:

Done by 18-SE-49

Cosine Similarity:
Image("image.jpg")
# Normalized TF for the query string ("life learning")
def compute_query_tf(query):
    query_norm_tf = {}
    tokens = query.split()
    for word in tokens:
        query_norm_tf[word] = termFrequency(word, query)
    return query_norm_tf

query_norm_tf = compute_query_tf(query)
print("\n\nNormalized TF for the query string(life learning):\n")
print(query_norm_tf)
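Since each of the two query words occurs exactly once in the two-token query, both normalized TF values should come out to 1/2 = 0.5.

# hand check: one occurrence out of two query tokens
print(query_norm_tf["life"])      # 0.5
print(query_norm_tf["learning"])  # 0.5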

# idf score for the query string ("life learning")
def compute_query_idf(query):
    idf_dict_qry = {}
    sentence = query.split()
    documents = [document1, document2, document3]
    for word in sentence:
        idf_dict_qry[word] = inverseDocumentFrequency(word, documents)
    return idf_dict_qry

idf_dict_qry = compute_query_idf(query)

print("\n\nidf score for the query string(life learning):\n")
print(idf_dict_qry)

Output:
Done by 18-SE-29
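The cosine-similarity combination itself is not shown in this excerpt, so what follows is only a minimal sketch of how the pieces computed above could be combined; it is not the assignment's own code. It assumes the query vector is each query word's normalized TF (query_norm_tf) times its idf (idf_dict_qry), that each document vector is the corresponding row of the df table, and the helper name cosine_similarity is ours.

# Minimal sketch (not the assignment's code): cosine similarity between the
# query's tf-idf vector and each document's tf-idf row from df above.
def cosine_similarity(vec_a, vec_b):
    dot = sum(a * b for a, b in zip(vec_a, vec_b))
    norm_a = math.sqrt(sum(a * a for a in vec_a))
    norm_b = math.sqrt(sum(b * b for b in vec_b))
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)

query_tokens = query.split()
# assumed query vector: normalized tf of each query word times its idf
query_vector = [query_norm_tf[w] * idf_dict_qry[w] for w in query_tokens]
for i in range(len(documents)):
    # document vector: the tf-idf scores of the query words from the df table
    doc_vector = [df.loc[i, w] for w in query_tokens]
    print("cosine(query, document%d) = %.4f" % (i + 1, cosine_similarity(query_vector, doc_vector)))

Under these assumptions, document1 scores highest for the query "life learning", since it is the only document containing both query words.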
