AI Assignment: Asad Nasir - 37, Muhammad Usman Ali - 29, Momin - 49
AI Assignment: Asad Nasir - 37, Muhammad Usman Ali - 29, Momin - 49
AI Assignment: Asad Nasir - 37, Muhammad Usman Ali - 29, Momin - 49
(Done By 18-SE-30)
Calculating Term Frequency:
# computing term frequency
def compute_tf(docs_list):
    """Create a zero-initialised word table for each document in *docs_list*.

    Every document is split on single spaces and a dict keyed by its distinct
    tokens is built with each value set to 0.

    NOTE(review): the text of this function stops here in the document. The
    statements that presumably fill in and report the counts are missing and
    still need to be restored; as written, nothing is returned.

    Args:
        docs_list: iterable of document strings.
    """
    for doc in docs_list:
        # Split on a literal space, not on arbitrary whitespace.
        doc1_lst = doc.split(" ")
        wordDict_1 = dict.fromkeys(set(doc1_lst), 0)
Output:
Output:
(Done by 18-SE-37)
if numDocumentsWithThisTerm > 0:
return 1.0 + math.log(float(len(allDocuments)) / numDocumentsWithThisTerm
) else:
return 1.0
Output:
TF*IDF:
def compute_tfidf_with_alldocs(documents, query):
    """Score each query term found in each document as TF * IDF.

    Reads two module-level objects defined outside this function:
    ``tf_doc`` (indexed by document position, then by word) and ``idf_dict``
    (indexed by word).
    NOTE(review): presumably the per-document term frequencies and the
    inverse document frequencies -- confirm against where they are built.

    Args:
        documents: sequence of document strings (``len()`` is taken of it).
        query: query string; it is split on whitespace into tokens.

    Returns:
        A ``(tf_idf, df)`` tuple. ``tf_idf`` is a flat list of every score
        computed, in the order the matches were encountered. ``df`` is a
        DataFrame with a ``'doc'`` column holding ``0 .. len(documents) - 1``
        and one column per query token; cells that were never assigned a
        score are filled with 0.
    """
    tf_idf = []
    query_tokens = query.split()
    df = pd.DataFrame(columns=['doc'] + query_tokens)
    # The row numbering does not depend on the loop variable, so set it once
    # up front; this also creates one row per document for the writes below.
    df['doc'] = np.arange(0, len(documents))

    for index, doc in enumerate(documents):
        doc_num = tf_doc[index]
        for word in doc.split():
            for text in query_tokens:
                if text == word:
                    tf_idf_score = doc_num[word] * idf_dict[word]
                    tf_idf.append(tf_idf_score)
                    df.iloc[index, df.columns.get_loc(word)] = tf_idf_score

    df.fillna(0, axis=1, inplace=True)
    return tf_idf, df
Output:
Done by 18-SE-49
Cosine Similarity:
# Build an Image object from image.jpg.
# NOTE(review): Image is presumably IPython.display.Image, rendered as the
# notebook cell output -- confirm the import, which is not visible here.
Image("image.jpg")
# Normalized TF for the query string ("life learning")
def compute_query_tf(query):
    """Map each whitespace-separated token of *query* to its term frequency.

    The value for a token is ``termFrequency(token, query)``; ``termFrequency``
    is defined outside this block. A token that occurs more than once in the
    query ends up as a single key.

    Args:
        query: query string.

    Returns:
        dict mapping token -> value returned by ``termFrequency``.
    """
    return {word: termFrequency(word, query) for word in query.split()}


# `query` is expected to be defined earlier in the script.
query_norm_tf = compute_query_tf(query)
print("\n\nNormalized TF for the query string(life learning):\n")
print(query_norm_tf)
Output:
Done by 18-SE-29