40. 正解データを読んでLRを学習
40
from sklearn.linear_model import LogisticRegression

# Build the training set: each line of XXX.txt becomes an example labelled 1
# and each line of not_XXX.txt becomes an example labelled 0.
# `preprocess` and `make_feature_vector` are expected to be in scope already.
xs = []
ys = []

# Both files are opened up front (as before) but inside a `with`, so they are
# closed even if preprocessing or feature extraction raises.
with codecs.open('XXX.txt', 'r', 'utf-8') as file_yes, \
        codecs.open('not_XXX.txt', 'r', 'utf-8') as file_no:
    for line in file_yes:
        line = preprocess(line)
        xs.append(make_feature_vector(line))
        ys.append(1)
    for line in file_no:
        line = preprocess(line)
        xs.append(make_feature_vector(line))
        ys.append(0)

# Fit a logistic-regression classifier with scikit-learn's default settings.
model = LogisticRegression()
model.fit(xs, ys)
41. 特徴ベクトルの作り方
41
from rules import rules
def make_feature_vector(s, rule_funcs=None):
    """Convert ``s`` into a binary feature vector, one entry per rule.

    Each rule is called as ``rule(s)``; a truthy result contributes ``1``
    and a falsy result contributes ``0``, in rule order.

    Args:
        s: The input handed unchanged to every rule.
        rule_funcs: Optional iterable of callables to use instead of the
            module-level ``rules``. Defaults to ``None``, which keeps the
            original behaviour of using ``rules``.

    Returns:
        A list of ``0``/``1`` ints with one element per rule.
    """
    # Only touch the module-level ``rules`` when no override was supplied.
    active_rules = rules if rule_funcs is None else rule_funcs
    return [1 if rule(s) else 0 for rule in active_rules]