Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

text_20241014

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 2

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# haowen.zheng

import linecache

def has_ten_spaces(s: str) -> bool:
    """Return True if *s* contains at least ten consecutive space characters.

    Only the ASCII space (U+0020) counts; tabs and other whitespace do not.
    """
    return ' ' * 10 in s

# Working directory that holds the input file and every generated file.
path_temp_file = r"C:\Users\www74\Desktop\每日问题"

# All three paths are the working directory plus a fixed leaf name:
#   file_path      - original file: chat log copied out of the mobile-office app
#   file_path_temp - intermediate file, not of interest
#   new_file_path  - intermediate file, not of interest
file_path, file_path_temp, new_file_path = (
    path_temp_file + leaf
    for leaf in (r'\11.txt', r'\temp.txt', r'\modified_example.txt')
)

# First pass: normalise the raw chat export into the temp file.
with open(file_path, 'r', encoding='utf-8') as file:
    lines = file.read()

# Check for screenshots and replace them: the sequence
# newline / single space / two newlines is rewritten to a literal
# "截图" marker line (presumably what an embedded image becomes when the
# chat is copied as text - confirm against a real export).
lines = lines.replace("\n \n\n", "截图\n")

# Write the result back out. `lines` is one string here, so write() is the
# correct call; writelines() would iterate it character by character.
with open(file_path_temp, 'w', encoding='utf-8') as file_temp:
    file_temp.write(lines)

# Second pass: drop blank lines, tidy the short "header" lines and collect
# bookkeeping lists used by the later passes.
#
# NOTE(review): `q` is not defined anywhere in the visible source. From its
# use further down (it feeds the "回复人" column) it is presumably a
# collection of responder names - it must be defined before this point.
t = []   # stripped header lines that are NOT in `q`
t1 = []  # {"序号": 1-based position, "内容": raw line} for every other line

with open(file_path_temp, 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Only non-blank lines are kept; positions below count these lines only.
content = [line for line in lines if line.strip()]

with open(new_file_path, 'w', encoding='utf-8') as file1:
    for j, i in enumerate(content, start=1):
        stripped = i.strip()
        # A header line is padded with a run of ten or more spaces and its
        # visible text is shorter than ten characters.
        if has_ten_spaces(i) and 0 < len(stripped) < 10:
            if stripped not in q:
                t.append(stripped)
            # Every header line is written back without its padding, whether
            # or not it is in `q`, so the next pass can find both kinds.
            file1.write(stripped + '\n')
        else:
            t1.append({"序号": j, "内容": i})
            file1.write(i)
def read_specific_line_using_linecache(file_path: str, line_number: int) -> str:
    """Return line *line_number* (1-based) of *file_path*, newline included.

    Returns an empty string when the line does not exist or the file cannot
    be read, which is the documented behaviour of ``linecache.getline``.
    """
    # linecache keeps file contents in memory; drop a stale entry first so a
    # file rewritten earlier in the same interpreter session is re-read.
    linecache.checkcache(file_path)
    return linecache.getline(file_path, line_number)

# Third pass: pair each header line in the cleaned file with the line that
# directly follows it.
#
# NOTE(review): `q` is not defined anywhere in the visible source; it is
# presumably a collection of responder names and must exist before this runs.
with open(new_file_path, 'r', encoding='utf-8') as file:
    # Read the file that was actually opened, so the positions used below
    # are guaranteed to match the file that linecache reads from.
    content = [line for line in file if line.strip()]

list_text = {}   # name from `t` -> following lines, joined with "\问:"
lists_text = []  # [name from `q`, following line] pairs, in file order

for line_no, k in enumerate(content, start=1):
    name = k.strip()
    if name in t:
        # The line after a header line holds that person's message.
        line = read_specific_line_using_linecache(new_file_path, line_no + 1)
        if name in list_text:
            # The separator is a literal backslash followed by "问:".
            list_text[name] = list_text[name] + "\\问:" + line.strip()
        else:
            list_text[name] = line.strip()
    elif name in q:
        line = read_specific_line_using_linecache(new_file_path, line_no + 1)
        lists_text.append([name, line])

# For every name collected in `list_text`, gather the `lists_text` entries
# whose message text contains that name.
list_pd_temp = {}  # name -> matching "<responder><message>" strings
for key in list_text:
    # `reply` is a [responder name, message line] pair. A distinct loop name
    # is used so the module-level list `t1` is not overwritten.
    for reply in lists_text:
        if key not in reply[1]:
            continue
        if key in list_pd_temp:
            # The separator is a literal backslash followed by "答:".
            list_pd_temp[key] = list_pd_temp[key] + "\\答:" + reply[0] + reply[1]
        else:
            list_pd_temp[key] = reply[0] + reply[1]

# Build one output row per name that has at least one matched reply.
#
# NOTE(review): `q` is not defined anywhere in the visible source; it is
# presumably a collection of responder names and must exist before this runs.
list_pd_a = []
for key, value in list_text.items():
    # Keys are unique in both dicts, so a direct lookup gives the same rows
    # in the same order as scanning every (key, key1) pair.
    if key not in list_pd_temp:
        continue
    value1 = list_pd_temp[key]

    # Every entry of `q` that occurs in the reply text, comma-joined.
    hf = ','.join([x for x in q if x in value1])

    list_pd_a.append({
        "提问人": key,
        "问题": value,
        "回复人": hf,
        # NOTE(review): this removes the comma-joined string as a whole, so
        # with two or more responders nothing is stripped - confirm intent.
        "回复内容": value1.replace("\n", "").replace(hf, ''),
    })

import pandas as pd
# Destination workbook, placed next to the input and intermediate files.
excel_file_path = "".join((path_temp_file, r'\output.xlsx'))

# Turn the collected row dicts into a table and write it to the Excel file.
# index=False means the row index is not written to the sheet.
df = pd.DataFrame(data=list_pd_a)
df.to_excel(excel_file_path, index=False)

You might also like