opendata/20220801/time_sum.py

126 lines
8.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import demjson3
#显示所有的列
pd.set_option('display.max_columns', None)
#显示所有的行
pd.set_option('display.max_rows', None)
# #设置value的显示长度为100默认为50
# pd.set_option('max_colwidth',100)
data1 = pd.read_excel("文化A卷整理.xlsx",converters = {'ticket_id' : str})
data3 = pd.read_excel("文化Z卷整理.xlsx",converters = {'ticket_id' : str})
# mid_data = pd.read_csv("mid_data.csv")
data2_1 = pd.read_excel("ticket_log_PBL_testing3—过程数据.xlsx",sheet_name="Result 1")
data2_2 = pd.read_excel("ticket_log_PBL_testing3—过程数据.xlsx",sheet_name="Result 2")
data2_3 = pd.read_excel("ticket_log_PBL_testing3—过程数据.xlsx",sheet_name="Result 3")
data2_4 = pd.read_excel("ticket_log_PBL_testing3—过程数据.xlsx",sheet_name="Result 4")
#
frames = []
frames.append(data2_1)
print(1)
frames.append(data2_2)
print(2)
frames.append(data2_3)
print(3)
frames.append(data2_4)
print(4)
data2 = pd.concat(frames)
print(5)
data2.drop(index = data2[(data2['ticket_id']== '140702141110821')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['ticket_id']== '330300211060121')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['ticket_id']== '330300181090121')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['ticket_id']== '330300141123521')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['task_name']== '导言')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['task_name']== '热身题【本题不计入总分】')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['task_name']== '生活水平问题')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['task_name']== '运动会问题')].index.tolist(),inplace=True)
data2.drop(index = data2[(data2['task_name']== '【后测版】刷牙问题')].index.tolist(),inplace=True)
data2['task_answer'] = data2['task_answer'].str.replace(' ','')
global null
null = ""
global false
false = ""
global true
true = ""
def T2(mes):
if pd.isnull(mes):
return 0
if 'basic' in demjson3.decode(mes):
return len(demjson3.decode(mes)["basic"][-1])
else:
return 0
mid_data = data2.loc[lambda data2 :(data2["task_name"]=="学习区:神话与民族精神") | (data2["task_name"]=="场景一")|(data2["task_name"]=="创作区:神话与文化创新"),:]
mid_data['basic'] = mid_data.apply(lambda x: T2(x["task_answer"]), axis=1)
mid_data = pd.read_csv("mid_data.csv",encoding="utf-8",converters={'ticket_id' : str})
print(data2[(data2['ticket_id'] == '110107031021721')])
for i in range(len(data1)):
print(i)
if pd.isnull(data1.loc[i]["start_time"]) or pd.isnull(data1.loc[i]["stop_time"]):
row_index = data2[data2['ticket_id'] == data1.loc[i]['ticket_id']].index.tolist()
if len(row_index)>0:
data1.at[i,"start_time"] = pd.to_datetime(data2.loc[row_index[0]]["timestamp"])
data1.at[i,"stop_time"] = pd.to_datetime(data2.loc[row_index[-1]]["timestamp"])
data1.at[i,"cost_time"] = pd.to_timedelta(pd.to_datetime(data1.loc[i]["stop_time"]) - pd.to_datetime(data1.loc[i]["start_time"])).total_seconds()
row_index1 = data2[(data2['task_name'] == '穿越区:神话与远古先民') & (data2['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
row_index2 = mid_data[(mid_data['basic'] == 0) & (mid_data['task_name'] == '学习区:神话与民族精神')& (mid_data['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
row_index3 = mid_data[(mid_data['basic'] > 0) & (mid_data['task_name'] == '学习区:神话与民族精神')& (mid_data['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
row_index4 = data2[(data2['task_name'] == '辩论区:神话与当代传承') & (data2['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
row_index5 = mid_data[(mid_data['task_name'] == "创作区:神话与文化创新")& (mid_data['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
row_index6 = mid_data[(mid_data['basic'] > 0) & (mid_data['task_name'] == "创作区:神话与文化创新")& (mid_data['ticket_id'] == data1.loc[i]['ticket_id'])].index.tolist()
if len(row_index1) > 0:
data1.at[i,"T1_strat_time"] = pd.to_datetime(data2.loc[row_index1[0]]["timestamp"])
data1.at[i,"T1_stop_time"] = pd.to_datetime(data2.loc[row_index1[-1]]["timestamp"])
data1.at[i,"T1_cost_time"] = pd.to_timedelta(data1.loc[i]["T1_stop_time"] - data1.loc[i]["T1_strat_time"]).total_seconds()
if len(row_index2) >0:
data1.at[i,"T2_strat_time"] = pd.to_datetime(mid_data.loc[row_index2[0]]["timestamp"])
data1.at[i,"T2_stop_time"] = pd.to_datetime(mid_data.loc[row_index2[-1]]["timestamp"])
data1.at[i,"T2_cost_time"] = pd.to_timedelta(data1.loc[i]["T2_stop_time"] - data1.loc[i]["T2_strat_time"]).total_seconds()
if len(row_index3) > 0:
data1.at[i,"T3_strat_time"] = pd.to_datetime(mid_data.loc[row_index3[0]]["timestamp"])
data1.at[i,"T3_stop_time"] = pd.to_datetime(mid_data.loc[row_index3[-1]]["timestamp"])
data1.at[i,"T3_cost_time"] = pd.to_timedelta(data1.loc[i]["T3_stop_time"] - data1.loc[i]["T3_strat_time"]).total_seconds()
if len(row_index4) > 0:
data1.at[i, "T4_strat_time"] = pd.to_datetime(data2.loc[row_index4[0]]["timestamp"])
data1.at[i, "T4_stop_time"] = pd.to_datetime(data2.loc[row_index4[-1]]["timestamp"])
data1.at[i, "T4_cost_time"] = pd.to_timedelta(
data1.loc[i]["T4_stop_time"] - data1.loc[i]["T4_strat_time"]).total_seconds()
if len(row_index6) >0:
data1.at[i, "T5_strat_time"] = pd.to_datetime(data2.loc[row_index5[0]]["timestamp"])
data1.at[i, "T5_stop_time"] = pd.to_datetime(data2.loc[row_index6[0]]["timestamp"])
data1.at[i, "T5_cost_time"] = pd.to_timedelta(
data1.loc[i]["T5_stop_time"] - data1.loc[i]["T5_strat_time"]).total_seconds()
data1.at[i, "T6_strat_time"] = pd.to_datetime(data2.loc[row_index6[0]]["timestamp"])
data1.at[i, "T6_stop_time"] = pd.to_datetime(data2.loc[row_index6[-1]]["timestamp"])
data1.at[i, "T6_cost_time"] = pd.to_timedelta(
data1.loc[i]["T6_stop_time"] - data1.loc[i]["T6_strat_time"]).total_seconds()
for i in range(len(data3)):
print(i)
if pd.isnull(data3.loc[i]["start_time"]) or pd.isnull(data3.loc[i]["stop_time"]):
row_index = data2[data2['ticket_id'] == data3.loc[i]['ticket_id']].index.tolist()
if len(row_index)>0:
data3.at[i,"start_time"] = pd.to_datetime(data2.loc[row_index[0]]["timestamp"])
data3.at[i,"stop_time"] = pd.to_datetime(data2.loc[row_index[-1]]["timestamp"])
data3.at[i,"cost_time"] = pd.to_timedelta(pd.to_datetime(data3.loc[i]["stop_time"]) - pd.to_datetime(data3.loc[i]["start_time"])).total_seconds()
row_index1 = mid_data[(mid_data['basic'] > 0) & (mid_data['task_name'] == '场景一') & (mid_data['ticket_id'] == data3.loc[i]['ticket_id'])].index.tolist()
row_index0 = data2[(data2['task_name'] == '场景一') & (data2['ticket_id'] == data3.loc[i]['ticket_id'])].index.tolist()
row_index2 = data2[(data2['task_name'] == '场景二') & (data2['ticket_id'] == data3.loc[i]['ticket_id'])].index.tolist()
if len(row_index1) > 0:
data3.at[i,"T1_strat_time"] = pd.to_datetime(data2.loc[row_index0[0]]["timestamp"])
data3.at[i,"T1_stop_time"] = pd.to_datetime(mid_data.loc[row_index1[0]]["timestamp"])
data3.at[i,"T1_cost_time"] = pd.to_timedelta(data3.loc[i]["T1_stop_time"] - data3.loc[i]["T1_strat_time"]).total_seconds()
data3.at[i,"T2_strat_time"] = pd.to_datetime(mid_data.loc[row_index1[0]]["timestamp"])
data3.at[i,"T2_stop_time"] = pd.to_datetime(mid_data.loc[row_index1[-1]]["timestamp"])
data3.at[i,"T2_cost_time"] = pd.to_timedelta(data3.loc[i]["T2_stop_time"] - data3.loc[i]["T2_strat_time"]).total_seconds()
if len(row_index2) > 0:
data3.at[i,"T3_strat_time"] = pd.to_datetime(data2.loc[row_index2[0]]["timestamp"])
data3.at[i,"T3_stop_time"] = pd.to_datetime(data2.loc[row_index2[-1]]["timestamp"])
data3.at[i,"T3_cost_time"] = pd.to_timedelta(data3.loc[i]["T3_stop_time"] - data3.loc[i]["T3_strat_time"]).total_seconds()
data1.to_csv('A卷_原始数据.csv',index = None)
data3.to_csv('Z卷_原始数据.csv',index = None)