forked from Open-CT/openbrain
feat: complete the analysis of process data
This commit is contained in:
parent
1be992752c
commit
71e0b7d2dd
|
@ -0,0 +1,55 @@
|
|||
#### 过程性数据处理
|
||||
|
||||
使用过程性数据补充结果性数据中缺失的结束时间,以此可以重新进行结果性数据分析中的学生作答时长分析部分
|
||||
|
||||
- split.py - 从系统抓取的全部原始数据中分割出属于各答卷的部分
|
||||
- get_stoptime_a.py - 使用过程性数据中补充a卷结果性数据中缺失的结束时间
|
||||
- get_stoptime_z.py - 使用过程性数据中补充z卷结果性数据中缺失的结束时间
|
||||
- statistic_time.py - 重新进行作答时长统计
|
||||
|
||||
从原始过程性数据中构造出学生的个性化特征指标数据集,对每位学生作答每页具体题目均给出6项特征值
|
||||
|
||||
- preprocess_processdata_A.py - 预处理原始过程性数据,增加对每个数据记录的特征标记
|
||||
- getcharacter_processdata_A.py - 分析已完成预处理的过程性数据,得到学生个性化特征指标数据集
|
||||
|
||||
#### 特征值
|
||||
|
||||
##### Time:在每个页面上停留时间
|
||||
|
||||
- 仅考虑所有“有效停留”的时间总和,“有效停留”定义为在该页面上停留时间长于1秒
|
||||
- 目前只能对每个页面的用时进行区别,在少数情况下,一个页面上会放置多个题目,但此时学生很可能不按顺序作答题目,因此放弃处理,只分析学生在每个页面上停留的时间(其他特征值同理)
|
||||
- 划分每道题目用时的代码逻辑:
|
||||
- 新增两列page,page_time,在每页结束行的对应行(即每个page结束后第一个page改变行)标注该page的使用时长
|
||||
- 若改变了用户id或task_name,则当前时间设为starttime(即第一道题的开始时间)
|
||||
- 若改变了page且page!=1,即page != old_page and page!=1,则当前时间设为old_page的stoptime,和记录的starttime进行运算,记录old_page加到page列中,记录时间差加到page_time列中
|
||||
|
||||
##### Repeat:作答完成每个页面后,返回该页面查看的次数
|
||||
|
||||
- 指除正常按顺序查看本页面之外,学生在作答其他页面的题目时,可能希望重新查看该页面作为参考,每次回到本页面视为一次返回
|
||||
- 因为答题系统不能跳跃查看题目,如作答第5页面时希望参考第3页面,这就需要在4页面上短暂停留,但这一访问4页面的动作其实是无意义的,因此在分析时在页面的停留时间大于1秒才视为1次有效的返回停留
|
||||
- 可以根据上一步得到的page和pagetime列直接处理得到
|
||||
|
||||
##### Revise:在每个页面上修改次数
|
||||
|
||||
- 对选择题:修改答案(容易得到)
|
||||
- 对填空题:将一个连续删除序列视为一次修改(不容易得到,因为是逐行读取数据,这其实需要3行数据)
|
||||
- 增加edit列,标注每行是否是修改动作,对该列统计得到
|
||||
|
||||
##### Before:进入每个页面,到第一次作答的时间
|
||||
|
||||
- 定位每页的第一次修改和上一个动作的时间差
|
||||
- 增加before列,在每页的第一次修改行、写入该行和上一行的时间差,取该列的值得到
|
||||
|
||||
##### After:第一次完成每个页面作答,到离开页面的时间
|
||||
|
||||
- 答题系统的设计逻辑是:只有完成前一页题目,才能进入下一页题目,因此首次进入每页题目的前一个动作,必定是前一页题目的完成动作。
|
||||
- 定位某页的第一次进入和上一个动作的时间差
|
||||
- 增加after列,在每页的第一次进入行、写入该行和上一行的时间差,取该列的值得到
|
||||
|
||||
##### AR:作答完成每个页面后,返回该页面修改的次数
|
||||
|
||||
- 答题系统的设计逻辑是:只有完成前一页题目,才能进入下一页题目,因此首次进入每页题目的前一个动作,必定是前一页题目的完成动作。
|
||||
- 直接取每行对应的page(非page列)判断得到完成后返回的行,取和前文得到的edit列的交集
|
||||
- 增加AR列,对该列统计得到
|
||||
- 修改总次数 = 完成后修改次数 + 完成前修改次数
|
||||
- 判断完成后返回行、第一次进入的动作可同时进行
|
|
@ -0,0 +1,116 @@
|
|||
# ==========================================================================
|
||||
# 处理过程性数据获取学生特征值 - step2
|
||||
# 本脚本以完成了预处理的【已排序的】学生作答过程性作为待处理的原始数据文档,对其进行
|
||||
# 特征值抽取,获得每个学生对每个作答页面的以下特征值数据:
|
||||
# Time:在每个页面上停留的时间(总计,每次大于1秒)
|
||||
# Revise:在每个页面上修改的次数(总计)
|
||||
# Repeat:停留的次数(总计,大于1秒)
|
||||
# Before:进入每个小题作答前时间
|
||||
# After:进入每个小题作答后时间
|
||||
# AR:每个小题作答完成后再返回被修改次数
|
||||
# ==========================================================================
|
||||
|
||||
import json
|
||||
import pandas as pd
|
||||
from sqlalchemy import column
|
||||
|
||||
data = pd.read_excel(r"A_demo_out.xlsx")
|
||||
id_dict = {} # 最终全部数据都保存在这一变量中,可以直接转换为pandas类型对象,进而写入excel文件
|
||||
for index, row in data.iterrows():
|
||||
# 逐行读取
|
||||
id = str(row["ticket_id"])
|
||||
task_name = str(row["task_name"])
|
||||
if (id not in id_dict.keys()):
|
||||
id_dict[id] = {}
|
||||
id_com = id_dict[id]
|
||||
# 以下代码:读取页数信息、读取预处理得到的关键值,若原表格中为空值则此处赋值为-1
|
||||
edit = int(row["edit"])
|
||||
AR = int(row["AR"])
|
||||
answer = json.loads(row['task_answer'])
|
||||
frame = answer["frame"]
|
||||
if (frame == None):
|
||||
page_now = -1
|
||||
else:
|
||||
page_now = int(frame["data"]["page"])
|
||||
|
||||
if (pd.isnull(row["page"])):
|
||||
page = -1
|
||||
else:
|
||||
page = int(row["page"])
|
||||
|
||||
if (pd.isnull(row["pagetime"])):
|
||||
pagetime = -1
|
||||
else:
|
||||
pagetime = float(row["pagetime"])
|
||||
|
||||
if (pd.isnull(row["before"])):
|
||||
before = -1
|
||||
else:
|
||||
before = float(row["before"])
|
||||
|
||||
if (pd.isnull(row["after"])):
|
||||
after = -1
|
||||
else:
|
||||
after = float(row["after"])
|
||||
|
||||
if (task_name == "运动会问题"):
|
||||
if (page != -1 and pagetime != -1):
|
||||
column_time = "sports_Time_A" + str(page) # 停留总时间
|
||||
column_repeat = "sports_Repeat_A" + str(page) # 返回停留的次数
|
||||
if (column_time in id_com.keys()):
|
||||
id_com[column_time] = id_com[column_time] + pagetime
|
||||
id_com[column_repeat] = id_com[column_repeat] + 1
|
||||
else:
|
||||
id_com[column_time] = pagetime
|
||||
id_com[column_repeat] = 0
|
||||
if (page_now != -1):
|
||||
column_revise = "sports_Revise_A" + str(page_now) # 修改总次数
|
||||
if (column_revise in id_com.keys()):
|
||||
id_com[column_revise] = id_com[column_revise] + edit
|
||||
else:
|
||||
id_com[column_revise] = 0
|
||||
column_AR = "sports_AR_A" + str(page_now) # 完成后返回修改总次数
|
||||
if (column_AR in id_com.keys()):
|
||||
id_com[column_AR] = id_com[column_AR] + AR
|
||||
else:
|
||||
id_com[column_AR] = 0
|
||||
if (before != -1):
|
||||
column_before = "sports_before_A" + str(page_now)
|
||||
id_com[column_before] = before
|
||||
if (after != -1):
|
||||
column_after = "sports_after_A" + str(page_now-1)
|
||||
id_com[column_after] = after
|
||||
elif (task_name == "生活水平问题"):
|
||||
if (page != -1 and pagetime != -1):
|
||||
column_time = "life_Time_A" + str(page) # 停留总时间
|
||||
column_repeat = "life_Repeat_A" + str(page) # 返回停留的次数
|
||||
if (column_time in id_com.keys()):
|
||||
id_com[column_time] = id_com[column_time] + pagetime
|
||||
id_com[column_repeat] = id_com[column_repeat] + 1
|
||||
else:
|
||||
id_com[column_time] = pagetime
|
||||
id_com[column_repeat] = 0
|
||||
if (page_now != -1):
|
||||
column_revise = "life_Revise_A" + str(page_now) # 修改总次数
|
||||
if (column_revise in id_com.keys()):
|
||||
id_com[column_revise] = id_com[column_revise] + edit
|
||||
else:
|
||||
id_com[column_revise] = 0
|
||||
column_AR = "life_AR_A" + str(page_now) # 完成后返回修改总次数
|
||||
if (column_AR in id_com.keys()):
|
||||
id_com[column_AR] = id_com[column_AR] + AR
|
||||
else:
|
||||
id_com[column_AR] = 0
|
||||
if (before != -1):
|
||||
column_before = "life_before_A" + str(page_now)
|
||||
id_com[column_before] = before
|
||||
if (after != -1):
|
||||
column_after = "life_after_A" + str(page_now-1)
|
||||
id_com[column_after] = after
|
||||
print(id_com)
|
||||
id_dict[id] = id_com
|
||||
|
||||
|
||||
data_df = pd.DataFrame(id_dict).T
|
||||
data_df = data_df.fillna(-1) # 空缺位置填充-1
|
||||
data_df.to_excel("A_demo_statre.xlsx")
|
|
@ -0,0 +1,367 @@
|
|||
# ==========================================================================
|
||||
# 处理过程性数据获取学生特征值 - step1
|
||||
# 本脚本以采集到的【已排序的】学生作答过程性作为待处理的原始数据文档,对其进行预处理
|
||||
# 预处理完成后,新的文档会在原始数据文档的基础上新增6列,作为下一步的处理对象
|
||||
#
|
||||
# 新增信息:
|
||||
# page:学生可能会连续在同一页面上执行一系列动作,当结束在同一页面上的操作并切换到另
|
||||
# 一页面后,会在切换后的第一个页面的动作行的page列标注刚刚离开的页面页数,
|
||||
# 如果停留时间(见pagetime)小于1秒则不标注
|
||||
# pagetime:同上,会在pagetime列标注在刚刚离开的页面上的停留时长,精确到1秒,如果
|
||||
# 停留时间小于1秒则不标注
|
||||
# edit:如果学生执行“编辑”操作则标注为1,否则为0;“编辑”定义为修改选择题的答案或
|
||||
# 填空题的一个连续删除序列的结束(连续删除序列后增加字符、或者切换到下一题)
|
||||
# before:学生从拿到一个新页面到在页面上首次作答之间的时间差,在每页的第一次修改行
|
||||
# 标注该行和上一行的时间差,精确到0.1秒
|
||||
# after:学生完成一个页面的作答到切换到下一页之间的时间差,在每页的第一次访问行标注
|
||||
# 该行和上一行的时间差,精确到0.1秒
|
||||
# AR:学生完成一页题目后,已经进入了下一页面、又返回修改已完成的页面。如果该编辑动作
|
||||
# 属于这种情况,则标注为1,否则为0
|
||||
# ==========================================================================
|
||||
# Attention!!!
|
||||
# 由于判断某行是否为一个修改行,需要将它和前后行进行对比,即参考3行-->计算得出中间1行
|
||||
# 因此在遍历每行时,计算的均为上一行的edit、before、after、AR值,最终结果需将这四
|
||||
# 个列表向前循环一位
|
||||
# ==========================================================================
|
||||
from operator import le
|
||||
from tracemalloc import stop
|
||||
import pandas as pd
|
||||
from datetime import date, datetime
|
||||
import json
|
||||
|
||||
def caltime(date1, date2):
|
||||
""" 计算两个datatime类型时间点之间的时间长度,长度以秒为单位
|
||||
:param date1: 开始时间
|
||||
:param date2: 结束时间
|
||||
|
||||
"""
|
||||
if (date1 == '' or date2 == ''):
|
||||
return ""
|
||||
datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00' # 可能存在的两种时间格式,均需要进行处理
|
||||
datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'
|
||||
try:
|
||||
d1 = datetime.strptime(str(date1), datetimeFormat)
|
||||
except ValueError:
|
||||
d1 = datetime.strptime(str(date1), datetimeFormat2)
|
||||
try:
|
||||
d2 = datetime.strptime(str(date2), datetimeFormat)
|
||||
except ValueError:
|
||||
d2 = datetime.strptime(str(date2), datetimeFormat2)
|
||||
delta = d2 - d1 # 两个datetime类相减,得到一个deltatime类的对象
|
||||
miao = delta.seconds
|
||||
if (miao == 0):
|
||||
miao = ""
|
||||
return miao
|
||||
|
||||
data = pd.read_excel(r'A_demo.xlsx') # 输入文档,要求是完整的已排序的过程性数据
|
||||
|
||||
# 以下为定义的全局变量
|
||||
old_id = "" # 上一行的id
|
||||
old_task_name = "" # 上一行的任务名称
|
||||
oldold_answer_dict = {} # 上上一行、其打开页面上的作答结果
|
||||
old_answer_dict = {} # 上一行、其打开页面上的作答结果
|
||||
oldold_page = 0 # 上上一行、其打开的页面页数
|
||||
old_page = 0 # 上一行、其打开的页面页数
|
||||
oldold_time = "" # 上上一行、其动作的时间戳
|
||||
old_time = "" # 上一行、其动作的时间戳
|
||||
starttime = "" # 这两个time值用于保存计算页面访问时间的值
|
||||
stoptime = ""
|
||||
max_page = 0 # 由于计算AR列需要判断当前行是否为一个返回已完成页面的动作,即当前行的页面号是否小于已访问过的最大序号
|
||||
before_tag = [] # 由于计算before列需要获得每页的第一次修改行,因此用该变量标注当前学生已修改过的页面页数
|
||||
|
||||
# 以下为最终写入新表格的新列数据
|
||||
page_list = []
|
||||
page_time_list = []
|
||||
edit_list = []
|
||||
AR_list = []
|
||||
before_list = []
|
||||
after_list = []
|
||||
|
||||
|
||||
|
||||
for index, row in data.iterrows():
|
||||
# 开始按行遍历表格
|
||||
print(index)
|
||||
# 提取每行的重要信息
|
||||
id = str(row['ticket_id'])
|
||||
task_id = str(row["task_id"])
|
||||
task_name = str(row["task_name"])
|
||||
timestamp = str(row["timestamp"])
|
||||
answer = json.loads(row['task_answer'])
|
||||
frame = answer["frame"]
|
||||
answer_dict = {}
|
||||
if (frame != None):
|
||||
# 若该行中能提取出作答信息,则提取作答结果数据,反之该行可能是用于分隔不同学生的null列
|
||||
dataa = frame["data"]
|
||||
page = int(dataa["page"])
|
||||
# 以下代码段用于更新计算每页停留时间的starttime变量和stoptime变量,并写入page和pagetime列
|
||||
if ((old_id != id) or (old_task_name != task_name)):
|
||||
print("###")
|
||||
starttime = timestamp
|
||||
page_list.append("")
|
||||
page_time_list.append("")
|
||||
elif (page != 1 and old_page != page):
|
||||
print("&&&")
|
||||
stoptime = timestamp
|
||||
delta = caltime(starttime, stoptime)
|
||||
starttime = timestamp
|
||||
page_list.append(old_page)
|
||||
page_time_list.append(delta)
|
||||
else:
|
||||
print("@@@")
|
||||
page_list.append("")
|
||||
page_time_list.append("")
|
||||
# 以下代码段处理具体的学生作答结果,写入edit、before、AR列
|
||||
answer = dataa["answer"]
|
||||
answer_list = list(answer)
|
||||
### 以下代码段从学生的全部作答中,提取当前操作页面的作答结果,保存入全局变量answer_dict
|
||||
if (task_name == "热身题【本题不计入总分】"):
|
||||
# 热身题
|
||||
pass
|
||||
elif (task_name == "运动会问题"):
|
||||
# 运动会问题
|
||||
if (page == 1):
|
||||
answer_dict['P3'] = answer_list[0]
|
||||
elif (page == 2):
|
||||
answer_dict['MM60311'] = answer_list[1]
|
||||
elif (page == 3):
|
||||
answer_dict['MM60321'] = answer_list[2]
|
||||
elif (page == 4):
|
||||
answer_dict['MM60331'] = answer_list[3]
|
||||
elif (page == 5):
|
||||
answer_dict['MM60341_wang'] = answer_list[4]
|
||||
answer_dict['MM60341_ming'] = answer_list[5]
|
||||
answer_dict['MM60341_zhang'] = answer_list[6]
|
||||
answer_dict['MM60341_li'] = answer_list[7]
|
||||
answer_dict['MM60341_hua'] = answer_list[8]
|
||||
elif (page == 6):
|
||||
answer_dict['MM60351'] = answer_list[9]
|
||||
elif (task_name == "生活水平问题"):
|
||||
# 生活水平问题
|
||||
if (page == 1):
|
||||
answer_dict['P4'] = answer_list[0]
|
||||
elif (page == 2):
|
||||
answer_dict['MM60411'] = answer_list[1]
|
||||
elif (page == 3):
|
||||
answer_dict['MM60421'] = answer_list[2]
|
||||
elif (page == 4):
|
||||
answer_dict['MM60431'] = answer_list[3]
|
||||
answer_dict['MM60432'] = answer_list[4]
|
||||
elif (page == 5):
|
||||
answer_dict['MM60441'] = answer_list[5]
|
||||
answer_dict['MM60442'] = answer_list[6]
|
||||
elif (page == 6):
|
||||
answer_dict['MM60451_1'] = answer_list[7]
|
||||
answer_dict['MM60451_2'] = answer_list[8]
|
||||
elif (page == 7):
|
||||
answer_dict['MM60461_1'] = answer_list[9]
|
||||
answer_dict['MM60461_2'] = answer_list[10]
|
||||
answer_dict['MM60461_3'] = answer_list[11]
|
||||
answer_dict['MM60461_4'] = answer_list[12]
|
||||
# 以下处理判断【当前处理行的【上一行】】是否为修改动作,是否为edit,判断逻辑为:
|
||||
# ========================================================================
|
||||
# 从当前的page要向前看两页old_page和oldold_page,判断old_page页为修改页的条件:
|
||||
# old_page相对oldold,页数不变
|
||||
# old_page为选择题页
|
||||
# 要求出现的-1数目没有改变
|
||||
# old_page为填空题页
|
||||
# old_page相对oldold,字符串长度缩短且
|
||||
# page相对old_page,页数不变且字符串长度增加
|
||||
# page相对old_page,页数改变
|
||||
# ========================================================================
|
||||
# NOTES:由于具体每页的题目数量和类型均不同,因此难以整合出统一的处理方法(函数),
|
||||
# 目前直接对每题、每页进行单独判断并处理,后期可优化
|
||||
if (old_page != oldold_page):
|
||||
edit_list.append("0")
|
||||
before_list.append("")
|
||||
if (max_page < old_page):
|
||||
time_tmp = caltime(oldold_time, old_time)
|
||||
if (time_tmp == ""):
|
||||
time_tmp = 0
|
||||
after_list.append(time_tmp)
|
||||
else:
|
||||
after_list.append("")
|
||||
else:
|
||||
print(before_tag)
|
||||
after_list.append("")
|
||||
if (old_page not in before_tag):
|
||||
before_tag.append(old_page)
|
||||
time_tmp = caltime(oldold_time, old_time)
|
||||
if (time_tmp == ""):
|
||||
time_tmp = 0
|
||||
before_list.append(time_tmp)
|
||||
else:
|
||||
before_list.append("")
|
||||
if (old_task_name == "运动会问题"):
|
||||
if (old_page in [1, 2, 3, 6]):
|
||||
old_answer = str(list(old_answer_dict.values()))
|
||||
oldold_answer = str(list(oldold_answer_dict.values()))
|
||||
if (old_answer.count("-1") == oldold_answer.count("-1")):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_page == 4):
|
||||
if (len(old_answer_dict["MM60331"]) < len(oldold_answer_dict["MM60331"])):
|
||||
if (page != old_page):
|
||||
edit_list.append("1")
|
||||
elif (page == old_page and len(old_answer_dict["MM60331"]) < len(answer_dict["MM60331"])):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_page == 5):
|
||||
old_answer = old_answer_dict["MM60341_wang"] + old_answer_dict["MM60341_ming"] + old_answer_dict["MM60341_zhang"] + old_answer_dict["MM60341_li"] + old_answer_dict["MM60341_hua"]
|
||||
oldold_answer = oldold_answer_dict["MM60341_wang"] + oldold_answer_dict["MM60341_ming"] + oldold_answer_dict["MM60341_zhang"] + oldold_answer_dict["MM60341_li"] + oldold_answer_dict["MM60341_hua"]
|
||||
if (len(old_answer) < len(oldold_answer)):
|
||||
if (page != old_page):
|
||||
edit_list.append("1")
|
||||
elif (page == old_page):
|
||||
now_answer = answer_dict["MM60341_wang"] + answer_dict["MM60341_ming"] + answer_dict["MM60341_zhang"] + answer_dict["MM60341_li"] + answer_dict["MM60341_hua"]
|
||||
if (len(old_answer) < len(now_answer)):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_task_name == "生活水平问题"):
|
||||
if (old_page in [1, 2, 3, 7]):
|
||||
old_answer = str(list(old_answer_dict.values()))
|
||||
oldold_answer = str(list(oldold_answer_dict.values()))
|
||||
if (old_answer.count("-1") == oldold_answer.count("-1")):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_page == 4):
|
||||
old_answer = old_answer_dict["MM60431"] + old_answer_dict["MM60432"]
|
||||
oldold_answer = oldold_answer_dict["MM60431"] + oldold_answer_dict["MM60432"]
|
||||
if (len(old_answer) < len(oldold_answer)):
|
||||
if (page != old_page):
|
||||
edit_list.append("1")
|
||||
elif (page == old_page):
|
||||
now_answer = answer_dict["MM60431"] + answer_dict["MM60432"]
|
||||
if (len(old_answer) < len(now_answer)):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_page == 5):
|
||||
old_answer = old_answer_dict["MM60441"] + old_answer_dict["MM60442"]
|
||||
oldold_answer = oldold_answer_dict["MM60441"] + oldold_answer_dict["MM60442"]
|
||||
if (len(old_answer) < len(oldold_answer)):
|
||||
if (page != old_page):
|
||||
edit_list.append("1")
|
||||
elif (page == old_page):
|
||||
now_answer = answer_dict["MM60441"] + answer_dict["MM60442"]
|
||||
if (len(old_answer) < len(now_answer)):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
elif (old_page == 6):
|
||||
old_answer = old_answer_dict["MM60451_1"] + old_answer_dict["MM60451_2"]
|
||||
oldold_answer = oldold_answer_dict["MM60451_1"] + oldold_answer_dict["MM60451_2"]
|
||||
if (len(old_answer) < len(oldold_answer)):
|
||||
if (page != old_page):
|
||||
edit_list.append("1")
|
||||
elif (page == old_page):
|
||||
now_answer = answer_dict["MM60451_1"] + answer_dict["MM60451_2"]
|
||||
if (len(old_answer) < len(now_answer)):
|
||||
edit_list.append("1")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
else:
|
||||
edit_list.append("0")
|
||||
edit = int(edit_list[-1])
|
||||
# 以下代码:写入AR列,满足AR列的前提条件是一个edit列
|
||||
# 满足AR列的条件:(edit==1) && (该行的页面序号小于最大已访问页面序号)
|
||||
if (max_page < old_page):
|
||||
max_page = old_page
|
||||
if (edit == 1 and old_page < max_page):
|
||||
AR_list.append("1")
|
||||
else:
|
||||
AR_list.append("0")
|
||||
# 以下代码:常规动作,更新全局变量
|
||||
oldold_page = old_page
|
||||
old_page = page
|
||||
oldold_time = old_time
|
||||
old_time = timestamp
|
||||
else:
|
||||
# 若该行是无内容null页,用于分隔不同的学生作答
|
||||
# 以下代码:此时可以重新初始化各个每个学生特有的变量
|
||||
page = 0
|
||||
before_tag = []
|
||||
max_page = 0
|
||||
# 以下代码:如果改变了用户id或task_name,则当前时间设为starttime,即第一道题的开始时间
|
||||
if ((old_id != id) or (old_task_name != task_name)):
|
||||
print("###")
|
||||
starttime = timestamp
|
||||
# 以下代码:向写入新列中填充合适的空值
|
||||
page_list.append("")
|
||||
page_time_list.append("")
|
||||
edit_list.append("0")
|
||||
AR_list.append("0")
|
||||
before_list.append("")
|
||||
# 以下代码:但针对after列,切换了用户则必然代表切换到新页,因此向after列中填充合适的空值
|
||||
time_tmp = caltime(oldold_time, old_time)
|
||||
if (time_tmp == ""):
|
||||
time_tmp = 0
|
||||
after_list.append(time_tmp)
|
||||
# 以下代码:常规动作,更新全局变量
|
||||
oldold_time = old_time
|
||||
old_time = timestamp
|
||||
oldold_page = old_page
|
||||
old_page = page
|
||||
# 以下代码:常规动作,更新全局变量
|
||||
old_id = id
|
||||
old_task_name = task_name
|
||||
oldold_answer_dict = old_answer_dict
|
||||
old_answer_dict = answer_dict
|
||||
|
||||
# 以下代码:将四个列表向前循环一位
|
||||
x = edit_list.pop(0)
|
||||
edit_list.append(x)
|
||||
|
||||
x = AR_list.pop(0)
|
||||
AR_list.append(x)
|
||||
|
||||
x = before_list.pop(0)
|
||||
before_list.append(x)
|
||||
|
||||
x = after_list.pop(0)
|
||||
after_list.append(x)
|
||||
|
||||
# 以下代码:将6个新列表写入原始pandas数据
|
||||
col_name = data.columns.tolist()
|
||||
|
||||
col_name.insert(col_name.index('task_answer')+1, 'AR')
|
||||
col_name.insert(col_name.index('task_answer')+1, 'after')
|
||||
col_name.insert(col_name.index('task_answer')+1, 'before')
|
||||
col_name.insert(col_name.index('task_answer')+1, 'edit')
|
||||
col_name.insert(col_name.index('task_answer')+1, 'pagetime')
|
||||
col_name.insert(col_name.index('task_answer')+1, 'page')
|
||||
data = data.reindex(columns=col_name)
|
||||
data['AR'] = AR_list
|
||||
data['after'] = after_list
|
||||
data['before'] = before_list
|
||||
data['edit'] = edit_list
|
||||
data['pagetime'] = page_time_list
|
||||
data['page'] = page_list
|
||||
|
||||
data.to_excel('A_demo_out.xlsx') # 数据导出
|
Loading…
Reference in New Issue