Merge pull request '韩滏婧的代码中期提交' (#2) from syncline/openbrain:master into master

This commit is contained in:
OpenCT 2022-08-21 21:40:36 +08:00
commit 03b7bbaae0
13 changed files with 1657 additions and 0 deletions

View File

@ -0,0 +1,55 @@
#### 过程性数据处理
使用过程性数据补充结果性数据中缺失的结束时间,以此可以重新进行结果性数据分析中的学生作答时长分析部分
- split.py - 从系统抓取的全部原始数据中分割出属于各答卷的部分
- get_stoptime_a.py - 使用过程性数据补充 A 卷结果性数据中缺失的结束时间
- get_stoptime_z.py - 使用过程性数据补充 Z 卷结果性数据中缺失的结束时间
- statistic_time.py - 重新进行作答时长统计
从原始过程性数据中构造出学生的个性化特征指标数据集,对每位学生作答的每页具体题目均给出 6 项特征值:
- preprocess_processdata_A.py - 预处理原始过程性数据,增加对每个数据记录的特征标记
- getcharacter_processdata_A.py - 分析已完成预处理的过程性数据,得到学生个性化特征指标数据集
#### 特征值
##### Time:在每个页面上停留时间
- 仅考虑所有“有效停留”的时间总和;“有效停留”定义为在该页面上停留时间长于 1 秒
- 目前只能对每个页面的用时进行区别,在少数情况下,一个页面上会放置多个题目,但此时学生很可能不按顺序作答题目,因此放弃处理,只分析学生在每个页面上停留的时间(其他特征值同理)
- 划分每道题目用时的代码逻辑:
- 新增两列 page、pagetime:在每页结束行的对应行(即每个 page 结束后第一个 page 改变行)标注该 page 的使用时长
- 若改变了用户 id 或 task_name,则当前时间设为 starttime(即第一道题的开始时间)
- 若改变了 page 且 page != 1(即 `page != old_page and page != 1`),则当前时间设为 old_page 的 stoptime,并与记录的 starttime 进行运算:将 old_page 写入 page 列,将时间差写入 pagetime 列
##### Repeat:作答完成每个页面后返回该页面查看的次数
- 指除正常按顺序查看本页面之外,学生在作答其他页面的题目时,可能希望重新查看该页面作为参考,每次回到本页面视为一次返回
- 因为答题系统不能跳跃查看题目(如作答第 5 页面时希望参考第 3 页面,这就需要在第 4 页面上短暂停留,但这一访问第 4 页面的动作其实是无意义的),因此在分析时,在页面的停留时间大于 1 秒才视为 1 次有效的返回停留
- 可以根据上一步得到的page和pagetime列直接处理得到
##### Revise:在每个页面上修改次数
- 对选择题:修改答案(容易得到)
- 对填空题:将一个连续删除序列视为一次修改(不容易得到,因为是逐行读取数据,这其实需要 3 行数据)
- 增加 edit 列,标注每行是否是修改动作,对该列统计得到
##### Before:进入每个页面到第一次作答的时间
- 定位每页的第一次修改和上一个动作的时间差
- 增加 before 列:在每页的第一次修改行,写入该行和上一行的时间差;取该列的值得到
##### After:第一次完成每个页面作答到离开页面的时间
- 答题系统的设计逻辑是:只有完成前一页题目,才能进入下一页题目,因此首次进入每页题目的前一个动作,必定是前一页题目的完成动作。
- 定位某页的第一次进入和上一个动作的时间差
- 增加 after 列:在每页的第一次进入行,写入该行和上一行的时间差;取该列的值得到
##### AR:作答完成每个页面后返回该页面修改的次数
- 答题系统的设计逻辑是:只有完成前一页题目,才能进入下一页题目,因此首次进入每页题目的前一个动作,必定是前一页题目的完成动作。
- 直接取每行对应的 page(非 page 列)判断,得到完成后返回的行,取其和前文得到的 edit 列的交集
- 增加AR列对该列统计得到
- 修改总次数 = 完成后修改次数 + 完成前修改次数
- 判断完成后返回行、第一次进入的动作可同时进行

View File

@ -0,0 +1,77 @@
import pandas as pd
from datetime import datetime
# Backfill missing stop_time values of paper A from the process log and
# recompute each ticket's answering duration in minutes.
#
# Reads  : A.xlsx (process log), a_out_0714.xlsx (result sheet)
# Writes : a_out_0719.xlsx with two extra columns, stoptime_new and time_new

# Timestamps occur both with and without fractional seconds.
datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'


def _parse_timestamp(value):
    """Parse a '+08:00'-suffixed timestamp in either supported layout.

    Raises ValueError when the text matches neither layout.
    """
    text = str(value)
    try:
        return datetime.strptime(text, datetimeFormat)
    except ValueError:
        return datetime.strptime(text, datetimeFormat2)


# Collect every log timestamp per ticket, in file order.
data = pd.read_excel(r'A.xlsx')
time_dict = {}
for index, row in data.iterrows():
    time_dict.setdefault(str(row['ticket_id']), []).append(str(row['timestamp']))

# The last logged timestamp stands in for a missing stop time.
# NOTE(review): this is the last row in file order, so it assumes A.xlsx is
# sorted by time within each ticket -- confirm.
stop_time_dict = {key: value[-1] for key, value in time_dict.items()}

data2 = pd.read_excel(r'a_out_0714.xlsx')
stoptime_new_list = []
time_new_list = []
empty = 0  # rows for which no duration could be produced from the log
for index, row in data2.iterrows():
    print(index)
    ticket_id = str(row['ticket_id'])
    if pd.isna(row['stop_time']):
        # Rows whose first two items are both coded 99 are never backfilled
        # (presumably "paper not attempted" -- confirm against the codebook).
        both_99 = int(row['P3_CODE']) == 99 and int(row['MM60311_CODE']) == 99
        if both_99 or ticket_id not in stop_time_dict:
            empty += 1
            stoptime_new_list.append("")
            time_new_list.append("")
            continue
        recovered = stop_time_dict[ticket_id]
        stoptime_new_list.append(recovered)
        date2 = _parse_timestamp(recovered)
    else:
        # A stop time is already recorded; nothing to backfill.
        stoptime_new_list.append("")
        date2 = _parse_timestamp(row['stop_time'])

    if pd.isna(row['start_time']):
        # Without a start time no duration exists; previously this row made
        # strptime raise on the text 'nan' and aborted the whole run.
        time_new_list.append("")
        continue
    date1 = _parse_timestamp(row['start_time'])

    # total_seconds() keeps the day component that timedelta.seconds drops;
    # int() keeps the whole-second truncation the old code had.
    miao = int((date2 - date1).total_seconds())
    time_new_list.append(round(miao / 60, 2))

# Insert each new column right after the column it complements.
col_name = data2.columns.tolist()
col_name.insert(col_name.index('stop_time') + 1, 'stoptime_new')
data2 = data2.reindex(columns=col_name)
data2['stoptime_new'] = stoptime_new_list
col_name.insert(col_name.index('cost_time') + 1, 'time_new')
data2 = data2.reindex(columns=col_name)
data2['time_new'] = time_new_list
data2.to_excel('a_out_0719.xlsx')

View File

@ -0,0 +1,77 @@
import pandas as pd
from datetime import datetime
# Backfill missing stop_time values of paper Z from the process log and
# recompute each ticket's answering duration in minutes.
#
# Reads  : Z.xlsx (process log), z_out_0715.xlsx (result sheet)
# Writes : z_out_0719.xlsx with two extra columns, stoptime_new and time_new

# Timestamps occur both with and without fractional seconds.
datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'


def _parse_timestamp(value):
    """Parse a '+08:00'-suffixed timestamp in either supported layout.

    Raises ValueError when the text matches neither layout.
    """
    text = str(value)
    try:
        return datetime.strptime(text, datetimeFormat)
    except ValueError:
        return datetime.strptime(text, datetimeFormat2)


# Collect every log timestamp per ticket, in file order.
data = pd.read_excel(r'Z.xlsx')
time_dict = {}
for index, row in data.iterrows():
    time_dict.setdefault(str(row['ticket_id']), []).append(str(row['timestamp']))

# The last logged timestamp stands in for a missing stop time.
# NOTE(review): this is the last row in file order, so it assumes Z.xlsx is
# sorted by time within each ticket -- confirm.
stop_time_dict = {key: value[-1] for key, value in time_dict.items()}

data2 = pd.read_excel(r'z_out_0715.xlsx')
stoptime_new_list = []
time_new_list = []
empty = 0  # rows for which no duration could be produced from the log
for index, row in data2.iterrows():
    print(index)
    ticket_id = str(row['ticket_id'])
    if pd.isna(row['stop_time']):
        # Rows whose first two items are both coded 99 are never backfilled
        # (presumably "paper not attempted" -- confirm against the codebook).
        both_99 = int(row['P1_CODE']) == 99 and int(row['MM60101_CODE']) == 99
        if both_99 or ticket_id not in stop_time_dict:
            empty += 1
            stoptime_new_list.append("")
            time_new_list.append("")
            continue
        recovered = stop_time_dict[ticket_id]
        stoptime_new_list.append(recovered)
        date2 = _parse_timestamp(recovered)
    else:
        # A stop time is already recorded; nothing to backfill.
        stoptime_new_list.append("")
        date2 = _parse_timestamp(row['stop_time'])

    if pd.isna(row['start_time']):
        # Without a start time no duration exists; previously this row made
        # strptime raise on the text 'nan' and aborted the whole run.
        time_new_list.append("")
        continue
    date1 = _parse_timestamp(row['start_time'])

    # total_seconds() keeps the day component that timedelta.seconds drops;
    # int() keeps the whole-second truncation the old code had.
    miao = int((date2 - date1).total_seconds())
    time_new_list.append(round(miao / 60, 2))

# Insert each new column right after the column it complements.
col_name = data2.columns.tolist()
col_name.insert(col_name.index('stop_time') + 1, 'stoptime_new')
data2 = data2.reindex(columns=col_name)
data2['stoptime_new'] = stoptime_new_list
col_name.insert(col_name.index('cost_time') + 1, 'time_new')
data2 = data2.reindex(columns=col_name)
data2['time_new'] = time_new_list
data2.to_excel('z_out_0719.xlsx')

View File

@ -0,0 +1,116 @@
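# ==========================================================================
# Extract per-student features from the process data - step 2
# Input: the pre-processed, ALREADY SORTED process log written by step 1.
# For every student and every answer page it derives these features:
#   Time   : total time spent on the page (only stays longer than 1 second)
#   Revise : total number of edits made on the page
#   Repeat : number of stays on the page (only stays longer than 1 second)
#   Before : time spent before starting to answer
#   After  : time spent after finishing the answer
#   AR     : number of edits made after returning to an already finished page
# ==========================================================================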
import json
import pandas as pd
from sqlalchemy import column
# Aggregate the per-row markers written by the pre-processing step into one
# feature row per ticket and export the result as A_demo_statre.xlsx.

# Column-name prefix per scored task; rows of any other task add no features.
TASK_PREFIX = {"运动会问题": "sports", "生活水平问题": "life"}


def _cell_or_missing(record, column, cast):
    """Return cast(record[column]), or -1 when the cell is empty."""
    raw = record[column]
    if pd.isnull(raw):
        return -1
    return cast(raw)


data = pd.read_excel(r"A_demo_out.xlsx")
id_dict = {}  # ticket_id -> {feature column -> value}; becomes the output frame
for _, record in data.iterrows():
    ticket = str(record["ticket_id"])
    task_name = str(record["task_name"])
    features = id_dict.setdefault(ticket, {})

    # Per-row markers; empty cells are represented as -1.
    edit = int(record["edit"])
    AR = int(record["AR"])
    frame = json.loads(record['task_answer'])["frame"]
    page_now = -1 if frame is None else int(frame["data"]["page"])
    page = _cell_or_missing(record, "page", int)
    pagetime = _cell_or_missing(record, "pagetime", float)
    before = _cell_or_missing(record, "before", float)
    after = _cell_or_missing(record, "after", float)

    prefix = TASK_PREFIX.get(task_name)
    if prefix is not None:
        if page != -1 and pagetime != -1:
            time_key = prefix + "_Time_A" + str(page)      # total stay time
            repeat_key = prefix + "_Repeat_A" + str(page)  # number of return stays
            if time_key in features:
                features[time_key] += pagetime
                features[repeat_key] += 1
            else:
                # The first recorded stay is not a return, so Repeat starts at 0.
                features[time_key] = pagetime
                features[repeat_key] = 0
        if page_now != -1:
            # The first row seen for a page only creates the counters at 0;
            # that row's own edit / AR value is not added.
            revise_key = prefix + "_Revise_A" + str(page_now)  # total edits
            if revise_key in features:
                features[revise_key] += edit
            else:
                features[revise_key] = 0
            ar_key = prefix + "_AR_A" + str(page_now)  # edits after returning
            if ar_key in features:
                features[ar_key] += AR
            else:
                features[ar_key] = 0
            # NOTE(review): the source had lost its indentation; before/after
            # are assumed to sit inside the page_now check because their
            # column names are built from page_now -- confirm.
            if before != -1:
                features[prefix + "_before_A" + str(page_now)] = before
            if after != -1:
                # "after" is filed under the previous page number.
                features[prefix + "_after_A" + str(page_now - 1)] = after
    print(features)
    id_dict[ticket] = features

data_df = pd.DataFrame(id_dict).T
data_df = data_df.fillna(-1)  # features a ticket never produced become -1
data_df.to_excel("A_demo_statre.xlsx")

View File

@ -0,0 +1,367 @@
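# ==========================================================================
# Extract per-student features from the process data - step 1
# Input: the collected, ALREADY SORTED process log of student answers.
# Output: the same sheet with 6 extra columns, consumed by step 2.
#
# Added columns:
#   page     : a student may perform a run of actions on one page. After the
#              student leaves it, the first row on the next page records, in
#              the page column, the number of the page just left. Not
#              recorded when the stay (see pagetime) was shorter than 1 second.
#   pagetime : as above, the length of the stay on the page just left, to the
#              second. Not recorded when the stay was shorter than 1 second.
#   edit     : 1 if the row is an "edit", else 0. An edit is a changed choice
#              answer, or the end of a run of consecutive deletions in a
#              fill-in answer (characters are added after the run, or the
#              student moves on to the next question).
#   before   : time between receiving a new page and first answering on it;
#              written on the first edit row of each page as the time
#              difference to the previous row.
#   after    : time between finishing a page and switching to the next one;
#              written on the first visit row of each page as the time
#              difference to the previous row.
#   AR       : 1 if the edit was made after the student had already moved on
#              to a later page and came back to a finished page, else 0.
# ==========================================================================
# Attention!!!
# Deciding whether a row is an edit needs the rows before and after it
# (3 rows in, a verdict for the middle row out). While visiting a row the
# loop therefore computes edit / before / after / AR for the PREVIOUS row,
# and the four lists must be rotated forward by one position at the end.
# ==========================================================================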
from operator import le
from tracemalloc import stop
import pandas as pd
from datetime import date, datetime
import json
def caltime(date1, date2):
    """Return the whole seconds elapsed from date1 to date2.

    :param date1: start timestamp, '%Y-%m-%dT%H:%M:%S[.%f]+08:00'
    :param date2: end timestamp, same layout
    :return: elapsed time as an int number of seconds (fractions truncated),
             or "" when either argument is "" or the result is 0 seconds
    :raises ValueError: when a non-empty argument matches neither layout
    """
    if date1 == '' or date2 == '':
        return ""
    # Both layouts occur in the data: with and without fractional seconds.
    layouts = ('%Y-%m-%dT%H:%M:%S.%f+08:00', '%Y-%m-%dT%H:%M:%S+08:00')

    def _parse(value):
        text = str(value)
        try:
            return datetime.strptime(text, layouts[0])
        except ValueError:
            return datetime.strptime(text, layouts[1])

    delta = _parse(date2) - _parse(date1)
    # timedelta.seconds is only the sub-day remainder (0..86399): it drops
    # whole days and wraps negative spans. total_seconds() is the real length.
    miao = int(delta.total_seconds())
    if miao == 0:
        # Callers test for "" to detect "no measurable time".
        miao = ""
    return miao
data = pd.read_excel(r'A_demo.xlsx') # Input workbook; must be the complete, already sorted process log
# Module-level state carried from one row of the main loop to the next
old_id = "" # ticket_id of the previous row
old_task_name = "" # task name of the previous row
oldold_answer_dict = {} # answers on the open page, two rows back
old_answer_dict = {} # answers on the open page, one row back
oldold_page = 0 # page number open two rows back
old_page = 0 # page number open one row back
oldold_time = "" # timestamp of the action two rows back
old_time = "" # timestamp of the action one row back
starttime = "" # starttime / stoptime bracket the stay on a page when computing pagetime
stoptime = ""
max_page = 0 # highest page number seen so far; the AR column compares a row's page against it to detect a return to a finished page
before_tag = [] # pages that already received a "before" value; used to find the first such row per page
# Values of the new columns, one entry per input row
page_list = []
page_time_list = []
edit_list = []
AR_list = []
before_list = []
after_list = []
# Main pass over the sorted process log. Every iteration appends exactly one
# entry to each of the six output lists. page / pagetime describe the current
# row, while edit / AR / before / after describe the PREVIOUS row (they are
# rotated into place after the loop).
# NOTE(review): the nesting below was reconstructed from syntax because the
# source had lost its indentation -- confirm against the original file.
for index, row in data.iterrows():
    # Walk the sheet row by row
    print(index)
    # Pull out the fields this row contributes
    id = str(row['ticket_id'])
    task_id = str(row["task_id"])
    task_name = str(row["task_name"])
    timestamp = str(row["timestamp"])
    answer = json.loads(row['task_answer'])
    frame = answer["frame"]
    answer_dict = {}
    if (frame != None):
        # The row carries answer data. Rows without a frame are handled in the
        # else branch (per the original author they may be null rows that
        # separate students).
        dataa = frame["data"]
        page = int(dataa["page"])
        # Maintain starttime / stoptime for the page-stay computation and
        # write the page and pagetime columns
        if ((old_id != id) or (old_task_name != task_name)):
            # New student or new task: the stay clock restarts here
            print("###")
            starttime = timestamp
            page_list.append("")
            page_time_list.append("")
        elif (page != 1 and old_page != page):
            # Page changed: record the page just left and how long it was open
            print("&&&")
            stoptime = timestamp
            delta = caltime(starttime, stoptime)
            starttime = timestamp
            page_list.append(old_page)
            page_time_list.append(delta)
        else:
            print("@@@")
            page_list.append("")
            page_time_list.append("")
        # Process the submitted answers; feeds the edit, before and AR columns
        answer = dataa["answer"]
        answer_list = list(answer)
        ### Copy the answers that belong to the currently open page into answer_dict
        if (task_name == "热身题【本题不计入总分】"):
            # Warm-up task: nothing is extracted
            pass
        elif (task_name == "运动会问题"):
            # Sports-meeting task
            if (page == 1):
                answer_dict['P3'] = answer_list[0]
            elif (page == 2):
                answer_dict['MM60311'] = answer_list[1]
            elif (page == 3):
                answer_dict['MM60321'] = answer_list[2]
            elif (page == 4):
                answer_dict['MM60331'] = answer_list[3]
            elif (page == 5):
                answer_dict['MM60341_wang'] = answer_list[4]
                answer_dict['MM60341_ming'] = answer_list[5]
                answer_dict['MM60341_zhang'] = answer_list[6]
                answer_dict['MM60341_li'] = answer_list[7]
                answer_dict['MM60341_hua'] = answer_list[8]
            elif (page == 6):
                answer_dict['MM60351'] = answer_list[9]
        elif (task_name == "生活水平问题"):
            # Living-standard task
            if (page == 1):
                answer_dict['P4'] = answer_list[0]
            elif (page == 2):
                answer_dict['MM60411'] = answer_list[1]
            elif (page == 3):
                answer_dict['MM60421'] = answer_list[2]
            elif (page == 4):
                answer_dict['MM60431'] = answer_list[3]
                answer_dict['MM60432'] = answer_list[4]
            elif (page == 5):
                answer_dict['MM60441'] = answer_list[5]
                answer_dict['MM60442'] = answer_list[6]
            elif (page == 6):
                answer_dict['MM60451_1'] = answer_list[7]
                answer_dict['MM60451_2'] = answer_list[8]
            elif (page == 7):
                answer_dict['MM60461_1'] = answer_list[9]
                answer_dict['MM60461_2'] = answer_list[10]
                answer_dict['MM60461_3'] = answer_list[11]
                answer_dict['MM60461_4'] = answer_list[12]
        # Decide whether the row BEFORE the current one was an edit. Rule:
        # ========================================================================
        # Looking back from the current page at old_page and oldold_page,
        # old_page's row is an edit when:
        #   old_page is the same page as oldold_page, and
        #     old_page is a choice page:
        #       the number of "-1" entries did not change
        #     old_page is a fill-in page:
        #       the text got shorter from oldold to old, and
        #         page == old_page and the text then got longer, or
        #         page != old_page
        # ========================================================================
        # NOTES: pages differ in number and type of questions, so no single
        #        helper covers them; each task/page is handled separately for
        #        now and could be consolidated later
        if (old_page != oldold_page):
            # The previous row opened a different page, so it is not an edit
            edit_list.append("0")
            before_list.append("")
            if (max_page < old_page):
                # First visit of old_page: gap between the two previous rows
                time_tmp = caltime(oldold_time, old_time)
                if (time_tmp == ""):
                    time_tmp = 0
                after_list.append(time_tmp)
            else:
                after_list.append("")
        else:
            print(before_tag)
            after_list.append("")
            if (old_page not in before_tag):
                # First same-page follow-up row for old_page: record "before"
                before_tag.append(old_page)
                time_tmp = caltime(oldold_time, old_time)
                if (time_tmp == ""):
                    time_tmp = 0
                before_list.append(time_tmp)
            else:
                before_list.append("")
            if (old_task_name == "运动会问题"):
                if (old_page in [1, 2, 3, 6]):
                    # Pages judged by the count of "-1" in the stored answers
                    old_answer = str(list(old_answer_dict.values()))
                    oldold_answer = str(list(oldold_answer_dict.values()))
                    if (old_answer.count("-1") == oldold_answer.count("-1")):
                        edit_list.append("1")
                    else:
                        edit_list.append("0")
                elif (old_page == 4):
                    # Page judged by answer length: shorter, then longer again or page left
                    if (len(old_answer_dict["MM60331"]) < len(oldold_answer_dict["MM60331"])):
                        if (page != old_page):
                            edit_list.append("1")
                        elif (page == old_page and len(old_answer_dict["MM60331"]) < len(answer_dict["MM60331"])):
                            edit_list.append("1")
                        else:
                            edit_list.append("0")
                    else:
                        edit_list.append("0")
                elif (old_page == 5):
                    # Five answers on one page: compare their concatenation
                    old_answer = old_answer_dict["MM60341_wang"] + old_answer_dict["MM60341_ming"] + old_answer_dict["MM60341_zhang"] + old_answer_dict["MM60341_li"] + old_answer_dict["MM60341_hua"]
                    oldold_answer = oldold_answer_dict["MM60341_wang"] + oldold_answer_dict["MM60341_ming"] + oldold_answer_dict["MM60341_zhang"] + oldold_answer_dict["MM60341_li"] + oldold_answer_dict["MM60341_hua"]
                    if (len(old_answer) < len(oldold_answer)):
                        if (page != old_page):
                            edit_list.append("1")
                        elif (page == old_page):
                            now_answer = answer_dict["MM60341_wang"] + answer_dict["MM60341_ming"] + answer_dict["MM60341_zhang"] + answer_dict["MM60341_li"] + answer_dict["MM60341_hua"]
                            if (len(old_answer) < len(now_answer)):
                                edit_list.append("1")
                            else:
                                edit_list.append("0")
                        else:
                            edit_list.append("0")
                    else:
                        edit_list.append("0")
                else:
                    edit_list.append("0")
            elif (old_task_name == "生活水平问题"):
                if (old_page in [1, 2, 3, 7]):
                    # Pages judged by the count of "-1" in the stored answers
                    old_answer = str(list(old_answer_dict.values()))
                    oldold_answer = str(list(oldold_answer_dict.values()))
                    if (old_answer.count("-1") == oldold_answer.count("-1")):
                        edit_list.append("1")
                    else:
                        edit_list.append("0")
                elif (old_page == 4):
                    old_answer = old_answer_dict["MM60431"] + old_answer_dict["MM60432"]
                    oldold_answer = oldold_answer_dict["MM60431"] + oldold_answer_dict["MM60432"]
                    if (len(old_answer) < len(oldold_answer)):
                        if (page != old_page):
                            edit_list.append("1")
                        elif (page == old_page):
                            now_answer = answer_dict["MM60431"] + answer_dict["MM60432"]
                            if (len(old_answer) < len(now_answer)):
                                edit_list.append("1")
                            else:
                                edit_list.append("0")
                        else:
                            edit_list.append("0")
                    else:
                        edit_list.append("0")
                elif (old_page == 5):
                    old_answer = old_answer_dict["MM60441"] + old_answer_dict["MM60442"]
                    oldold_answer = oldold_answer_dict["MM60441"] + oldold_answer_dict["MM60442"]
                    if (len(old_answer) < len(oldold_answer)):
                        if (page != old_page):
                            edit_list.append("1")
                        elif (page == old_page):
                            now_answer = answer_dict["MM60441"] + answer_dict["MM60442"]
                            if (len(old_answer) < len(now_answer)):
                                edit_list.append("1")
                            else:
                                edit_list.append("0")
                        else:
                            edit_list.append("0")
                    else:
                        edit_list.append("0")
                elif (old_page == 6):
                    old_answer = old_answer_dict["MM60451_1"] + old_answer_dict["MM60451_2"]
                    oldold_answer = oldold_answer_dict["MM60451_1"] + oldold_answer_dict["MM60451_2"]
                    if (len(old_answer) < len(oldold_answer)):
                        if (page != old_page):
                            edit_list.append("1")
                        elif (page == old_page):
                            now_answer = answer_dict["MM60451_1"] + answer_dict["MM60451_2"]
                            if (len(old_answer) < len(now_answer)):
                                edit_list.append("1")
                            else:
                                edit_list.append("0")
                        else:
                            edit_list.append("0")
                    else:
                        edit_list.append("0")
                else:
                    edit_list.append("0")
            else:
                edit_list.append("0")
        edit = int(edit_list[-1])
        # Write the AR column; being an edit is a precondition.
        # AR condition: (edit == 1) and (the row's page is below the highest page seen)
        if (max_page < old_page):
            max_page = old_page
        if (edit == 1 and old_page < max_page):
            AR_list.append("1")
        else:
            AR_list.append("0")
        # Housekeeping: shift the page / time history by one row
        oldold_page = old_page
        old_page = page
        oldold_time = old_time
        old_time = timestamp
    else:
        # The row has no frame (a null row that separates students' answers).
        # Reset the per-student state
        page = 0
        before_tag = []
        max_page = 0
        # If the ticket or the task changed, this row's time becomes starttime
        # (the start of the first question)
        if ((old_id != id) or (old_task_name != task_name)):
            print("###")
            starttime = timestamp
        # Fill the new columns with neutral values for this row
        page_list.append("")
        page_time_list.append("")
        edit_list.append("0")
        AR_list.append("0")
        before_list.append("")
        # The after column is the exception: a change of student always means
        # a new page, so the gap between the two previous rows is recorded
        time_tmp = caltime(oldold_time, old_time)
        if (time_tmp == ""):
            time_tmp = 0
        after_list.append(time_tmp)
        # Housekeeping: shift the page / time history by one row
        oldold_time = old_time
        old_time = timestamp
        oldold_page = old_page
        old_page = page
    # Housekeeping done for every row: remember ticket, task and answers
    old_id = id
    old_task_name = task_name
    oldold_answer_dict = old_answer_dict
    old_answer_dict = answer_dict
# Every value in these four lists was computed one row late, so rotate each
# list left by one position to line it up with the row it describes.
for shifted in (edit_list, AR_list, before_list, after_list):
    shifted.append(shifted.pop(0))

# New columns, listed in the order they are inserted and assigned. Each is
# inserted directly after task_answer, so the last one listed ends up first:
# task_answer, page, pagetime, edit, before, after, AR.
new_columns = [
    ('AR', AR_list),
    ('after', after_list),
    ('before', before_list),
    ('edit', edit_list),
    ('pagetime', page_time_list),
    ('page', page_list),
]
col_name = data.columns.tolist()
for label, _ in new_columns:
    col_name.insert(col_name.index('task_answer') + 1, label)
data = data.reindex(columns=col_name)
for label, values in new_columns:
    data[label] = values
data.to_excel('A_demo_out.xlsx')  # export the annotated log

View File

@ -0,0 +1,33 @@
import pandas as pd
# Split the raw ticket log into the rows of paper A and paper Z, sheet by
# sheet, then write the per-sheet parts and the concatenated totals.
SOURCE_BOOK = r'ticket_log_PBL_testing3.xlsx'
PATTERN_A = '高阶能力测试B|高阶能力测试C'  # contest_id pattern selecting paper A rows
PATTERN_Z = '数学建模'  # contest_id pattern selecting paper Z rows

parts_a = []
parts_z = []
# The first sheet is addressed by position (the read_excel default), the
# remaining three by name.
for number, sheet in enumerate((0, 'Result 2', 'Result 3', 'Result 4'), start=1):
    sheet_rows = pd.read_excel(SOURCE_BOOK, sheet_name=sheet)
    part_a = sheet_rows[sheet_rows['contest_id'].str.contains(PATTERN_A)]
    part_z = sheet_rows[sheet_rows['contest_id'].str.contains(PATTERN_Z)]
    part_a.to_excel('A' + str(number) + '.xlsx')
    part_z.to_excel('Z' + str(number) + '.xlsx')
    parts_a.append(part_a)
    parts_z.append(part_z)

A = pd.concat(parts_a)
Z = pd.concat(parts_z)
A.to_excel('A.xlsx')
Z.to_excel('Z.xlsx')

View File

@ -0,0 +1,60 @@
import pandas as pd
# Print a histogram of the recomputed answering time (time_new, in minutes)
# for paper A (5-minute buckets) and paper Z (3-minute buckets).


def _histogram(frame, edges):
    """Count time_new values per bucket, skipping empty cells.

    Bucket i holds values v with edges[i-1] < v <= edges[i]; the last bucket
    holds everything above the final edge.
    """
    tally = [0] * (len(edges) + 1)
    for _, rec in frame.iterrows():
        if pd.isna(rec['time_new']):
            continue
        minutes = float(rec['time_new'])
        # The number of edges strictly below the value is its bucket index.
        tally[sum(minutes > edge for edge in edges)] += 1
    return tally


data_a = pd.read_excel(r'a_out_0719.xlsx')
data_z = pd.read_excel(r'z_out_0719.xlsx')
print("A")
print(*_histogram(data_a, (5, 10, 15, 20, 25, 30)))
print("z")
print(*_histogram(data_z, (3, 6, 9, 12, 15)))

View File

@ -0,0 +1,11 @@
#### 结果性数据处理
该目录下的程序可以根据评分编码细则,获得对学生作答结果的自动化编码方法和编码结果,统计学生作答时长。
- getscore.py - 提供计算每个小题编码的函数
- calc_a.py - 计算A卷编码
- calc_z.py - 计算Z卷编码
- calc_time_a.py - 统计A卷作答时长
- calc_time_z.py - 统计Z卷作答时长
由于题目和数据的保密性质,此处不便给出编码细则和处理的结果性数据

169
pydata-han/Result/calc_a.py Normal file
View File

@ -0,0 +1,169 @@
import pandas as pd
import getscore as gs
# Compute the automatic codes for paper A with the getscore helpers and write
# them, next to the answers they belong to, into a_out_0714.xlsx.
data = pd.read_excel(r'out_0712.xlsx')

# Constant arguments handed to gs.cal331 for each of the five named results
# (their meaning is defined by getscore, which is not visible here).
CAL331_ARGS = (
    ('wang', (2, 2, 1, 9.1, 7.15, 1.61, 1, 2, 1.61, 9.1)),
    ('ming', (4, 1, 3, 9.8, 7.82, 1.54, 1, 4, 7.82, 9.8)),
    ('zhang', (3, 4, 5, 9.3, 6.54, 1.47, 3, 5, 9.3, 1.47)),
    ('li', (5, 5, 4, 10.1, 6.32, 1.51, 4, 5, 1.51, 10.1)),
    ('hua', (1, 3, 2, 8.5, 6.93, 1.58, 1, 3, 8.5, 6.93)),
)
# Constant arguments handed to gs.cal431 for both MM60431 and MM60432.
CAL431_ARGS = (90000, 50000, 10, 8, 0.55, 0.5)

# (anchor column, offset from the anchor, new column), in insertion order.
# Offset 1 places the new column right after its anchor, offset 0 right before.
# Positions are looked up at insertion time, so the order matters.
INSERTIONS = (
    ('P3', 1, 'P3_CODE'),
    ('MM60311', 1, 'MM60311_CODE'),
    ('MM60321', 1, 'MM60321_CODE'),
    ('MM60341', 1, 'wang'),
    ('MM60342', 1, 'ming'),
    ('MM60343', 1, 'zhang'),
    ('MM60344', 1, 'li'),
    ('MM60345', 1, 'hua'),
    ('MM60341', 0, 'MM60341_CODE'),
    ('MM60351', 1, 'MM60351_CODE'),
    ('MM60431', 1, 'shouru'),
    ('MM60432', 1, 'jiage'),
    ('P4', 1, 'P4_CODE'),
    ('MM60411', 1, 'MM60411_CODE'),
    ('MM60421', 1, 'MM60421_CODE'),
    ('MM60441', 1, 'MM60441_CODE'),
    ('MM60442', 1, 'MM60442_CODE'),
    ('MM60461', 1, 'MM60461_CODE'),
)

# One value list per new column, filled row by row.
derived = {new_name: [] for _, _, new_name in INSERTIONS}

for _, rec in data.iterrows():
    derived['P3_CODE'].append(gs.compareP3(rec['P3']))

    mm60311 = rec['MM60311']  # its code depends on the MM60331 code
    mm60331_CODE = rec['MM60331_CODE']
    derived['MM60311_CODE'].append(gs.get60311(mm60311, mm60331_CODE))

    derived['MM60321_CODE'].append(gs.get60321(rec['MM60321']))

    mm60331 = rec['MM60331_new']
    mm60331_formula = gs.get331(mm60331)
    per_person = []
    for who, args in CAL331_ARGS:
        value = gs.cal331(mm60331, mm60331_formula, *args)
        derived[who].append(value)
        per_person.append(value)

    mm60341_An = [rec['MM60341'], rec['MM60342'], rec['MM60343'], rec['MM60344'], rec['MM60345']]
    derived['MM60341_CODE'].append(gs.get60341(mm60341_An, per_person, mm60331))

    # Depends on MM60331 and MM60341; gets its own copy of the five values.
    mm60351 = rec['MM60351']
    derived['MM60351_CODE'].append(gs.get60351(mm60351, mm60331_CODE, list(per_person)))

    # P4 is coded with the same helper as P3.
    derived['P4_CODE'].append(gs.compareP3(rec['P4']))
    derived['MM60411_CODE'].append(gs.get60411(rec['MM60411']))
    derived['MM60421_CODE'].append(gs.get60421(rec['MM60421']))

    mm60431 = rec['MM60431_new']
    mm60432 = rec['MM60432_new']

    mm60441 = rec['MM60441']  # compared with the value computed from MM60431
    shouru_right = gs.cal431(mm60431, *CAL431_ARGS)
    derived['shouru'].append(shouru_right)
    derived['MM60441_CODE'].append(gs.compareFor(mm60441, shouru_right, mm60431))

    mm60442 = rec['MM60442']  # compared with the value computed from MM60432
    jiage_right = gs.cal431(mm60432, *CAL431_ARGS)
    derived['jiage'].append(jiage_right)
    derived['MM60442_CODE'].append(gs.compareFor(mm60442, jiage_right, mm60432))

    # The MM60461 code is derived from the four answers MM60461..MM60464.
    mm6046x = [rec['MM60461'], rec['MM60462'], rec['MM60463'], rec['MM60464']]
    derived['MM60461_CODE'].append(gs.get60461(mm6046x))

col_name = data.columns.tolist()
for anchor, offset, new_name in INSERTIONS:
    col_name.insert(col_name.index(anchor) + offset, new_name)
    data = data.reindex(columns=col_name)
    data[new_name] = derived[new_name]
data.to_excel('a_out_0714.xlsx')

View File

@ -0,0 +1,72 @@
import pandas as pd
from datetime import datetime
# Recompute the answering time of paper A from start_time / stop_time, print
# a histogram in 5-minute buckets and write the result to a_out_0715.xlsx.

# Timestamps occur both with and without fractional seconds.
datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'
# Upper bucket edges in minutes; a final bucket collects everything above 30.
BUCKET_EDGES = (5, 10, 15, 20, 25, 30)


def _parse_timestamp(value):
    """Parse a '+08:00'-suffixed timestamp in either supported layout."""
    text = str(value)
    try:
        return datetime.strptime(text, datetimeFormat)
    except ValueError:
        return datetime.strptime(text, datetimeFormat2)


data = pd.read_excel(r'a_out_0714.xlsx')
time_new_list = []
empty = 0  # rows with answers but no recorded stop time
bucket_counts = [0] * (len(BUCKET_EDGES) + 1)
for index, row in data.iterrows():
    P3_CODE = row['P3_CODE']
    MM60311_CODE = row['MM60311_CODE']
    if pd.isna(row['stop_time']) and (int(P3_CODE) != 99 or int(MM60311_CODE) != 99):
        # Stop time is missing although not both leading items are coded 99:
        # report the row and count it as "empty".
        print(P3_CODE, MM60311_CODE, index)
        empty = empty + 1
        time_new_list.append("")
        continue
    # The code is numeric everywhere else in this pipeline, so the former
    # comparison with the string '99' could never match; compare as int, as
    # the paper Z script does. notna() keeps empty codes from raising.
    p3_is_99 = pd.notna(P3_CODE) and int(P3_CODE) == 99
    if p3_is_99 or pd.isna(row['start_time']) or pd.isna(row['stop_time']):
        time_new_list.append("")
        continue
    date1 = _parse_timestamp(row['start_time'])
    date2 = _parse_timestamp(row['stop_time'])
    # total_seconds() keeps the day component that timedelta.seconds drops;
    # int() keeps the whole-second truncation the old code had.
    miao = int((date2 - date1).total_seconds())
    fen = round(miao / 60, 2)
    # The number of edges strictly below the value is its bucket index.
    bucket_counts[sum(fen > edge for edge in BUCKET_EDGES)] += 1
    time_new_list.append(fen)

col_name = data.columns.tolist()
col_name.insert(col_name.index('cost_time') + 1, 'time_new')
data = data.reindex(columns=col_name)
data['time_new'] = time_new_list
print(empty, *bucket_counts)
data.to_excel('a_out_0715.xlsx')

View File

@ -0,0 +1,93 @@
import pandas as pd
from datetime import datetime
# Recompute the answering time of paper Z from start_time / stop_time, print
# two histograms (3-minute and 5-minute buckets) and write the result to
# z_out_0715_time.xlsx.

# Timestamps occur both with and without fractional seconds.
datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'
# Upper bucket edges in minutes; each histogram has one extra overflow bucket.
FINE_EDGES = (3, 6, 9, 12, 15)
COARSE_EDGES = (5, 10, 15, 20, 25, 30)


def _parse_timestamp(value):
    """Parse a '+08:00'-suffixed timestamp in either supported layout."""
    text = str(value)
    try:
        return datetime.strptime(text, datetimeFormat)
    except ValueError:
        return datetime.strptime(text, datetimeFormat2)


def _bucket_index(minutes, edges):
    """Return the index of the bucket (edges[i-1], edges[i]] holding minutes."""
    return sum(minutes > edge for edge in edges)


data = pd.read_excel(r'z_out_0715.xlsx')
time_new_list = []
empty = 0  # rows with answers but no recorded stop time
fine_counts = [0] * (len(FINE_EDGES) + 1)
coarse_counts = [0] * (len(COARSE_EDGES) + 1)
for index, row in data.iterrows():
    P1_CODE = row['P1_CODE']
    MM60101_CODE = row['MM60101_CODE']
    if pd.isna(row['stop_time']) and (int(P1_CODE) != 99 or int(MM60101_CODE) != 99):
        # Stop time is missing although not both leading items are coded 99:
        # report the row and count it as "empty".
        print(P1_CODE, MM60101_CODE, index)
        empty = empty + 1
        time_new_list.append("")
        continue
    if int(P1_CODE) == 99 or pd.isna(row['start_time']) or pd.isna(row['stop_time']):
        time_new_list.append("")
        continue
    date1 = _parse_timestamp(row['start_time'])
    date2 = _parse_timestamp(row['stop_time'])
    # total_seconds() keeps the day component that timedelta.seconds drops;
    # int() keeps the whole-second truncation the old code had.
    miao = int((date2 - date1).total_seconds())
    fen = round(miao / 60, 2)
    coarse_counts[_bucket_index(fen, COARSE_EDGES)] += 1
    fine_counts[_bucket_index(fen, FINE_EDGES)] += 1
    time_new_list.append(fen)

col_name = data.columns.tolist()
col_name.insert(col_name.index('cost_time') + 1, 'time_new')
data = data.reindex(columns=col_name)
data['time_new'] = time_new_list
print(empty, *fine_counts)
print(empty, *coarse_counts)
data.to_excel('z_out_0715_time.xlsx')

View File

@ -0,0 +1,94 @@
import pandas as pd
import getscore as gs
# Answer sheet to be coded.
data = pd.read_excel(r'z_out_0713_修改后.xlsx')
# A disabled variant of this script loaded manually assigned MM60104 codes from
# 'z_manu.csv' into `code_dict` (keyed by id) and wrote them to a new
# MM60104_CODE column; the code is now read from the sheet itself.
code_dict = {}
p1_CODElist = []
mm60101_CODElist = []
mm60102_CODElist = []
mm60104_CODElist = []
mm60105_CODElist = []
mm60107_CODElist = []
mm60108_CODElist = []
shui_list = []

# Compute one code per question for every row, in the same order for each row.
for index, row in data.iterrows():
    p1_CODE = gs.compareP3(row['P1'])
    p1_CODElist.append(p1_CODE)

    mm60101_CODE = gs.get60101(row['MM60101'])
    mm60101_CODElist.append(mm60101_CODE)

    mm60102_CODE = gs.get60102(row['MM60102'])
    mm60102_CODElist.append(mm60102_CODE)

    mm60104 = row['MM60104_new']
    mm60104_CODE = row['MM60104_CODE']
    # `shui`: value obtained by evaluating the row's MM60104 formula with the
    # fixed constants passed here.
    shui = gs.cal104(mm60104, gs.get104(mm60104), 2, 18, 500, 3, 3, 25, 1, 1, 7, 4)
    shui_list.append(shui)

    mm60105_CODE = gs.compareZ(mm60104_CODE, shui, row['MM60105'])
    mm60105_CODElist.append(mm60105_CODE)

    mm60107_CODE = gs.get60107(row['MM601071'], row['MM601072'], mm60105_CODE)
    mm60107_CODElist.append(mm60107_CODE)

    mm60108 = [row['MM601081'], row['MM601082'], row['MM601083'], row['MM601084']]
    mm60108_CODE = gs.get60108(mm60108, mm60102_CODE)
    mm60108_CODElist.append(mm60108_CODE)

# (anchor column, offset after the anchor, new column name, values) -- applied
# one after another so that every new column lands next to its source column.
col_name = data.columns.tolist()
placements = [
    ('P1', 1, 'P1_CODE', p1_CODElist),
    ('MM60101', 1, 'MM60101_CODE', mm60101_CODElist),
    ('MM60102', 1, 'MM60102_CODE', mm60102_CODElist),
    ('MM60105', 1, 'shui', shui_list),
    ('MM60105', 2, 'MM60105_CODE', mm60105_CODElist),
    ('MM601072', 1, 'MM60107_CODE', mm60107_CODElist),
    ('MM601084', 1, 'MM60108_CODE', mm60108_CODElist),
]
for anchor, offset, new_name, values in placements:
    col_name.insert(col_name.index(anchor) + offset, new_name)
    data = data.reindex(columns=col_name)
    data[new_name] = values
data.to_excel('z_out_0714.xlsx')

View File

@ -0,0 +1,433 @@
from decimal import MAX_EMAX
from itertools import count
from turtle import right
from unittest import result
import pandas as pd
# Log file shared by the cal331/cal104/cal431 helpers in this module: formulas
# that fail to evaluate are recorded here together with the exception name.
# NOTE(review): opened in "w" mode at import time (an existing log is
# truncated) and not closed anywhere in the visible code -- confirm intent.
file = open("error1.txt", "w")
def compareUp(right, answer):
    """Return True when ``answer`` matches ``right`` or ``100 * right`` up to rounding.

    Both values are converted to ``float``.  When ``right`` is below 10, both
    are multiplied by 10 first.  The truncated and the rounded ``answer`` are
    then looked up among the truncated/rounded ``right`` and the
    truncated/rounded ``100 * right`` (presumably to accept a ratio written as
    a percentage -- confirm).

    Missing, empty, non-numeric, NaN or infinite inputs yield False instead of
    raising.
    """
    if pd.isna(right) or pd.isna(answer) or right == "" or answer == "":
        return False
    try:
        right = float(right)
        answer = float(answer)
    except (TypeError, ValueError, OverflowError):
        return False
    if right < 10:
        right = right * 10
        answer = answer * 10
    try:
        r = [int(right), round(right), int(100 * right), round(100 * right)]
        print(r)
        # int()/round() raise ValueError for NaN and OverflowError for
        # infinity; the answer must be guarded exactly like the reference.
        a1 = int(answer)
        a2 = round(answer)
    except (ValueError, OverflowError):
        return False
    return a1 in r or a2 in r
def compare(right, answer):
    """Return True when ``answer`` equals ``right`` up to truncation or rounding.

    Both values are converted to ``float``.  When ``right`` is below 10, both
    are multiplied by 10 first.  Then the truncated and the rounded value of
    each side are taken, and the result is True as soon as one form of the
    answer coincides with one form of the reference -- only integer parts are
    compared.

    Missing, empty, non-numeric, NaN or infinite inputs yield False instead of
    raising.
    """
    if pd.isna(right) or pd.isna(answer) or right == "" or answer == "":
        return False
    try:
        right = float(right)
        answer = float(answer)
    except (TypeError, ValueError, OverflowError):
        return False
    if right < 10:
        right = right * 10
        answer = answer * 10
    try:
        expected = {int(right), round(right)}
        # int()/round() raise ValueError for NaN and OverflowError for
        # infinity; the answer must be guarded exactly like the reference.
        given = {int(answer), round(answer)}
    except (ValueError, OverflowError):
        return False
    return not expected.isdisjoint(given)
def compareZ(code104, right, answer):
    """Code ``answer`` against ``right``, taking the code of item 104 into account.

    Returns:
        99 when either value is missing or empty;
        70 when either value is not numeric (or no range below applies);
        40 when ``compare(right, answer)`` holds and ``code104`` is 40;
        otherwise 74 when the answer is exactly 450, else a range code for the
        answer -- 440..460, below 440, above 460 -- numbered 41/42/43 when
        ``compare`` holds and 71/72/73 when it does not.
    """
    if pd.isna(right) or pd.isna(answer) or right == "" or answer == "":
        return 99
    try:
        right = float(right)
        answer = float(answer)
    except ValueError:
        return 70

    matches = compare(right, answer)
    if matches and code104 == 40:
        return 40
    if answer == 450:
        return 74
    base = 40 if matches else 70
    if 440 <= answer <= 460:
        return base + 1
    if answer < 440:
        return base + 2
    if answer > 460:
        return base + 3
    return 70
def compareFor(answer, right, formula):
    """Code an answer that must come with a symbolic formula.

    Returns 70 when ``formula`` can be evaluated as it stands, 99 when
    ``right`` or ``answer`` is missing or empty, 40 when
    ``compareUp(right, answer)`` holds and 70 otherwise.
    """
    # NOTE(review): eval() executes the formula text, which presumably comes
    # from student input -- confirm that this data is trusted.
    try:
        eval(formula)
    except BaseException:
        # Catch-all on purpose: any failure only means that the text is not a
        # directly evaluable expression.
        pass
    else:
        return 70
    if pd.isna(right) or pd.isna(answer) or right == "" or answer == "":
        return 99
    return 40 if compareUp(right, answer) else 70
def compareP3(input):
    """Code a comma-separated ordering answer against the key B, D, A, C.

    Spaces are ignored.  Returns 99 for an empty answer or one with fewer than
    four items; otherwise 70/10/20/30/40 for 0/1/2/3/4 positions that agree
    with the key.  Items beyond the fourth are not inspected.
    """
    # Open question from the original author: can an answer contain a number
    # of items other than four?
    items = str(input).replace(' ', '').split(',')
    if items == [""] or len(items) < 4:
        return 99
    hits = sum(1 for got, want in zip(items, ('B', 'D', 'A', 'C')) if got == want)
    return {0: 70, 1: 10, 2: 20, 3: 30}.get(hits, 40)
def get60107(mm71, mm72, mm60105):
    """Code the two choices of item 60107 given the code of item 60105.

    Returns 99 when either choice is missing or empty, the score of the first
    matching rule in the table below, or 70 when no rule matches.
    """
    if pd.isna(mm71) or pd.isna(mm72) or mm71 == "" or mm72 == "":
        return 99
    # (accepted 60105 codes, first choice, second choice, resulting code)
    rules = (
        ((40,), 'A', 'D', 20),
        ((40,), 'A', 'A', 40),
        ((71, 41), 'A', 'A', 41),
        ((72, 42), 'B', 'D', 42),
        ((73, 43), 'B', 'C', 43),
        ((74,), 'A', 'B', 44),
    )
    for accepted, first, second, score in rules:
        if mm60105 in accepted and mm71 == first and mm72 == second:
            return score
    return 70
def get60108(answer, mm60102_CODE):
    """Code the four choices of item 60108 against the key A, C, B, A.

    Args:
        answer: indexable holding the four choices (positions 0..3).
        mm60102_CODE: code already assigned to item 60102.

    Returns:
        99 when all four choices are missing or empty;
        41 when ``mm60102_CODE`` is 30 and the choices are exactly A, A, B, A;
        otherwise 10/20/40 for 2/3/4 positions agreeing with the key, and 70
        for fewer than two.
    """
    blank = sum(1 for i in range(4) if pd.isna(answer[i]) or answer[i] == "")
    if blank == 4:
        return 99
    # This special pattern agrees with the key in three positions, so it has
    # to be tested before the match count; tested afterwards it could never
    # be reached because the count rule would already have returned 20.
    if (mm60102_CODE == 30 and answer[0] == 'A' and answer[1] == 'A'
            and answer[2] == 'B' and answer[3] == 'A'):
        return 41
    key = ('A', 'C', 'B', 'A')
    count = sum(1 for i in range(4) if answer[i] == key[i])
    return {2: 10, 3: 20, 4: 40}.get(count, 70)
def get60311(answer, mm60331_CODE):
    """Code the choice of item 60311, given the code of item 60331.

    Returns 99 for a missing or empty answer, 40 for "D", 30/31/32 for
    A/B/C when ``mm60331_CODE`` belongs to the code group paired with that
    option, and 70 otherwise.
    """
    if answer == "" or pd.isna(answer):
        return 99
    if answer == "D":
        return 40
    # (option, 60331 codes paired with it, resulting code)
    partial_credit = (
        ('A', [31, 35, 41, 45], 30),
        ('B', [32, 36, 42, 46], 31),
        ('C', [33, 37, 43, 47], 32),
    )
    for option, paired_codes, score in partial_credit:
        if answer == option and mm60331_CODE in paired_codes:
            return score
    return 70
def get60101(answer):
    """Code item 60101: 99 for a missing or empty answer, 40 for "C", else 70."""
    if answer == "" or pd.isna(answer):
        return 99
    return 40 if answer == "C" else 70
def get60321(answer):
    """Code item 60321: 99 for missing or empty, 40 for "D", 20 for "C", else 70."""
    if answer == "" or pd.isna(answer):
        return 99
    for option, score in (("D", 40), ("C", 20)):
        if answer == option:
            return score
    return 70
def get60411(answer):
    """Code item 60411: 99 for a missing or empty answer, 40 for "B", else 70."""
    if answer == "" or pd.isna(answer):
        return 99
    return 40 if answer == "B" else 70
def get60421(answer):
    """Code item 60421: 99 for missing or empty, 40 for "C", 20 for "B", else 70."""
    if answer == "" or pd.isna(answer):
        return 99
    for option, score in (("C", 40), ("B", 20)):
        if answer == option:
            return score
    return 70
def get60102(answer):
    """Code the comma-separated multiple choice of item 60102.

    Spaces are ignored.  Returns 99 for a missing or empty answer; 20 when two
    or three options are given and all belong to A/D/E/H; 40 when four options
    are given and all belong to A/D/E/H; for five options 30 if "G" is among
    them and 31 if not; 70 in every other case.
    """
    if answer == "" or pd.isna(answer):
        return 99
    picks = str(answer).replace(' ', '').split(',')
    accepted = ['A', 'D', 'E', 'H']
    n = len(picks)
    if n in (2, 3, 4) and all(pick in accepted for pick in picks):
        return 40 if n == 4 else 20
    if n == 5:
        return 30 if 'G' in picks else 31
    return 70
def get60341(answer, right, formula):
    """Code the five values of item 60341.

    Returns 70 when ``formula`` can be evaluated as it stands and 99 when all
    five answers are missing or empty.  Otherwise the positions where neither
    side is missing and ``compare`` accepts the pair are counted, and
    0/1/2/3/4/5 accepted positions give 70/71/10/20/30/40.
    """
    # NOTE(review): eval() executes the formula text, which presumably comes
    # from student input -- confirm that this data is trusted.
    try:
        eval(formula)
    except BaseException:
        # Catch-all on purpose: any failure only means that the text is not a
        # directly evaluable expression.
        pass
    else:
        return 70
    # Original author's open question: does "blank" mean all five are blank?
    blank = sum(1 for i in range(5) if pd.isna(answer[i]) or answer[i] == "")
    if blank == 5:
        return 99
    # NOTE(review): compare() names its parameters (right, answer) but is
    # called here with the answer first -- confirm this order is intended.
    count = sum(
        1
        for i in range(5)
        if not (pd.isna(answer[i]) or pd.isna(right[i]))
        and compare(answer[i], right[i])
    )
    return (70, 71, 10, 20, 30, 40)[count]
def cal331(formula, f, RR, BR, JR, RS, BS, JS, GR, WR, GS, WS):
    """Evaluate the symbolic formula ``f`` of item 60331 with concrete values.

    Args:
        formula: original formula text, used only in log messages.
        f: formula written with the two-letter symbols RR, BR, JR, RS, BS, JS,
            GR, WR, GS, WS (the symbols ``get331`` produces) and the
            operators ``÷`` / ``×``.
        RR .. WS: value substituted for the symbol of the same name.

    Returns:
        The value of the expression, or "" when it cannot be evaluated.  In
        that case the exception name and ``formula`` are written to the
        module-level log ``file``.
    """
    f = str(f)
    f = f.replace('RR', str(RR))
    f = f.replace('BR', str(BR))
    f = f.replace('JR', str(JR))
    f = f.replace('RS', str(RS))
    f = f.replace('BS', str(BS))
    f = f.replace('JS', str(JS))
    f = f.replace('GR', str(GR))
    f = f.replace('WR', str(WR))
    f = f.replace('GS', str(GS))
    f = f.replace('WS', str(WS))
    f = f.replace('÷', '/')
    f = f.replace('×', '*')
    # NOTE(review): eval() executes text that presumably comes from student
    # input -- confirm that this data is trusted.
    try:
        result = eval(f)  # evaluated once; the value is reused for the trace
    except SyntaxError:
        file.write("SyntaxError " + str(formula) + "\n")
        return ""
    except NameError:
        file.write("NameError " + str(formula) + "\n")
        return ""
    except TypeError:
        file.write("TypeError " + str(formula) + "\n")
        return ""
    except ZeroDivisionError:
        # Handled like in cal431: one formula dividing by zero must not abort
        # the whole run.
        file.write("ZeroDivisionError " + str(formula) + "\n")
        return ""
    except OverflowError:
        file.write("OverflowError " + str(formula) + "\n")
        return ""
    print(f + "=" + str(result))
    return result
def cal104(formula, f, E, L, V, W, T, C, G, U, D, P):
    """Evaluate the symbolic formula ``f`` of item 60104 with concrete values.

    Args:
        formula: original formula text, used only in log messages.
        f: formula written with the one-letter symbols E, L, V, W, T, C, G, U,
            D, P (the symbols ``get104`` produces) and the operators
            ``÷`` / ``×``.
        E .. P: value substituted for the symbol of the same name.

    Returns:
        The value of the expression converted to ``str``, or "" when it cannot
        be evaluated.  In that case the exception name and ``formula`` are
        written to the module-level log ``file``.
    """
    f = str(f)
    f = f.replace('L', str(L))
    f = f.replace('V', str(V))
    f = f.replace('C', str(C))
    f = f.replace('G', str(G))
    f = f.replace('D', str(D))
    f = f.replace('P', str(P))
    f = f.replace('E', str(E))
    f = f.replace('T', str(T))
    f = f.replace('W', str(W))
    f = f.replace('U', str(U))
    f = f.replace('÷', '/')
    f = f.replace('×', '*')
    # NOTE(review): eval() executes text that presumably comes from student
    # input -- confirm that this data is trusted.
    try:
        result = eval(f)  # evaluated once; the value is reused for the trace
    except SyntaxError:
        file.write("SyntaxError " + str(formula) + "\n")
        return ""
    except NameError:
        file.write("NameError " + str(formula) + "\n")
        return ""
    except TypeError:
        file.write("TypeError " + str(formula) + "\n")
        return ""
    except ZeroDivisionError:
        # Handled like in cal431: one formula dividing by zero must not abort
        # the whole run.
        file.write("ZeroDivisionError " + str(formula) + "\n")
        return ""
    except OverflowError:
        file.write("OverflowError " + str(formula) + "\n")
        return ""
    print(f + "=" + str(result))
    return str(result)
def cal431(f, Y, y, A, a, B, b):
    """Evaluate the symbolic formula ``f`` of item 60431 with concrete values.

    Args:
        f: formula written with the symbols Y, y, A, a, B, b and the operators
            ``÷`` / ``×``.
        Y .. b: value substituted for the symbol of the same name.

    Returns:
        The value of the expression, or "" when it cannot be evaluated.  In
        that case the exception name and the substituted expression are
        written to the module-level log ``file``.
    """
    f = str(f)
    f = f.replace('Y', str(Y))
    f = f.replace('y', str(y))
    f = f.replace('A', str(A))
    f = f.replace('B', str(B))
    f = f.replace('a', str(a))
    f = f.replace('b', str(b))
    f = f.replace('÷', '/')
    f = f.replace('×', '*')
    # NOTE(review): eval() executes text that presumably comes from student
    # input -- confirm that this data is trusted.
    try:
        result = eval(f)  # evaluated once; the value is reused for the trace
    except SyntaxError:
        file.write("SyntaxError " + str(f) + "\n")
        return ""
    except NameError:
        file.write("NameError " + str(f) + "\n")
        return ""
    except TypeError:
        file.write("TypeError " + str(f) + "\n")
        return ""
    except ZeroDivisionError:
        file.write("ZeroDivisionError " + str(f) + "\n")
        return ""
    except OverflowError:
        file.write("OverflowError " + str(f) + "\n")
        return ""
    print(f + "=" + str(result))
    return result
def get331(f):
    """Replace the Chinese phrases of an item-60331 formula by two-letter symbols.

    ``f`` is converted to ``str`` and every phrase of the table below is
    replaced, in table order, by its symbol; text that matches no phrase is
    left as it is.
    """
    substitutions = (
        ('该同学50米跑的排名', 'RR'),
        ('该同学实心球的排名', 'BR'),
        ('该同学立定跳远的排名', 'JR'),
        ('该同学50米跑的成绩', 'RS'),
        ('该同学实心球的成绩', 'BS'),
        ('该同学立定跳远的成绩', 'JS'),
        ('该同学表现最好项目的排名', 'GR'),
        ('该同学表现最差项目的排名', 'WR'),
        ('该同学表现最好项目的成绩', 'GS'),
        ('该同学表现最差项目的成绩', 'WS'),
    )
    s = str(f)
    for phrase, symbol in substitutions:
        s = s.replace(phrase, symbol)
    return s
def get104(f):
    """Replace the Chinese phrases of an item-60104 formula by one-letter symbols.

    ``f`` is converted to ``str`` and every phrase of the table below is
    replaced, in table order, by its symbol; text that matches no phrase is
    left as it is.
    """
    substitutions = (
        ('每人每天要刷2次牙', 'E'),
        ('牙刷的长度为18厘米', 'L'),
        ('漱口杯的容量为500毫升', 'V'),
        ('水龙头1分钟会流出3升水', 'W'),
        ('每次刷牙平均需要3分钟', 'T'),
        ('刷牙时的水温为25摄氏度', 'C'),
        ('每次刷牙要使用1厘米牙膏', 'G'),
        ('每次正常刷牙使用1升水用于漱口、冲洗牙刷等', 'U'),
        ('每周7天', 'D'),
        ('家中包括4个成员', 'P'),
    )
    s = str(f)
    for phrase, symbol in substitutions:
        s = s.replace(phrase, symbol)
    return s
def get60351(answer, mm60331, l):
    """Code the student chosen in item 60351, given the code of item 60331.

    ``l`` holds one value per student A..E; the students with the largest and
    the smallest value are determined first (first occurrence wins).  Returns
    99 for a missing or empty answer, the score of the first matching rule in
    the table below, or 70 when no rule matches.
    """
    labels = ['A', 'B', 'C', 'D', 'E']
    max_position = l.index(max(l))
    min_position = l.index(min(l))
    max_student = labels[max_position]
    min_student = labels[min_position]
    if answer == '' or pd.isna(answer):
        return 99
    # (60331 codes, student that must have been chosen, resulting code)
    rules = (
        ([10, 72], max_student, 20),
        ([20, 38], max_student, 30),
        ([30, 31, 32, 33, 40, 41, 42, 43], min_student, 40),
        ([34, 35, 36, 37, 44, 45, 46, 47], max_student, 41),
    )
    for codes, expected, score in rules:
        if mm60331 in codes and answer == expected:
            return score
    return 70
def get60461(answer):
    """Code the four choices of item 60461 against the key B, A, A, B.

    Returns 99 when all four choices are missing or empty; otherwise
    10/20/30/40 for 1/2/3/4 positions agreeing with the key and 70 for none.
    """
    right = ['B', 'A', 'A', 'B']
    blank = sum(1 for i in range(4) if pd.isna(answer[i]) or answer[i] == "")
    if blank == 4:
        return 99
    count = sum(
        1
        for i in range(4)
        if not (pd.isna(answer[i]) or pd.isna(right[i])) and answer[i] == right[i]
    )
    return {1: 10, 2: 20, 3: 30, 4: 40}.get(count, 70)