forked from Open-CT/opendata
806 lines
24 KiB
Plaintext
806 lines
24 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d85f147e",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 数据处理\n",
|
||
"先转换为单题的单个字符一列的excel;只提取出有效值;例如frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\,这些说明字段删去;\n",
|
||
"# 数据分析\n",
|
||
"1、每个题的平均作答时长;——结果数据</br>\n",
|
||
"\n",
|
||
"2、每个题的编码种类(有多少种,分别是什么,每种多少学生);——结果数据</br>\n",
|
||
"\n",
|
||
"3、每个题的每个操作步骤的平均作答时长;——过程数据</br>\n",
|
||
"\n",
|
||
"4、正确率(暂未提供标准编码,可以探索一下题目本身,协助形成标准答案编码);——结果数据</br>\n",
|
||
"\n",
|
||
"5、关键节点(通过数据,探索学生在从初始状态向终止状态进行的过程中,有几个关键步骤,每个关键步骤有几种类型的关键节点编码),体现“用数据说话”去探索关键节点。——过程数据</br>\n",
|
||
"【以上仅供参考,希望包含,但不限于此】\n",
|
||
"\n",
|
||
"答案解析:一个frame字段是一个题。\n",
|
||
"每个题目的记录方式不同,请用demo账号登录题目,页面最上方点击打开答案,可以看到该题目的每个步骤点击后出现的答案数据,也可以直接与命题人沟通询问具体每个答案的内涵。\n",
|
||
"\n",
|
||
"\n",
|
||
"[\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[1,3,2]}}\",\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[3,1,0]}}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"06\\\"]}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"03\\\"]}\",\"{\\\"frame\\\":[[0,0,0,0,0,0,0],[1,0,1,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,1,0,0,0,1]]}\",\"{\\\"frame\\\":[\\\"C_B\\\"]}\",\"{\\\"frame\\\":[\\\"C_G\\\",\\\"F_G\\\"]}\",\"{\\\"frame\\\":[\\\"09_02\\\"]}\",\"{\\\"frame\\\":[\\\"v_10_v_2\\\"]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[[[2,1],[3,1],[4,1],[4,2]]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":0,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":1,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":16,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0,1],\\\"start\\\":5,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0,1,3,2]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[null,null,2,0,2,3],[0,2,2,null,3,null],[null,0,null,null,1,null],[0,2,0,1,null,null]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0,0,0,1,1],\\\"transformation\\\":[[0],[0,1]]}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0],\\\"transformation\\\":[[0,1],[1,0]]}}\",\"{\\\"frame\\\":[4,5]}\",\"{\\\"frame\\\":[0,1,2,2,0,0]}\"]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "e73e9beb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
" <script type=\"text/javascript\">\n",
|
||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||
" if (typeof require !== 'undefined') {\n",
|
||
" require.undef(\"plotly\");\n",
|
||
" requirejs.config({\n",
|
||
" paths: {\n",
|
||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||
" }\n",
|
||
" });\n",
|
||
" require(['plotly'], function(Plotly) {\n",
|
||
" window._Plotly = Plotly;\n",
|
||
" });\n",
|
||
" }\n",
|
||
" </script>\n",
|
||
" "
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import json \n",
|
||
"import numpy as np\n",
|
||
"import ast\n",
|
||
"from datetime import datetime\n",
|
||
"import plotly as py\n",
|
||
"import plotly.graph_objs as go\n",
|
||
"from plotly.offline import plot\n",
|
||
"from IPython.core.display import HTML\n",
|
||
"import plotly.offline as offline\n",
|
||
"offline.init_notebook_mode(connected=True)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "b62b3f15",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class data_analysis:\n",
|
||
" def __init__(self, df) -> None:\n",
|
||
" self.problem_num = 23\n",
|
||
" self.df = df\n",
|
||
" self.row_num = len(df)\n",
|
||
" self.df.insert(len(self.df.columns), 'ans', self.remove_str())\n",
|
||
" self.df.insert(len(self.df.columns), 'interval', self.get_interval())\n",
|
||
" self.ndf = pd.DataFrame(self.create_new_df())\n",
|
||
" self.ndf_list = self.divide_ndf()\n",
|
||
" self.group_list = self.group_by()\n",
|
||
" print('init complete')\n",
|
||
"\n",
|
||
" def remove_str_per_row(self, data_per_row):\n",
|
||
" frame_list = ast.literal_eval(data_per_row)\n",
|
||
" frame_dic_list = []\n",
|
||
" for index in range(len(frame_list)):\n",
|
||
" frame_dic_list.append(json.loads(frame_list[index])) \n",
|
||
" return frame_dic_list\n",
|
||
"\n",
|
||
" def remove_str(self):\n",
|
||
" ndf_ans_8_list = []\n",
|
||
" ndf_rm_frame = []\n",
|
||
" for i in range(self.row_num):\n",
|
||
" dic_temp = self.remove_str_per_row(self.df.loc[i,'task_answers'])\n",
|
||
" ndf_ans_8_list.append(dic_temp)\n",
|
||
" new_dic_list = []\n",
|
||
" for dic in dic_temp:\n",
|
||
" dic = dic['frame']\n",
|
||
" new_dic = dic\n",
|
||
" new_dic_list.append(new_dic)\n",
|
||
" ndf_rm_frame.append(new_dic_list)\n",
|
||
"\n",
|
||
" return ndf_rm_frame\n",
|
||
" \n",
|
||
" def get_interval(self):\n",
|
||
" interval_list = []\n",
|
||
" for i in range(len(self.df)):\n",
|
||
" interval_list.append(self.get_interval_per_row(i))\n",
|
||
" return interval_list\n",
|
||
"\n",
|
||
" def get_interval_per_row(self, index):\n",
|
||
" row_data = self.df.loc[index,:]\n",
|
||
" start_time = row_data['start_time']\n",
|
||
" start_time = datetime.strptime(start_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
" expire_time = row_data['expire_time']\n",
|
||
" expire_time = datetime.strptime(expire_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
" stop_time = row_data['stop_time']\n",
|
||
" if stop_time != stop_time:\n",
|
||
" return -1\n",
|
||
" stop_time = datetime.strptime(stop_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
" total_sec = (stop_time - start_time).seconds\n",
|
||
" return total_sec\n",
|
||
" \n",
|
||
" def create_new_df(self):\n",
|
||
" twoD_list = []\n",
|
||
" for row in range(self.row_num):\n",
|
||
" ans_dic_list = self.df.loc[row, 'ans']\n",
|
||
" twoD_list.append(ans_dic_list)\n",
|
||
" return twoD_list\n",
|
||
" \n",
|
||
" def divide_ndf(self):\n",
|
||
" ndf_list = []\n",
|
||
" for i in range(len(self.ndf.columns)):\n",
|
||
" ndf_list.append(pd.DataFrame(self.ndf.loc[:,i]))\n",
|
||
" return ndf_list\n",
|
||
" \n",
|
||
" def group_by_per_problem(self, index):\n",
|
||
" df_temp = self.ndf_list[index]\n",
|
||
" df_str_list = []\n",
|
||
" for j in range(len(df_temp)):\n",
|
||
" ndf_index_j = df_temp.iloc[j, 0]\n",
|
||
" if ndf_index_j == None:\n",
|
||
" df_str_list.append(str(None))\n",
|
||
" else:\n",
|
||
" df_str_list.append(self.content_to_str(ndf_index_j))\n",
|
||
" df_temp.insert(1, 'ans_str', df_str_list)\n",
|
||
" df_per_problom = df_temp.groupby('ans_str')\n",
|
||
" return df_per_problom\n",
|
||
"\n",
|
||
" def content_to_str(self, data):\n",
|
||
" str_data = ''\n",
|
||
" if data == None:\n",
|
||
" return str(None)\n",
|
||
" elif type(data) == type([]):\n",
|
||
" return self.data_to_str(data)\n",
|
||
" elif 'data' in data.keys():\n",
|
||
" return self.data_to_str(data['data'])\n",
|
||
" else:\n",
|
||
" return self.data_to_str(data)\n",
|
||
"\n",
|
||
" def data_to_str(self, data):\n",
|
||
" if type(data) == type({}):\n",
|
||
" return str(list(data.values()))\n",
|
||
" else:\n",
|
||
" return str(data)\n",
|
||
"\n",
|
||
" def group_by(self):\n",
|
||
" group_list = []\n",
|
||
" for i in range(self.problem_num):\n",
|
||
" df_temp = self.group_by_per_problem(i)\n",
|
||
" group_list.append(df_temp)\n",
|
||
" return group_list\n",
|
||
"\n",
|
||
" def plot(self):\n",
|
||
" data = [go.Histogram(x=list(self.df.loc[:,'interval']))] \n",
|
||
" layout={\"title\": \"学生用时分布\", \n",
|
||
" \"xaxis_title\": \"学生用时,单位秒\",\n",
|
||
" \"yaxis_title\": \"学生个数\",\n",
|
||
" # x轴坐标倾斜60度\n",
|
||
" \"xaxis\": {\"tickangle\": 60}\n",
|
||
" }\n",
|
||
" fig = go.Figure(data=data,layout=layout)\n",
|
||
" plot(fig,filename=\"./plot/vector.html\",auto_open=False,image='png',image_height=800,image_width=1500)\n",
|
||
" offline.iplot(fig) \n",
|
||
" return 0\n",
|
||
"\n",
|
||
" def plot_problem(self):\n",
|
||
" data = [go.Bar(x = list(range(self.problem_num)), y = [len(list(group)) for group in self.group_list])] \n",
|
||
" layout={\"title\": \"不同题目的编码数量\", \n",
|
||
" \"xaxis_title\": \"题目编号\",\n",
|
||
" \"yaxis_title\": \"编码个数\",\n",
|
||
" # x轴坐标倾斜60度\n",
|
||
" \"xaxis\": {\"tickangle\": 60}\n",
|
||
" }\n",
|
||
" fig = go.Figure(data=data,layout=layout)\n",
|
||
" plot(fig,filename=\"./plot/vector.html\",auto_open=False,image='png',image_height=800,image_width=1500)\n",
|
||
" offline.iplot(fig) \n",
|
||
" return 0"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "3b99c9a0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"init complete\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df = pd.read_excel('./data/data.xlsx')\n",
|
||
"data_entity = data_analysis(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d2b2fd91",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df.sort_values('school', ascending=True, inplace= False )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9cc5ada7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"grouped = df.groupby('school')\n",
|
||
"df_l = []\n",
|
||
"for value, group in grouped:\n",
|
||
" df_l.append(group)\n",
|
||
"df_l[1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "abeddddf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df.loc[0:266,:].to_excel('./data/junior.xlsx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9b410779",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df.loc[267:, :].to_excel('./data/senior.xlsx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "79639e93",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_junior = pd.read_excel('./data/junior.xlsx')\n",
|
||
"df_junior"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0fb1a77f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_senior = pd.read_excel('./data/senior.xlsx')\n",
|
||
"df_senior"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1931a690",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_entity.df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "02a7af39",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 数据处理\n",
|
||
"处理函数定义在类data_analysis中的子函数remove_str()中\n",
|
||
"移除每一行中第8列中的frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\,这些说明字段删去;"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3811d24c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"index = 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5ccf75ef",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_per_row = df.iloc[index,8]\n",
|
||
"frame_list = ast.literal_eval(data_per_row)\n",
|
||
"frame_dic_list = []\n",
|
||
"for index in range(len(frame_list)):\n",
|
||
" frame_dic_list.append(json.loads(frame_list[index])) \n",
|
||
"frame_dic_list"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e5c756d1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_df = pd.DataFrame([[{1:'ds'},{2,'df'}], [{1:'ds'},{2,'df'}]])\n",
|
||
"test_df.iloc[0,0][1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1c91c870",
|
||
"metadata": {},
|
||
"source": [
|
||
"最后将经过处理的结构体的列表存储到新的列中,新属性的名称为‘ans’,注意这一步操作已经在初始化函数中完成"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "401c1aa8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dic_temp = data_entity.df.loc[0,'ans'][20]['frame']\n",
|
||
"dic_temp "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d2cc2d06",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 添加列(时长),单位s\n",
|
||
"新增属性'interval'\n",
|
||
"\n",
|
||
"每一行的实例数据为:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f88915b0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"row_data = data_entity.df.loc[0,:]\n",
|
||
"row_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c95bb1c4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"start_time = row_data['start_time']\n",
|
||
"start_time = datetime.strptime(start_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
"expire_time = row_data['expire_time']\n",
|
||
"expire_time = datetime.strptime(expire_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
"stop_time = row_data['stop_time']\n",
|
||
"stop_time = datetime.strptime(stop_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
|
||
"\n",
|
||
"total_sec = (stop_time - start_time).seconds"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3717094e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"total_sec"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "037fdfff",
|
||
"metadata": {},
|
||
"source": [
|
||
"基于以上思路的代码实现在init函数中执行(self.get_interval)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "50977b2b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 所有答卷的完成时间的分布图如下"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "897431f1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_entity.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "dbf98c6d",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 新建关键字是题目的表\n",
|
||
"(总共23个题目)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b189e621",
|
||
"metadata": {},
|
||
"source": [
|
||
"每一行中的‘ans’属性为一个固定长度(23)的列表,该列表中的元素格式为词典,现在建立一个新的dataframe,共662行,23列,每列对应同一个题目。"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "2d899862",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ndf = data_entity.ndf"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0a88ae59",
|
||
"metadata": {},
|
||
"source": [
|
||
"将ndf切分成23份并提取关键信息"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "57807177",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ndf0 = ndf.loc[:,0]\n",
|
||
"ndf0_df = pd.DataFrame(ndf0)\n",
|
||
"ndf0_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4a73a186",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ndf0 = data_entity"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3202fece",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a = list(data_entity.ndf_list[22].iloc[:,0])\n",
|
||
"a"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3d1a3ff9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"for i in data_entity.ndf_list:\n",
|
||
" df_temp = i\n",
|
||
" df_str_list = []\n",
|
||
" for j in range(len(df_temp)):\n",
|
||
" ndf_index_j = df_temp.iloc[j, 0]\n",
|
||
" if ndf_index_j == None:\n",
|
||
" df_str_list.append(str(None))\n",
|
||
" else:\n",
|
||
" df_str_list.append(content_to_str(ndf_index_j))\n",
|
||
"# df_str_list\n",
|
||
"# df_temp.insert(1, 'ans_str', df_str_list)\n",
|
||
"# len(df_str_list)\n",
|
||
"# df_temp.insert(1, 'ans_str', df_str_list)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0f4dc894",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def content_to_str(data):\n",
|
||
" str_data = ''\n",
|
||
" if data == None:\n",
|
||
" return str(None)\n",
|
||
" elif type(data) == type([]):\n",
|
||
" return data_to_str(data)\n",
|
||
" elif 'data' in data.keys():\n",
|
||
" return data_to_str(data['data'])\n",
|
||
" else:\n",
|
||
" return data_to_str(data)\n",
|
||
"\n",
|
||
"def data_to_str(data):\n",
|
||
" if type(data) == type({}):\n",
|
||
" return str(list(data.values()))\n",
|
||
" else:\n",
|
||
" return str(data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4a1c59f2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test = {'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
|
||
" 'transformation': [[0], [1, 0]]}\n",
|
||
"test1 = [[0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [1, 0, 1, 0, 0, 0, 0],\n",
|
||
" [0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [1, 0, 0, 0, 0, 0, 0]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1a9d3cee",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"type(test) == type({})\n",
|
||
"str(list(test.values()))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8f656032",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "af9fc0ef",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "31c73f85",
|
||
"metadata": {},
|
||
"source": [
|
||
"题目的字典有三种格式:\n",
|
||
"1、ndf0.iloc[66,0] == None\n",
|
||
"\n",
|
||
"2、ndf0.iloc[0,0]\n",
|
||
"'data':{'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}\n",
|
||
"\n",
|
||
"3、ndf0.iloc[7,0]\n",
|
||
"{'successRate': 0, 'minJumps': 2, 'jumps': 3, 'path': [1, 2, 3, 4]}\n",
|
||
"\n",
|
||
"题目之间的data格式不一样:\n",
|
||
"0,1为:{'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}\n",
|
||
"\n",
|
||
"2,3为:['00', '01', '02']\n",
|
||
"\n",
|
||
"4为:[[0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [1, 0, 1, 0, 0, 0, 0],\n",
|
||
" [0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [0, 0, 0, 0, 0, 0, 0],\n",
|
||
" [1, 0, 0, 0, 0, 0, 0]]\n",
|
||
" \n",
|
||
"5为:['B_A', 'C_A']\n",
|
||
"\n",
|
||
"6为:['B_A', 'C_A', 'G_F', 'D_B', 'E_B']\n",
|
||
"\n",
|
||
"7为:[[[0, 1], [0, 2], [1, 2]],\n",
|
||
" [[0, 4], [0, 5], [1, 5]],\n",
|
||
" [[0, 6], [0, 7], [1, 7]],\n",
|
||
" [[0, 10], [0, 11], [1, 11]]]\n",
|
||
" \n",
|
||
"8为:[[[1, 0], [2, 0], [3, 0], [3, 1]],\n",
|
||
" [[2, 3], [3, 3], [4, 3], [4, 4]],\n",
|
||
" [[0, 4], [1, 4], [2, 4], [2, 5]],\n",
|
||
" [[0, 1], [1, 1], [2, 1], [2, 2]],\n",
|
||
" [[3, 2], [4, 2], [5, 2], [5, 3]]]\n",
|
||
" \n",
|
||
"9为['09_02', '05_06']\n",
|
||
"\n",
|
||
"10为['v_14_v_6', 'v_2_v_16', 'v_15_v_12']\n",
|
||
"\n",
|
||
"11,12为[[{'row': 0, 'col': 0},\n",
|
||
" {'row': 1, 'col': 2},\n",
|
||
" {'row': 2, 'col': 1},\n",
|
||
" {'row': 0, 'col': 2},\n",
|
||
" {'row': 2, 'col': 4}]]\n",
|
||
" \n",
|
||
"13为 [2, 1]\n",
|
||
"\n",
|
||
"14为[1, 2, 1, 2, 2, 3]\n",
|
||
"\n",
|
||
"15为{'stamps': [1, 0, 1, 2], 'start': 1, 'end': 16, 'selected': True}}\n",
|
||
"\n",
|
||
"16为{'stamps': [2, 1, 0, 1, 0],\n",
|
||
" 'start': 1,\n",
|
||
" 'end': 5,\n",
|
||
" 'selected': False}\n",
|
||
" \n",
|
||
"17为{'stamps': [2, 1, 0, 1], 'start': 1, 'end': 5, 'selected': False}\n",
|
||
"\n",
|
||
"18为{'rotation': [[1, 0, 1]], 'lowered': [0, 0]}\n",
|
||
"\n",
|
||
" {'rotation': [[2, 2, 0]], 'lowered': None}\n",
|
||
" \n",
|
||
"19为{'rotation': [[0, 0, 1, 3, 0, 1]], 'lowered': None}\n",
|
||
"\n",
|
||
" {'rotation': [[2, 3, 1, 0, 0, 1]], 'lowered': [0, 2]}\n",
|
||
" \n",
|
||
"20为{'rotation': [[None, None, 1, 1, 1, 0],\n",
|
||
" [1, 1, 3, None, 0, None],\n",
|
||
" [None, 1, None, None, 0, None],\n",
|
||
" [1, 1, 1, 0, None, None]],\n",
|
||
" 'lowered': None}\n",
|
||
" \n",
|
||
"21为{'sequence': [1, 0, 1, 0, 0, 1, 1],\n",
|
||
"\n",
|
||
" 'transformation': [[0], [0, 1]]}\n",
|
||
" \n",
|
||
"22为{'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
|
||
"\n",
|
||
" 'transformation': [[0], [1, 0]]}\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "487bdd9c",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 每道题编码种类"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "132446f8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# ndf0_group = ndf0_df.groupby(0)\n",
|
||
"# list(ndf0_group)\n",
|
||
"gl = data_entity.group_list\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "24d6b280",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"list(gl[4])[1][1]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6d84f4c8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g1.loc[0,1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8526399f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"len(list(gl[6]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9add9fee",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_entity.plot_problem()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "be338894",
|
||
"metadata": {},
|
||
"source": [
|
||
"基于上述求解思路,编写self.group_by()函数以得出各个题目的编码数量"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7ba1a20b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 每道题正确率"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b9fd1d2b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|