opendata/first_analysis/data_analysis.ipynb

806 lines
24 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "d85f147e",
"metadata": {},
"source": [
"# 数据处理\n",
"先转换为单题的单个字符一列的excel只提取出有效值例如frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\,这些说明字段删去;\n",
"# 数据分析\n",
"1、每个题的平均作答时长——结果数据<br>\n",
"\n",
"2、每个题的编码种类:有多少种,分别是什么,每种有多少学生——结果数据<br>\n",
"\n",
"3、每个题的每个操作步骤的平均作答时长——过程数据<br>\n",
"\n",
"4、正确率:暂未提供标准编码,可以探索一下题目本身,协助形成标准答案编码——结果数据<br>\n",
"\n",
"5、关键节点:通过数据探索学生在从初始状态向终止状态进行的过程中有几个关键步骤,每个关键步骤有几种类型的关键节点编码,体现“用数据说话”去探索关键节点。——过程数据<br>\n",
"【以上仅供参考,希望包含,但不限于此】\n",
"\n",
"答案解析一个frame字段是一个题。\n",
"每个题目的记录方式不同请用demo账号登录题目页面最上方点击打开答案可以看到该题目的每个步骤点击后出现的答案数据也可以直接与命题人沟通询问具体每个答案的内涵。\n",
"\n",
"\n",
"[\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[1,3,2]}}\",\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[3,1,0]}}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"06\\\"]}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"03\\\"]}\",\"{\\\"frame\\\":[[0,0,0,0,0,0,0],[1,0,1,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,1,0,0,0,1]]}\",\"{\\\"frame\\\":[\\\"C_B\\\"]}\",\"{\\\"frame\\\":[\\\"C_G\\\",\\\"F_G\\\"]}\",\"{\\\"frame\\\":[\\\"09_02\\\"]}\",\"{\\\"frame\\\":[\\\"v_10_v_2\\\"]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[[[2,1],[3,1],[4,1],[4,2]]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":0,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":1,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":16,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0,1],\\\"start\\\":5,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0,1,3,2]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[null,null,2,0,2,3],[0,2,2,null,3,null],[null,0,null,null,1,null],[0,2,0,1,null,null]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0,0,0,1,1],\\\"transformation\\\":[[0],[0,1]]}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0],\\\"transformation\\\":[[0,1],[1,0]]}}\",\"{\\\"frame\\\":[4,5]}\",\"{\\\"frame\\\":[0,1,2,2,0,0]}\"]\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e73e9beb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import ast\n",
"import json\n",
"from datetime import datetime\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly as py\n",
"import plotly.graph_objs as go\n",
"import plotly.offline as offline\n",
"from IPython.core.display import HTML\n",
"from plotly.offline import plot\n",
"offline.init_notebook_mode(connected=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b62b3f15",
"metadata": {},
"outputs": [],
"source": [
"class data_analysis:\n",
"    \"\"\"Parse raw answer records and summarise timing and answer encodings.\n",
"\n",
"    The constructor expects a DataFrame with the columns ``task_answers``\n",
"    (a Python-literal list of JSON strings, each holding a ``frame`` entry),\n",
"    ``start_time`` and ``stop_time``.  It derives:\n",
"\n",
"    * ``df``         -- copy of the input plus ``ans`` and ``interval`` columns\n",
"    * ``ndf``        -- one row per record, one column per problem\n",
"    * ``ndf_list``   -- ``ndf`` split into single-problem frames\n",
"    * ``group_list`` -- per-problem groupby over the stringified answers\n",
"    \"\"\"\n",
"\n",
"    # Layout of the start/stop timestamps (fixed +08:00 offset suffix).\n",
"    TIME_FORMAT = \"%Y-%m-%dT%H:%M:%S+08:00\"\n",
"\n",
"    def __init__(self, df) -> None:\n",
"        # Work on a copy: inserting columns into the caller's frame would make\n",
"        # a second construction from the same frame fail on the duplicate column.\n",
"        self.df = df.copy()\n",
"        self.row_num = len(self.df)\n",
"        self.df.insert(len(self.df.columns), 'ans', self.remove_str())\n",
"        self.df.insert(len(self.df.columns), 'interval', self.get_interval())\n",
"        self.ndf = pd.DataFrame(self.create_new_df())\n",
"        # Take the problem count from the data instead of hard-coding it.\n",
"        self.problem_num = len(self.ndf.columns)\n",
"        self.ndf_list = self.divide_ndf()\n",
"        self.group_list = self.group_by()\n",
"        print('init complete')\n",
"\n",
"    def remove_str_per_row(self, data_per_row):\n",
"        \"\"\"Decode one ``task_answers`` cell into a list of parsed JSON objects.\"\"\"\n",
"        return [json.loads(frame) for frame in ast.literal_eval(data_per_row)]\n",
"\n",
"    def remove_str(self):\n",
"        \"\"\"Return, for every record, the list of ``frame`` payloads of its answers.\"\"\"\n",
"        # Iterate over the column values so a non-default index cannot break the lookup.\n",
"        return [\n",
"            [answer['frame'] for answer in self.remove_str_per_row(raw)]\n",
"            for raw in self.df['task_answers']\n",
"        ]\n",
"\n",
"    def get_interval(self):\n",
"        \"\"\"Return the elapsed seconds of every record, in row order.\"\"\"\n",
"        # get_interval_per_row looks rows up by label, so pass labels, not positions.\n",
"        return [self.get_interval_per_row(label) for label in self.df.index]\n",
"\n",
"    def get_interval_per_row(self, index):\n",
"        \"\"\"Return seconds between start and stop for the row labelled ``index``.\n",
"\n",
"        Returns -1 when the start or stop time is missing.\n",
"        \"\"\"\n",
"        row_data = self.df.loc[index, :]\n",
"        start_time = row_data['start_time']\n",
"        stop_time = row_data['stop_time']\n",
"        if pd.isna(start_time) or pd.isna(stop_time):\n",
"            return -1\n",
"        start_time = datetime.strptime(start_time, self.TIME_FORMAT)\n",
"        stop_time = datetime.strptime(stop_time, self.TIME_FORMAT)\n",
"        # total_seconds() keeps the days component; .seconds would drop whole\n",
"        # days and wrap around for negative differences.\n",
"        return int((stop_time - start_time).total_seconds())\n",
"\n",
"    def create_new_df(self):\n",
"        \"\"\"Return the ``ans`` column as a list of per-record answer lists.\"\"\"\n",
"        return list(self.df['ans'])\n",
"\n",
"    def divide_ndf(self):\n",
"        \"\"\"Split ``ndf`` into one single-column DataFrame per problem.\"\"\"\n",
"        return [pd.DataFrame(self.ndf.loc[:, col]) for col in self.ndf.columns]\n",
"\n",
"    def group_by_per_problem(self, index):\n",
"        \"\"\"Group the answers of problem ``index`` by their string encoding.\n",
"\n",
"        The encoding is stored as column ``ans_str`` on the frame held in\n",
"        ``ndf_list``; plain assignment is used so calling this again overwrites\n",
"        the column instead of failing.\n",
"        \"\"\"\n",
"        df_temp = self.ndf_list[index]\n",
"        df_temp['ans_str'] = [self.content_to_str(answer) for answer in df_temp.iloc[:, 0]]\n",
"        return df_temp.groupby('ans_str')\n",
"\n",
"    def content_to_str(self, data):\n",
"        \"\"\"Return the grouping key (a string) for one problem answer.\n",
"\n",
"        ``None`` becomes ``'None'``; a dict carrying a ``'data'`` entry is\n",
"        represented by that entry; anything else goes through ``data_to_str``.\n",
"        \"\"\"\n",
"        if data is None:\n",
"            return str(None)\n",
"        # Only dicts can wrap their payload under 'data'; checking the type first\n",
"        # keeps scalars from failing on a missing .keys().\n",
"        if isinstance(data, dict) and 'data' in data:\n",
"            return self.data_to_str(data['data'])\n",
"        return self.data_to_str(data)\n",
"\n",
"    def data_to_str(self, data):\n",
"        \"\"\"Stringify ``data``; for a dict only the list of its values is kept.\"\"\"\n",
"        if isinstance(data, dict):\n",
"            return str(list(data.values()))\n",
"        return str(data)\n",
"\n",
"    def group_by(self):\n",
"        \"\"\"Return one groupby object per problem frame in ``ndf_list``.\"\"\"\n",
"        return [self.group_by_per_problem(i) for i in range(len(self.ndf_list))]\n",
"\n",
"    def _save_and_show(self, fig, filename):\n",
"        \"\"\"Write ``fig`` to ``filename`` (creating its folder) and display it inline.\"\"\"\n",
"        Path(filename).parent.mkdir(parents=True, exist_ok=True)\n",
"        plot(fig, filename=filename, auto_open=False, image='png', image_height=800, image_width=1500)\n",
"        offline.iplot(fig)\n",
"\n",
"    def plot(self, filename=\"./plot/vector.html\"):\n",
"        \"\"\"Draw a histogram of the completion time in seconds; return 0.\n",
"\n",
"        Records whose interval is the -1 \"no stop time\" sentinel are left out\n",
"        so the sentinel does not appear as a bar.\n",
"        \"\"\"\n",
"        intervals = [sec for sec in self.df.loc[:, 'interval'] if sec != -1]\n",
"        data = [go.Histogram(x=intervals)]\n",
"        layout = {\"title\": \"学生用时分布\",\n",
"                  \"xaxis_title\": \"学生用时,单位秒\",\n",
"                  \"yaxis_title\": \"学生个数\",\n",
"                  # tilt the x-axis tick labels by 60 degrees\n",
"                  \"xaxis\": {\"tickangle\": 60}\n",
"                  }\n",
"        fig = go.Figure(data=data, layout=layout)\n",
"        self._save_and_show(fig, filename)\n",
"        return 0\n",
"\n",
"    def plot_problem(self, filename=\"./plot/problem_codes.html\"):\n",
"        \"\"\"Draw a bar chart of the number of distinct encodings per problem; return 0.\n",
"\n",
"        The default output file differs from ``plot`` so the two charts no\n",
"        longer overwrite each other.\n",
"        \"\"\"\n",
"        code_counts = [group.ngroups for group in self.group_list]\n",
"        data = [go.Bar(x=list(range(len(code_counts))), y=code_counts)]\n",
"        layout = {\"title\": \"不同题目的编码数量\",\n",
"                  \"xaxis_title\": \"题目编号\",\n",
"                  \"yaxis_title\": \"编码个数\",\n",
"                  # tilt the x-axis tick labels by 60 degrees\n",
"                  \"xaxis\": {\"tickangle\": 60}\n",
"                  }\n",
"        fig = go.Figure(data=data, layout=layout)\n",
"        self._save_and_show(fig, filename)\n",
"        return 0"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3b99c9a0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"init complete\n"
]
}
],
"source": [
"df = pd.read_excel('./data/data.xlsx')\n",
"data_entity = data_analysis(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2b2fd91",
"metadata": {},
"outputs": [],
"source": [
"df.sort_values('school', ascending=True, inplace= False )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9cc5ada7",
"metadata": {},
"outputs": [],
"source": [
"# Split the records by school: one DataFrame per school, in group-key order.\n",
"grouped = df.groupby('school')\n",
"df_l = [group for value, group in grouped]\n",
"df_l[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abeddddf",
"metadata": {},
"outputs": [],
"source": [
"# Rows labelled 0..266 go to the junior file (presumably the junior cohort — confirm the cut-off).\n",
"# index=False keeps the row index out of the sheet, so reading it back adds no 'Unnamed: 0' column.\n",
"df.loc[0:266,:].to_excel('./data/junior.xlsx', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b410779",
"metadata": {},
"outputs": [],
"source": [
"# Rows labelled 267 onward go to the senior file (presumably the senior cohort — confirm the cut-off).\n",
"# index=False keeps the row index out of the sheet, so reading it back adds no 'Unnamed: 0' column.\n",
"df.loc[267:, :].to_excel('./data/senior.xlsx', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79639e93",
"metadata": {},
"outputs": [],
"source": [
"df_junior = pd.read_excel('./data/junior.xlsx')\n",
"df_junior"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fb1a77f",
"metadata": {},
"outputs": [],
"source": [
"df_senior = pd.read_excel('./data/senior.xlsx')\n",
"df_senior"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1931a690",
"metadata": {},
"outputs": [],
"source": [
"data_entity.df"
]
},
{
"cell_type": "markdown",
"id": "02a7af39",
"metadata": {},
"source": [
"# 数据处理\n",
"处理函数定义在类data_analysis中的子函数remove_str()中\n",
"移除每一行中第8列中的frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\,这些说明字段删去;"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3811d24c",
"metadata": {},
"outputs": [],
"source": [
"index = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ccf75ef",
"metadata": {},
"outputs": [],
"source": [
"# Decode the cell at column position 8 of the row selected by `index`:\n",
"# a Python-literal list of JSON strings, parsed into one object per entry.\n",
"data_per_row = df.iloc[index, 8]\n",
"frame_list = ast.literal_eval(data_per_row)\n",
"# The comprehension variable is local, so `index` keeps its value and the\n",
"# cell reads the same row when it is re-run.\n",
"frame_dic_list = [json.loads(frame) for frame in frame_list]\n",
"frame_dic_list"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5c756d1",
"metadata": {},
"outputs": [],
"source": [
"# Check that dict objects survive as DataFrame cell values and can be indexed.\n",
"# The second entry is written as a dict ({2: 'df'}); with a comma it was a set literal.\n",
"test_df = pd.DataFrame([[{1: 'ds'}, {2: 'df'}], [{1: 'ds'}, {2: 'df'}]])\n",
"test_df.iloc[0,0][1]"
]
},
{
"cell_type": "markdown",
"id": "1c91c870",
"metadata": {},
"source": [
"最后将经过处理的结构体的列表存储到新的列中新属性的名称为ans注意这一步操作已经在初始化函数中完成"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "401c1aa8",
"metadata": {},
"outputs": [],
"source": [
"# Entries of 'ans' are already the unwrapped 'frame' payloads (remove_str strips\n",
"# the wrapper), so the answer is read directly without a further ['frame'] lookup.\n",
"dic_temp = data_entity.df.loc[0,'ans'][20]\n",
"dic_temp "
]
},
{
"cell_type": "markdown",
"id": "d2cc2d06",
"metadata": {},
"source": [
"# 添加列时长单位s\n",
"新增属性'interval'\n",
"\n",
"每一行的实例数据为:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f88915b0",
"metadata": {},
"outputs": [],
"source": [
"row_data = data_entity.df.loc[0,:]\n",
"row_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c95bb1c4",
"metadata": {},
"outputs": [],
"source": [
"# Elapsed time of the sample row, step by step.\n",
"start_time = row_data['start_time']\n",
"start_time = datetime.strptime(start_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
"\n",
"expire_time = row_data['expire_time']\n",
"expire_time = datetime.strptime(expire_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
"\n",
"stop_time = row_data['stop_time']\n",
"stop_time = datetime.strptime(stop_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
"\n",
"# total_seconds() includes whole days; .seconds would silently drop them.\n",
"total_sec = int((stop_time - start_time).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3717094e",
"metadata": {},
"outputs": [],
"source": [
"total_sec"
]
},
{
"cell_type": "markdown",
"id": "037fdfff",
"metadata": {},
"source": [
"基于以上思路的代码实现在init函数中执行self.get_interval"
]
},
{
"cell_type": "markdown",
"id": "50977b2b",
"metadata": {},
"source": [
"## 所有答卷的完成时间的分布图如下"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "897431f1",
"metadata": {},
"outputs": [],
"source": [
"data_entity.plot()"
]
},
{
"cell_type": "markdown",
"id": "dbf98c6d",
"metadata": {},
"source": [
"# 新建关键字是题目的表\n",
"总共23个题目"
]
},
{
"cell_type": "markdown",
"id": "b189e621",
"metadata": {},
"source": [
"每一行中的ans属性为一个固定长度23的列表该列表中的元素格式为词典现在建立一个新的dataframe共662行23列每列对应同一个题目。"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d899862",
"metadata": {},
"outputs": [],
"source": [
"ndf = data_entity.ndf"
]
},
{
"cell_type": "markdown",
"id": "0a88ae59",
"metadata": {},
"source": [
"将ndf切分成23份并提取关键信息"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57807177",
"metadata": {},
"outputs": [],
"source": [
"ndf0 = ndf.loc[:,0]\n",
"ndf0_df = pd.DataFrame(ndf0)\n",
"ndf0_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a73a186",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): this rebinds `ndf0` (a Series in the previous cell) to the whole\n",
"# data_analysis object; presumably one problem's column/frame was intended — confirm.\n",
"ndf0 = data_entity"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3202fece",
"metadata": {},
"outputs": [],
"source": [
"a = list(data_entity.ndf_list[22].iloc[:,0])\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d1a3ff9",
"metadata": {},
"outputs": [],
"source": [
"# Build the string encoding of every answer, problem by problem.\n",
"# The class method is used so this cell does not depend on helper functions\n",
"# that are only defined in a later cell (which fails on Restart & Run All).\n",
"for df_temp in data_entity.ndf_list:\n",
"    df_str_list = [data_entity.content_to_str(answer) for answer in df_temp.iloc[:, 0]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f4dc894",
"metadata": {},
"outputs": [],
"source": [
"def content_to_str(data):\n",
"    \"\"\"Return the grouping key (a string) for one problem answer.\n",
"\n",
"    ``None`` becomes ``'None'``; a dict carrying a ``'data'`` entry is\n",
"    represented by that entry; anything else is passed to ``data_to_str``.\n",
"    \"\"\"\n",
"    if data is None:\n",
"        return str(None)\n",
"    # Only dicts can wrap their payload under 'data'; checking the type first\n",
"    # keeps scalars and strings from failing on a missing .keys().\n",
"    if isinstance(data, dict) and 'data' in data:\n",
"        return data_to_str(data['data'])\n",
"    return data_to_str(data)\n",
"\n",
"\n",
"def data_to_str(data):\n",
"    \"\"\"Stringify ``data``; for a dict only the list of its values is kept.\"\"\"\n",
"    if isinstance(data, dict):\n",
"        return str(list(data.values()))\n",
"    return str(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a1c59f2",
"metadata": {},
"outputs": [],
"source": [
"test = {'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
" 'transformation': [[0], [1, 0]]}\n",
"test1 = [[0, 0, 0, 0, 0, 0, 0],\n",
" [1, 0, 1, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 0, 0],\n",
" [1, 0, 0, 0, 0, 0, 0]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a9d3cee",
"metadata": {},
"outputs": [],
"source": [
"type(test) == type({})\n",
"str(list(test.values()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f656032",
"metadata": {},
"outputs": [],
"source": [
"test1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af9fc0ef",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "31c73f85",
"metadata": {},
"source": [
"题目的字典有三种格式:\n",
"1、ndf0.iloc[66,0] == None\n",
"\n",
"2、ndf0.iloc[0,0]\n",
"{'data': {'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}}\n",
"\n",
"3、ndf0.iloc[7,0]\n",
"{'successRate': 0, 'minJumps': 2, 'jumps': 3, 'path': [1, 2, 3, 4]}\n",
"\n",
"题目之间的data格式不一样\n",
"0、1为{'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}\n",
"\n",
"2、3为['00', '01', '02']\n",
"\n",
"4为[[0, 0, 0, 0, 0, 0, 0],\n",
" [1, 0, 1, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 0, 0],\n",
" [1, 0, 0, 0, 0, 0, 0]]\n",
" \n",
"5为['B_A', 'C_A']\n",
"\n",
"6为['B_A', 'C_A', 'G_F', 'D_B', 'E_B']\n",
"\n",
"7为[[[0, 1], [0, 2], [1, 2]],\n",
" [[0, 4], [0, 5], [1, 5]],\n",
" [[0, 6], [0, 7], [1, 7]],\n",
" [[0, 10], [0, 11], [1, 11]]]\n",
" \n",
"8为[[[1, 0], [2, 0], [3, 0], [3, 1]],\n",
" [[2, 3], [3, 3], [4, 3], [4, 4]],\n",
" [[0, 4], [1, 4], [2, 4], [2, 5]],\n",
" [[0, 1], [1, 1], [2, 1], [2, 2]],\n",
" [[3, 2], [4, 2], [5, 2], [5, 3]]]\n",
" \n",
"9为['09_02', '05_06']\n",
"\n",
"10为['v_14_v_6', 'v_2_v_16', 'v_15_v_12']\n",
"\n",
"11、12为[[{'row': 0, 'col': 0},\n",
" {'row': 1, 'col': 2},\n",
" {'row': 2, 'col': 1},\n",
" {'row': 0, 'col': 2},\n",
" {'row': 2, 'col': 4}]]\n",
" \n",
"13为 [2, 1]\n",
"\n",
"14为[1, 2, 1, 2, 2, 3]\n",
"\n",
"15为{'stamps': [1, 0, 1, 2], 'start': 1, 'end': 16, 'selected': True}\n",
"\n",
"16为{'stamps': [2, 1, 0, 1, 0],\n",
" 'start': 1,\n",
" 'end': 5,\n",
" 'selected': False}\n",
" \n",
"17为{'stamps': [2, 1, 0, 1], 'start': 1, 'end': 5, 'selected': False}\n",
"\n",
"18为{'rotation': [[1, 0, 1]], 'lowered': [0, 0]}\n",
"\n",
" {'rotation': [[2, 2, 0]], 'lowered': None}\n",
" \n",
"19为{'rotation': [[0, 0, 1, 3, 0, 1]], 'lowered': None}\n",
"\n",
" {'rotation': [[2, 3, 1, 0, 0, 1]], 'lowered': [0, 2]}\n",
" \n",
"20为{'rotation': [[None, None, 1, 1, 1, 0],\n",
" [1, 1, 3, None, 0, None],\n",
" [None, 1, None, None, 0, None],\n",
" [1, 1, 1, 0, None, None]],\n",
" 'lowered': None}\n",
" \n",
"21为{'sequence': [1, 0, 1, 0, 0, 1, 1],\n",
"\n",
" 'transformation': [[0], [0, 1]]}\n",
" \n",
"22为{'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
"\n",
" 'transformation': [[0], [1, 0]]}\n",
" "
]
},
{
"cell_type": "markdown",
"id": "487bdd9c",
"metadata": {},
"source": [
"## 每道题编码种类"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "132446f8",
"metadata": {},
"outputs": [],
"source": [
"# ndf0_group = ndf0_df.groupby(0)\n",
"# list(ndf0_group)\n",
"gl = data_entity.group_list\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24d6b280",
"metadata": {},
"outputs": [],
"source": [
"list(gl[4])[1][1]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d84f4c8",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): `g1` is not defined in the visible notebook (the list above is named\n",
"# `gl`), so this cell presumably raises NameError on a fresh run — confirm the intent.\n",
"g1.loc[0,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8526399f",
"metadata": {},
"outputs": [],
"source": [
"len(list(gl[6]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9add9fee",
"metadata": {},
"outputs": [],
"source": [
"data_entity.plot_problem()"
]
},
{
"cell_type": "markdown",
"id": "be338894",
"metadata": {},
"source": [
"基于上述求解思路编写self.group_by()函数以得出各个题目的编码数量"
]
},
{
"cell_type": "markdown",
"id": "7ba1a20b",
"metadata": {},
"source": [
"## 每道题正确率"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9fd1d2b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}