diff --git a/pydata-han/Process/get_stoptime_a.py b/pydata-han/Process/get_stoptime_a.py
new file mode 100644
index 0000000..6fe0e38
--- /dev/null
+++ b/pydata-han/Process/get_stoptime_a.py
@@ -0,0 +1,65 @@
+"""Recover missing stop times for the A tickets and compute durations.
+
+Reads the timestamp rows in ``A.xlsx`` and the ticket table
+``a_out_0714.xlsx``, adds a ``stoptime_new`` column (the last timestamp row
+of the ticket, used when the ticket has no ``stop_time``) and a ``time_new``
+column (minutes between ``start_time`` and the stop time), and writes the
+result to ``a_out_0719.xlsx``.
+"""
+import pandas as pd
+from datetime import datetime
+
+datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
+datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'
+
+
+def parse_time(value):
+    """Parse a timestamp string with or without fractional seconds."""
+    text = str(value)
+    try:
+        return datetime.strptime(text, datetimeFormat)
+    except ValueError:
+        return datetime.strptime(text, datetimeFormat2)
+
+
+# Last timestamp row per ticket; later rows overwrite earlier ones.
+stop_time_dict = {}
+data = pd.read_excel(r'A.xlsx')
+for index, row in data.iterrows():
+    stop_time_dict[str(row['ticket_id'])] = str(row['timestamp'])
+
+data2 = pd.read_excel(r'a_out_0714.xlsx')
+
+stoptime_new_list = []
+time_new_list = []
+
+for index, row in data2.iterrows():
+    print(index)
+    ticket_id = str(row['ticket_id'])
+    if pd.isna(row['stop_time']):
+        both_99 = int(row['P3_CODE']) == 99 and int(row['MM60311_CODE']) == 99
+        if both_99 or ticket_id not in stop_time_dict:
+            # No stop time can be recovered: leave both new cells blank.
+            stoptime_new_list.append("")
+            time_new_list.append("")
+            continue
+        stop_value = stop_time_dict[ticket_id]
+        stoptime_new_list.append(stop_value)
+    else:
+        stop_value = row['stop_time']
+        stoptime_new_list.append("")
+    delta = parse_time(stop_value) - parse_time(row['start_time'])
+    # total_seconds() keeps the days part that timedelta.seconds drops.
+    time_new_list.append(round(delta.total_seconds() / 60, 2))
+
+col_name = data2.columns.tolist()
+
+col_name.insert(col_name.index('stop_time')+1, 'stoptime_new')
+data2 = data2.reindex(columns=col_name)
+data2['stoptime_new'] = stoptime_new_list
+
+col_name.insert(col_name.index('cost_time')+1, 'time_new')
+data2 = data2.reindex(columns=col_name)
+data2['time_new'] = time_new_list
+
+data2.to_excel('a_out_0719.xlsx')
\ No newline at end of file
diff --git a/pydata-han/Process/get_stoptime_z.py b/pydata-han/Process/get_stoptime_z.py
new file mode 100644
index 0000000..518396e
--- /dev/null
+++ b/pydata-han/Process/get_stoptime_z.py
@@ -0,0 +1,65 @@
+"""Recover missing stop times for the Z tickets and compute durations.
+
+Reads the timestamp rows in ``Z.xlsx`` and the ticket table
+``z_out_0715.xlsx``, adds a ``stoptime_new`` column (the last timestamp row
+of the ticket, used when the ticket has no ``stop_time``) and a ``time_new``
+column (minutes between ``start_time`` and the stop time), and writes the
+result to ``z_out_0719.xlsx``.
+"""
+import pandas as pd
+from datetime import datetime
+
+datetimeFormat = '%Y-%m-%dT%H:%M:%S.%f+08:00'
+datetimeFormat2 = '%Y-%m-%dT%H:%M:%S+08:00'
+
+
+def parse_time(value):
+    """Parse a timestamp string with or without fractional seconds."""
+    text = str(value)
+    try:
+        return datetime.strptime(text, datetimeFormat)
+    except ValueError:
+        return datetime.strptime(text, datetimeFormat2)
+
+
+# Last timestamp row per ticket; later rows overwrite earlier ones.
+stop_time_dict = {}
+data = pd.read_excel(r'Z.xlsx')
+for index, row in data.iterrows():
+    stop_time_dict[str(row['ticket_id'])] = str(row['timestamp'])
+
+data2 = pd.read_excel(r'z_out_0715.xlsx')
+
+stoptime_new_list = []
+time_new_list = []
+
+for index, row in data2.iterrows():
+    print(index)
+    ticket_id = str(row['ticket_id'])
+    if pd.isna(row['stop_time']):
+        both_99 = int(row['P1_CODE']) == 99 and int(row['MM60101_CODE']) == 99
+        if both_99 or ticket_id not in stop_time_dict:
+            # No stop time can be recovered: leave both new cells blank.
+            stoptime_new_list.append("")
+            time_new_list.append("")
+            continue
+        stop_value = stop_time_dict[ticket_id]
+        stoptime_new_list.append(stop_value)
+    else:
+        stop_value = row['stop_time']
+        stoptime_new_list.append("")
+    delta = parse_time(stop_value) - parse_time(row['start_time'])
+    # total_seconds() keeps the days part that timedelta.seconds drops.
+    time_new_list.append(round(delta.total_seconds() / 60, 2))
+
+col_name = data2.columns.tolist()
+
+col_name.insert(col_name.index('stop_time')+1, 'stoptime_new')
+data2 = data2.reindex(columns=col_name)
+data2['stoptime_new'] = stoptime_new_list
+
+col_name.insert(col_name.index('cost_time')+1, 'time_new')
+data2 = data2.reindex(columns=col_name)
+data2['time_new'] = time_new_list
+
+data2.to_excel('z_out_0719.xlsx')
\ No newline at end of file
diff --git a/pydata-han/Process/split.py b/pydata-han/Process/split.py
new file mode 100644
index 0000000..3e9d663
--- /dev/null
+++ b/pydata-han/Process/split.py
@@ -0,0 +1,30 @@
+"""Split the ticket log workbook into the A and Z contest groups.
+
+Each of the four sheets is filtered by ``contest_id`` and written to
+``A<n>.xlsx`` / ``Z<n>.xlsx``; the per-sheet parts are then concatenated into
+``A.xlsx`` and ``Z.xlsx``.
+"""
+import pandas as pd
+
+SOURCE_FILE = r'ticket_log_PBL_testing3.xlsx'
+# The first sheet is addressed by position, the others by name.
+SHEETS = [0, 'Result 2', 'Result 3', 'Result 4']
+A_PATTERN = '高阶能力测试B|高阶能力测试C'
+Z_PATTERN = '数学建模'
+
+a_parts = []
+z_parts = []
+for number, sheet in enumerate(SHEETS, start=1):
+    data = pd.read_excel(SOURCE_FILE, sheet_name=sheet)
+    contest = data['contest_id']
+    # na=False: a blank contest_id matches neither group instead of putting
+    # NaN into the boolean mask, which pandas refuses to index with.
+    a_part = data[contest.str.contains(A_PATTERN, na=False)]
+    z_part = data[contest.str.contains(Z_PATTERN, na=False)]
+    a_part.to_excel('A{}.xlsx'.format(number))
+    z_part.to_excel('Z{}.xlsx'.format(number))
+    a_parts.append(a_part)
+    z_parts.append(z_part)
+
+pd.concat(a_parts).to_excel('A.xlsx')
+pd.concat(z_parts).to_excel('Z.xlsx')
\ No newline at end of file
diff --git a/pydata-han/Process/statistic_time.py b/pydata-han/Process/statistic_time.py
new file mode 100644
index 0000000..4398a37
--- /dev/null
+++ b/pydata-han/Process/statistic_time.py
@@ -0,0 +1,31 @@
+"""Print how many ``time_new`` values fall into each duration interval."""
+from bisect import bisect_left
+
+import pandas as pd
+
+
+def count_by_level(frame, upper_bounds):
+    """Count the ``time_new`` values of *frame* per interval.
+
+    Counter ``i`` collects values ``v`` with
+    ``upper_bounds[i-1] < v <= upper_bounds[i]``; the first counter has no
+    lower bound and the extra last counter takes everything above the final
+    bound. Blank cells are skipped.
+    """
+    counts = [0] * (len(upper_bounds) + 1)
+    for value in frame['time_new']:
+        if pd.isna(value):
+            continue
+        # bisect_left gives the index of the first bound >= value.
+        counts[bisect_left(upper_bounds, float(value))] += 1
+    return counts
+
+
+data_a = pd.read_excel(r'a_out_0719.xlsx')
+data_z = pd.read_excel(r'z_out_0719.xlsx')
+
+print("A")
+print(*count_by_level(data_a, [5, 10, 15, 20, 25, 30]))
+
+print("z")
+print(*count_by_level(data_z, [3, 6, 9, 12, 15]))
\ No newline at end of file