opendata/human.ipynb

172 lines
3.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Roster file: its STU_CODE column is read in a later cell to build the list of IDs to keep.\n",
"idtable = pd.read_csv('./4137变量.csv')\n",
"\n",
"# Raw log to be filtered. The original author noted these column names\n",
"# (presumably the expected header; nothing here enforces it):\n",
"#   ['time', 'des', 'id', 'qcode', 'title', 'data']\n",
"table = pd.read_csv('0906.csv')\n",
"print(table)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Roster IDs converted to strings, so later membership checks compare text with text.\n",
"li = [str(code) for code in idtable['STU_CODE']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check: roster size and raw row count (one per line),\n",
"# followed by the first roster ID as a sample of its string form.\n",
"print(len(li), len(table), sep='\\n')\n",
"print(li[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Collect the index labels of rows to discard: rows whose id is the literal\n",
"# 'demo', plus rows whose id (compared as a string) is not in the roster `li`.\n",
"# Series.map(str) applies the same per-value str() conversion the row loop used.\n",
"id_text = table['id'].map(str)\n",
"\n",
"# A set gives constant-time lookups; testing `not in li` against the list for\n",
"# every row scanned the whole roster each time.\n",
"drop_mask = id_text.eq('demo') | ~id_text.isin(set(li))\n",
"\n",
"# `litodrop` is consumed by the later `table.drop(...)` cell; `count` is kept\n",
"# as the number of rows that will be removed.\n",
"litodrop = table.index[drop_mask.values].tolist()\n",
"count = len(litodrop)\n",
"print(count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# New frame without the flagged rows; `table` itself is left untouched.\n",
"table1 = table.drop(index=litodrop)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the filtered rows. The row labels are written as the first column,\n",
"# which the reload cell below reads back with index_col=0.\n",
"table1.to_csv('filter.csv', index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Distinct id values remaining after the filter.\n",
"idli = set(table1['id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Distinct id values in the unfiltered table (presumably for comparison with `idli`).\n",
"iidli = set(table['id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reload the filtered rows from disk, using the first CSV column as the index.\n",
"table2 = pd.read_csv(\n",
"    'filter.csv',\n",
"    index_col=0,\n",
")\n",
"print(table2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the four columns used from here on, in this order.\n",
"table2 = table2.loc[:, ['time', 'id', 'qcode', 'data']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the narrowed frame to confirm that only the four retained columns remain.\n",
"print(table2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse `time` into datetimes, then order rows by id and, within each id, by time.\n",
"table2 = (\n",
"    table2\n",
"    .assign(time=pd.to_datetime(table2['time']))\n",
"    .sort_values(['id', 'time'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Overwrites filter.csv (written earlier with the unsorted, full-width rows)\n",
"# with the narrowed, id/time-sorted frame.\n",
"table2.to_csv('filter.csv', index=True)"
]
}
],
"metadata": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python 3.6.9 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}