atom-predict/msunet/DataPreProcess.ipynb

382 lines
11 KiB
Plaintext
Executable File

{
"cells": [
{
"cell_type": "code",
"execution_count": 64,
"id": "66ee10ca-9a42-4e52-a56c-3c4e8b470afa",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.344161Z",
"start_time": "2024-07-08T02:11:39.339864Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import cv2\n",
"import glob\n",
"import copy\n",
"import json\n",
"import shutil\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from PIL import Image\n",
"from labelme import utils\n",
"from skimage.feature import peak_local_max"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "40711569-6f55-49d2-8a88-152c7b677218",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.352156Z",
"start_time": "2024-07-08T02:11:39.350458Z"
}
},
"outputs": [],
"source": [
"# class_dict = {\n",
"# 1: 'Norm', \n",
"# 2: 'SV',\n",
"# 3: 'LineSV',\n",
"# }\n",
"# \n",
"# class_dict_rev = {\n",
"# 'Norm': 1, \n",
"# 'SV': 2,\n",
"# 'LineSV': 3,\n",
"# }"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Class id -> label name used when turning mask values back into names.\n",
"# A dict holds a single name per id, so id 1 is listed once as 'atom';\n",
"# annotations labelled 'Norm' still resolve to 1 through class_dict_rev.\n",
"class_dict = {\n",
"    1: 'atom',\n",
"    2: 'SV',\n",
"    3: 'LineSV',\n",
"}\n",
"\n",
"# Annotation label -> class id written into the mask.\n",
"# 'Norm' and 'atom' intentionally share id 1.\n",
"class_dict_rev = {\n",
"    'Norm': 1,\n",
"    'SV': 2,\n",
"    'LineSV': 3,\n",
"    'atom': 1,\n",
"}"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.363439Z",
"start_time": "2024-07-08T02:11:39.361279Z"
}
},
"id": "53eea149372b6b63",
"execution_count": 66
},
{
"cell_type": "code",
"execution_count": 67,
"id": "1762751c-7d19-482c-96ff-477b9ea50e5b",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.368858Z",
"start_time": "2024-07-08T02:11:39.364312Z"
}
},
"outputs": [],
"source": [
"def crop_slide(img_path, save_path, patch_size=256, step=128):\n",
"    \"\"\"Cut a grayscale slide and its point-label mask into square patches.\n",
"\n",
"    The image is read as a single channel and the annotation JSON stored\n",
"    next to it (same path, '.json' extension) is rasterised into a mask of\n",
"    the same shape: the first point of every entry in 'shapes' sets one\n",
"    pixel to the class id looked up in class_dict_rev. A window of\n",
"    patch_size x patch_size pixels is then moved over both arrays in\n",
"    increments of step, and every window is saved as a PNG pair under\n",
"    <save_path>/img and <save_path>/lbl. For example, a 2048x2048 slide\n",
"    with the defaults is cut into 256x256 patches with a stride of 128.\n",
"\n",
"    Patch files are named '<base>_<row>_<col>_<n>.png', where row/col are\n",
"    the top-left pixel offsets of the window and n is the number of mask\n",
"    pixels inside it whose class id is greater than 1.\n",
"\n",
"    Args:\n",
"        img_path: Path of the slide image; the annotation file is expected\n",
"            at the same path with a '.json' extension.\n",
"        save_path: Output root; its 'img' and 'lbl' sub-directories are\n",
"            created when missing.\n",
"        patch_size: Side length of the square window, in pixels.\n",
"        step: Stride of the window, in pixels. Windows that would extend\n",
"            past the bottom or right edge are not emitted.\n",
"\n",
"    Raises:\n",
"        OSError: If OpenCV cannot read img_path.\n",
"        KeyError: If an annotation label is missing from class_dict_rev.\n",
"    \"\"\"\n",
"    # Everything before the first dot of the file name prefixes each patch.\n",
"    base_name = os.path.basename(img_path).split('.')[0]\n",
"    # Swap only the real extension: str.replace('.jpg', ...) would also touch\n",
"    # directory names and leave other spellings (e.g. '.JPG') unchanged.\n",
"    json_path = os.path.splitext(img_path)[0] + '.json'\n",
"\n",
"    img = cv2.imread(img_path, 0)\n",
"    if img is None:\n",
"        # cv2.imread signals failure by returning None instead of raising.\n",
"        raise OSError('Could not read image: {}'.format(img_path))\n",
"    h, w = img.shape\n",
"\n",
"    with open(json_path) as f:\n",
"        json_data = json.load(f)\n",
"\n",
"    shapes = json_data['shapes']\n",
"    # Each point is reversed so that its first element indexes mask rows.\n",
"    # reshape keeps the array two-dimensional when there are no shapes.\n",
"    points = np.array(\n",
"        [item['points'][0][::-1] for item in shapes], np.int32\n",
"    ).reshape(-1, 2)\n",
"    labels = np.array([class_dict_rev[item['label']] for item in shapes], np.int32)\n",
"\n",
"    # Drop points outside the image: negative indices would wrap around to\n",
"    # the opposite edge and too-large ones would raise IndexError.\n",
"    in_bounds = (\n",
"        (points[:, 0] >= 0) & (points[:, 0] < h)\n",
"        & (points[:, 1] >= 0) & (points[:, 1] < w)\n",
"    )\n",
"    points, labels = points[in_bounds], labels[in_bounds]\n",
"\n",
"    mask = np.zeros_like(img)\n",
"    mask[points[:, 0], points[:, 1]] = labels\n",
"\n",
"    img_dir = os.path.join(save_path, 'img')\n",
"    lbl_dir = os.path.join(save_path, 'lbl')\n",
"    os.makedirs(img_dir, exist_ok=True)\n",
"    os.makedirs(lbl_dir, exist_ok=True)\n",
"\n",
"    for i in range(0, h - patch_size + 1, step):\n",
"        for j in range(0, w - patch_size + 1, step):\n",
"            img_patch = img[i:i + patch_size, j:j + patch_size]\n",
"            mask_patch = mask[i:i + patch_size, j:j + patch_size]\n",
"            # Number of pixels with class id > 1, embedded in the file name.\n",
"            v_nums = int(np.sum(mask_patch > 1))\n",
"            patch_name = '{}_{}_{}_{}.png'.format(base_name, i, j, v_nums)\n",
"\n",
"            Image.fromarray(img_patch).save(os.path.join(img_dir, patch_name))\n",
"            Image.fromarray(mask_patch).save(os.path.join(lbl_dir, patch_name))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "1417b046-4e1c-4b9c-9aff-cf5295701601",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.373886Z",
"start_time": "2024-07-08T02:11:39.369909Z"
}
},
"outputs": [],
"source": [
"def process_slide(img_path, save_path):\n",
"    \"\"\"Save a whole grayscale slide together with its point-label mask.\n",
"\n",
"    The image is read as a single channel and the annotation JSON stored\n",
"    next to it (same path, '.json' extension) is rasterised into a mask of\n",
"    the same shape: the first point of every entry in 'shapes' sets one\n",
"    pixel to the class id looked up in class_dict_rev. Both arrays are\n",
"    written, uncropped, as '<base>.png' under <save_path>/img and\n",
"    <save_path>/lbl.\n",
"\n",
"    Args:\n",
"        img_path: Path of the slide image; the annotation file is expected\n",
"            at the same path with a '.json' extension.\n",
"        save_path: Output root; its 'img' and 'lbl' sub-directories are\n",
"            created when missing.\n",
"\n",
"    Raises:\n",
"        OSError: If OpenCV cannot read img_path.\n",
"        KeyError: If an annotation label is missing from class_dict_rev.\n",
"    \"\"\"\n",
"    # Everything before the first dot of the file name names the outputs.\n",
"    base_name = os.path.basename(img_path).split('.')[0]\n",
"    # Swap only the real extension: str.replace('.png', ...) left '.jpg'\n",
"    # inputs untouched, so the image itself was opened as JSON.\n",
"    json_path = os.path.splitext(img_path)[0] + '.json'\n",
"\n",
"    img = cv2.imread(img_path, 0)\n",
"    if img is None:\n",
"        # cv2.imread signals failure by returning None instead of raising.\n",
"        raise OSError('Could not read image: {}'.format(img_path))\n",
"    h, w = img.shape\n",
"\n",
"    with open(json_path) as f:\n",
"        json_data = json.load(f)\n",
"\n",
"    shapes = json_data['shapes']\n",
"    # Each point is reversed so that its first element indexes mask rows.\n",
"    # reshape keeps the array two-dimensional when there are no shapes.\n",
"    points = np.array(\n",
"        [item['points'][0][::-1] for item in shapes], np.int32\n",
"    ).reshape(-1, 2)\n",
"    labels = np.array([class_dict_rev[item['label']] for item in shapes], np.int32)\n",
"\n",
"    # Drop points outside the image: negative indices would wrap around to\n",
"    # the opposite edge and too-large ones would raise IndexError.\n",
"    in_bounds = (\n",
"        (points[:, 0] >= 0) & (points[:, 0] < h)\n",
"        & (points[:, 1] >= 0) & (points[:, 1] < w)\n",
"    )\n",
"    points, labels = points[in_bounds], labels[in_bounds]\n",
"\n",
"    # One labelled pixel per annotation; everything else stays 0.\n",
"    mask = np.zeros_like(img)\n",
"    mask[points[:, 0], points[:, 1]] = labels\n",
"\n",
"    img_dir = os.path.join(save_path, 'img')\n",
"    lbl_dir = os.path.join(save_path, 'lbl')\n",
"    os.makedirs(img_dir, exist_ok=True)\n",
"    os.makedirs(lbl_dir, exist_ok=True)\n",
"\n",
"    out_name = '{}.png'.format(base_name)\n",
"    Image.fromarray(img).save(os.path.join(img_dir, out_name))\n",
"    Image.fromarray(mask).save(os.path.join(lbl_dir, out_name))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "59c532e0-39ec-406c-8d77-e0a17a9181cd",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.377906Z",
"start_time": "2024-07-08T02:11:39.374544Z"
}
},
"outputs": [],
"source": [
"# img_lst = glob.glob('../../data/new_v2/train_our_predict/*.jpg') \n",
"# img_lst.sort(); len(img_lst)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "41c0bdb9-f5ca-4744-bc8a-d5d9723716c9",
"metadata": {
"tags": [],
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.381528Z",
"start_time": "2024-07-08T02:11:39.379265Z"
}
},
"outputs": [],
"source": [
"# Make sure every dataset split has its image and label output folders.\n",
"split_dir_templates = (\n",
"    '/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/{}/img',\n",
"    '/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/{}/lbl',\n",
")\n",
"for _type in ['train', 'valid', 'test']:\n",
"    for dir_template in split_dir_templates:\n",
"        os.makedirs(dir_template.format(_type), exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "378409b3-66b2-4325-8822-104c9c2519a1",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:39.386035Z",
"start_time": "2024-07-08T02:11:39.382040Z"
}
},
"outputs": [],
"source": [
"# crop_slide('../../data/linesv/slide/0.jpg', save_path='../../data/linesv/patch_unet/train/', step=64)\n",
"# crop_slide('../../data/linesv/slide/3.jpg', save_path='../../data/linesv/patch_unet/valid/', step=256)"
]
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10.jpg\n"
]
}
],
"source": [
"# Folder holding the training slides (replace with your own folder path).\n",
"folder_path = '/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/train'\n",
"\n",
"# Crop every .jpg slide in the folder into overlapping training patches.\n",
"for filename in os.listdir(folder_path):\n",
"    # Skip anything that is not a .jpg image (case-insensitive check).\n",
"    if not filename.lower().endswith('.jpg'):\n",
"        continue\n",
"    print(filename)\n",
"    # Full path of the slide to crop.\n",
"    file_path = os.path.join(folder_path, filename)\n",
"    crop_slide(\n",
"        file_path,\n",
"        save_path='/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/train/',\n",
"        step=64,\n",
"    )"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.253768Z",
"start_time": "2024-07-08T02:11:39.386537Z"
}
},
"id": "ccf06ae2191fa5d4",
"execution_count": 72
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10.json\n",
"img\n",
"lbl\n",
"10.jpg\n",
"10.png\n"
]
}
],
"source": [
"# Folder holding the validation slides (replace with your own folder path).\n",
"folder_path = '/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/valid'\n",
"\n",
"# Crop every .jpg slide in the folder into non-overlapping validation patches.\n",
"for filename in os.listdir(folder_path):\n",
"    # Every directory entry is echoed, including the ones that get skipped.\n",
"    print(filename)\n",
"    # Skip anything that is not a .jpg image (case-insensitive check).\n",
"    if not filename.lower().endswith('.jpg'):\n",
"        continue\n",
"    # Full path of the slide to crop.\n",
"    file_path = os.path.join(folder_path, filename)\n",
"    crop_slide(\n",
"        file_path,\n",
"        save_path='/home/gao/mouclear/cc/data_new/msunet/train_and_test_only_10/valid/',\n",
"        step=256,\n",
"    )\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.635764Z",
"start_time": "2024-07-08T02:11:43.254767Z"
}
},
"id": "ab9e77c4a6dcea69",
"execution_count": 73
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# folder_path = '../../data/new_v3/test_our_predict/'  # replace with your folder path\n",
"# \n",
"# # Iterate over every file in the folder\n",
"# for filename in os.listdir(folder_path):\n",
"#     # Check the file extension so that only image files are processed\n",
"#     if filename.lower().endswith(('.png')):\n",
"#         # Build the full file path\n",
"#         file_path = os.path.join(folder_path, filename)\n",
"#         print(file_path)\n",
"#         process_slide(file_path, save_path='../../data/new_v3/patch_unet/test/')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.638103Z",
"start_time": "2024-07-08T02:11:43.636490Z"
}
},
"id": "6ca7d1df1fe8764e",
"execution_count": 74
},
{
"cell_type": "code",
"execution_count": 75,
"id": "14d437d6-a43c-4471-a5ff-5df43e273819",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.648435Z",
"start_time": "2024-07-08T02:11:43.638835Z"
}
},
"outputs": [],
"source": [
"# for name in [2, 4, 6, 8, 10, 20, 30, 40]:\n",
"# process_slide('../../data/linesv/slide/{}.jpg'.format(name), save_path='../../data/linesv/patch_unet/test/')"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "ecdb4f79-cfa5-4511-bf67-21bbe26ba403",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.651072Z",
"start_time": "2024-07-08T02:11:43.649793Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 75,
"id": "ee0f80af-0c81-43fc-b879-002ad63413e8",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-08T02:11:43.652659Z",
"start_time": "2024-07-08T02:11:43.651529Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}