add competition

This commit is contained in:
zhouwy19 2022-04-19 10:33:09 +08:00
parent ceecf0f6bd
commit 233e2ff92f
8 changed files with 691 additions and 0 deletions

View File

@ -26,6 +26,7 @@ In another form of presentation, assuming that Pytorch's training time is 100 ho
## Table of Contents
* [Installation](#installation)
* [第二届计图人工智能挑战赛](#第二届计图人工智能挑战赛)
* [models](#models)
+ [Auxiliary Classifier GAN](#auxiliary-classifier-gan)
+ [Adversarial Autoencoder](#adversarial-autoencoder)
@ -60,6 +61,39 @@ In another form of presentation, assuming that Pytorch's training time is 100 ho
$ cd gan-jittor/
$ sudo python3.7 -m pip install -r requirements.txt
## 第二届计图人工智能挑战赛
### 计图挑战热身赛
本赛题将会提供数字图片数据集 MNIST,参赛选手需要训练一个将随机噪声和类别标签映射为数字图片的 Conditional GAN 模型,并生成注册时绑定的手机号。如果没有绑定手机号,请先绑定再进行提交。
本赛题提供示例代码框架,提供数据下载、模型定义、训练步骤等功能。
选手可以基于示例代码填充注释为 TODO 的部分完成该赛题。
```
cd competition/warm_up_comp
修改 CGAN.py 使其运行
```
### 赛题一:风景图片生成赛题
图像生成任务一直以来都是十分具有应用场景的计算机视觉任务。从语义分割图生成有意义、高质量的图片仍然存在诸多挑战,如保证生成图片的真实性、清晰程度、多样性、美观性等。
清华大学计算机系图形学实验室从 Flickr 官网收集了1万张高清(宽1024、高768)的风景图片,并制作了它们的语义分割图。其中1万对图片被用来训练。训练数据集可以从[这里](https://cloud.tsinghua.edu.cn/f/1d734cbb68b545d6bdf2/?dl=1)下载。
```
cd competition/landscape_comp
# 单卡训练,需要修改脚本里的数据路径
bash scripts/single_gpu.sh
# 多卡训练,需要修改脚本里的数据路径
bash scripts/multi_gpu.sh
```
代码中注释掉了 eval 的部分,等到测试数据发布之后,您可以取消注释进行评测。也可在训练阶段自动分配一部分数据集为测试集进行训练。
## models
### Auxiliary Classifier GAN
_Auxiliary Classifier Generative Adversarial Network_

View File

@ -0,0 +1,37 @@
import glob
import random
import os
import numpy as np
from jittor.dataset.dataset import Dataset
import jittor.transform as transform
from PIL import Image
class ImageDataset(Dataset):
    """Paired dataset of label maps and, for the training split, photos.

    Label maps are listed from ``<root>/<mode>/labels``; when ``mode`` is
    ``"train"`` photos are also listed from ``<root>/<mode>/imgs``. Both
    listings are sorted independently and paired by position, so the two
    directories are expected to contain matching file names.

    Args:
        root: Dataset root directory.
        mode: Split sub-directory name. Photos are only loaded for ``"train"``.
        transforms: Sequence of transforms handed to ``transform.Compose`` and
            applied to every image that is returned.
    """

    def __init__(self, root, mode="train", transforms=None):
        super().__init__()
        self.transforms = transform.Compose(transforms)
        self.mode = mode
        if self.mode == 'train':
            self.files = sorted(glob.glob(os.path.join(root, mode, "imgs", "*.*")))
        self.labels = sorted(glob.glob(os.path.join(root, mode, "labels", "*.*")))
        self.set_attrs(total_len=len(self.labels))
        print(f"from {mode} split load {self.total_len} images.")

    def __getitem__(self, index):
        """Return ``(img_A, img_B, photo_id)`` for ``index``.

        ``img_A`` is the transformed photo in train mode and a one-element
        placeholder array otherwise; ``img_B`` is the transformed label map
        replicated to three channels; ``photo_id`` is the label file name
        without directory or extension.
        """
        label_path = self.labels[index % len(self.labels)]
        # Strip directory and extension with os.path so that extensions of
        # any length (and non-POSIX separators) yield a clean identifier.
        photo_id = os.path.splitext(os.path.basename(label_path))[0]

        # NOTE(review): assumes the label image decodes to a 2-D array — confirm.
        label = np.array(Image.open(label_path)).astype("uint8")
        img_B = Image.fromarray(label[:, :, np.newaxis].repeat(3, 2))

        if self.mode == "train":
            img_A = Image.open(self.files[index % len(self.files)])
            # Random horizontal flip, applied to photo and label together.
            if np.random.random() < 0.5:
                img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB")
                img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB")
            img_A = self.transforms(img_A)
        else:
            # No photo is available outside training; return a placeholder.
            img_A = np.empty([1])

        img_B = self.transforms(img_B)
        return img_A, img_B, photo_id

View File

@ -0,0 +1,177 @@
import jittor as jt
from jittor import init
from jittor import nn
def start_grad(model):
    """Call ``start_grad()`` on every parameter returned by ``model.parameters()``."""
    parameters = model.parameters()
    for p in parameters:
        p.start_grad()
def stop_grad(model):
    """Call ``stop_grad()`` on every parameter returned by ``model.parameters()``."""
    parameters = model.parameters()
    for p in parameters:
        p.stop_grad()
def weights_init_normal(m):
    """Initialise ``m`` in place according to its class name.

    Classes whose name contains ``"Conv"`` get Gaussian weights (mean 0.0,
    std 0.02). Classes whose name contains ``"BatchNorm"`` get Gaussian
    weights (mean 1.0, std 0.02) and a zero bias. Anything else is left
    untouched.
    """
    kind = m.__class__.__name__
    if "Conv" in kind:
        jt.init.gauss_(m.weight, 0.0, 0.02)
        return
    if "BatchNorm" in kind:
        jt.init.gauss_(m.weight, 1.0, 0.02)
        jt.init.constant_(m.bias, 0.0)
class UNetDown(nn.Module):
    """Encoder stage: stride-2 4x4 convolution, optional norm, LeakyReLU, optional dropout.

    Args:
        in_size: Number of input channels.
        out_size: Number of output channels.
        normalize: Insert an ``InstanceNorm2d`` after the convolution.
        dropout: Dropout probability; a falsy value disables the layer.
    """

    def __init__(self, in_size, out_size, normalize=True, dropout=0.0):
        super().__init__()
        stage = [nn.Conv(in_size, out_size, 4, stride=2, padding=1, bias=False)]
        if normalize:
            stage += [nn.InstanceNorm2d(out_size, affine=None)]
        stage += [nn.LeakyReLU(scale=0.2)]
        if dropout:
            stage += [nn.Dropout(dropout)]
        self.model = nn.Sequential(*stage)

    def execute(self, x):
        """Run ``x`` through the stage."""
        return self.model(x)
class UNetUp(nn.Module):
    """Decoder stage: stride-2 4x4 transposed convolution, InstanceNorm, ReLU, optional dropout.

    The stage output is concatenated with a skip tensor along the channel
    axis, so the result has ``out_size`` plus the skip's channel count.

    Args:
        in_size: Number of input channels.
        out_size: Number of channels produced before concatenation.
        dropout: Dropout probability; a falsy value disables the layer.
    """

    def __init__(self, in_size, out_size, dropout=0.0):
        super().__init__()
        stage = [
            nn.ConvTranspose(in_size, out_size, 4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(out_size, affine=None),
            nn.ReLU(),
        ]
        if dropout:
            stage.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*stage)

    def execute(self, x, skip_input):
        """Upsample ``x`` and concatenate ``skip_input`` on dim 1."""
        upsampled = self.model(x)
        return jt.contrib.concat((upsampled, skip_input), dim=1)
class GeneratorUNet(nn.Module):
    """U-Net generator with eight down stages, seven up stages and a Tanh head.

    Every down stage output except the deepest one is fed to the matching up
    stage as a skip connection.
    NOTE(review): with eight stride-2 stages the input height and width
    presumably need to be multiples of 256 for the skips to line up — confirm.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the generated image.
    """

    def __init__(self, in_channels=3, out_channels=3):
        super().__init__()
        # Encoder (registration order is kept stable for parameter naming).
        self.down1 = UNetDown(in_channels, 64, normalize=False)
        self.down2 = UNetDown(64, 128)
        self.down3 = UNetDown(128, 256)
        self.down4 = UNetDown(256, 512, dropout=0.5)
        self.down5 = UNetDown(512, 512, dropout=0.5)
        self.down6 = UNetDown(512, 512, dropout=0.5)
        self.down7 = UNetDown(512, 512, dropout=0.5)
        self.down8 = UNetDown(512, 512, normalize=False, dropout=0.5)
        # Decoder: input widths from up2 on include the concatenated skip.
        self.up1 = UNetUp(512, 512, dropout=0.5)
        self.up2 = UNetUp(1024, 512, dropout=0.5)
        self.up3 = UNetUp(1024, 512, dropout=0.5)
        self.up4 = UNetUp(1024, 512, dropout=0.5)
        self.up5 = UNetUp(1024, 256)
        self.up6 = UNetUp(512, 128)
        self.up7 = UNetUp(256, 64)
        self.final = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv(128, out_channels, 4, padding=1),
            nn.Tanh(),
        )
        for module in self.modules():
            weights_init_normal(module)

    def execute(self, x):
        """Encode ``x``, decode with skip connections, and apply the output head."""
        encoders = (self.down1, self.down2, self.down3, self.down4,
                    self.down5, self.down6, self.down7, self.down8)
        decoders = (self.up1, self.up2, self.up3, self.up4,
                    self.up5, self.up6, self.up7)

        skips = []
        feat = x
        for encoder in encoders:
            feat = encoder(feat)
            skips.append(feat)

        # The deepest activation seeds the decoder; the remaining ones are
        # consumed deepest-first as skip inputs.
        feat = skips.pop()
        for decoder in decoders:
            feat = decoder(feat, skips.pop())
        return self.final(feat)
class UnetGenerator(nn.Module):
    """U-Net generator assembled recursively from ``UnetSkipConnectionBlock``s.

    The network is built from the innermost block outwards: one innermost
    block, ``num_downs - 5`` intermediate blocks at ``ngf * 8`` filters, three
    blocks narrowing from ``ngf * 8`` to ``ngf``, and one outermost block.

    Args:
        input_nc: Channels of the input image.
        output_nc: Channels of the output image.
        num_downs: Total number of downsampling steps.
        ngf: Filter count of the outermost convolution.
        norm_layer: Callable that builds a normalisation layer from a channel count.
        use_dropout: Add dropout to the intermediate ``ngf * 8`` blocks.
    """

    def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
        super().__init__()
        widest = ngf * 8

        # Innermost block first.
        core = UnetSkipConnectionBlock(widest, widest, input_nc=None, submodule=None,
                                       norm_layer=norm_layer, innermost=True)
        # Intermediate blocks at full width.
        for _ in range(num_downs - 5):
            core = UnetSkipConnectionBlock(widest, widest, input_nc=None, submodule=core,
                                           norm_layer=norm_layer, use_dropout=use_dropout)
        # Gradually reduce the number of filters from ngf * 8 to ngf.
        for outer_nc, inner_nc in ((ngf * 4, ngf * 8), (ngf * 2, ngf * 4), (ngf, ngf * 2)):
            core = UnetSkipConnectionBlock(outer_nc, inner_nc, input_nc=None, submodule=core,
                                           norm_layer=norm_layer)
        # Outermost block maps input_nc -> output_nc.
        self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=core,
                                             outermost=True, norm_layer=norm_layer)

    def execute(self, input):
        """Run ``input`` through the nested blocks."""
        return self.model(input)
class UnetSkipConnectionBlock(nn.Module):
    """One U-Net level: downsample, run the nested submodule, upsample.

    Except for the outermost block, the input is concatenated with the
    block's output on dim 1 (the skip connection).

    Args:
        outer_nc: Channels produced by the up-convolution.
        inner_nc: Channels produced by the down-convolution.
        input_nc: Channels of the block input; defaults to ``outer_nc``.
        submodule: Nested block placed between the down and up paths.
        outermost: Build the outermost variant (no norm, Tanh output, no skip).
        innermost: Build the innermost variant (no submodule).
        norm_layer: Callable that builds a normalisation layer from a channel count.
        use_dropout: Append ``Dropout(0.5)`` in the intermediate variant.
    """

    def __init__(self, outer_nc, inner_nc, input_nc=None,
                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
        super().__init__()
        self.outermost = outermost
        input_nc = outer_nc if input_nc is None else input_nc

        # Layers are created in a fixed order regardless of which variant
        # ends up using them.
        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
                             stride=2, padding=1, bias=False)
        downrelu = nn.LeakyReLU(0.2)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU()
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose(inner_nc * 2, outer_nc,
                                      kernel_size=4, stride=2, padding=1)
            layers = [downconv, submodule, uprelu, upconv, nn.Tanh()]
        elif innermost:
            upconv = nn.ConvTranspose(inner_nc, outer_nc,
                                      kernel_size=4, stride=2, padding=1, bias=False)
            layers = [downrelu, downconv, uprelu, upconv, upnorm]
        else:
            upconv = nn.ConvTranspose(inner_nc * 2, outer_nc,
                                      kernel_size=4, stride=2, padding=1, bias=False)
            layers = [downrelu, downconv, downnorm, submodule, uprelu, upconv, upnorm]
            if use_dropout:
                layers.append(nn.Dropout(0.5))

        self.model = nn.Sequential(*layers)

    def execute(self, x):
        """Return the block output, concatenated with ``x`` unless outermost."""
        out = self.model(x)
        if self.outermost:
            return out
        # Add the skip connection.
        return jt.contrib.concat([x, out], 1)
class Discriminator(nn.Module):
    """Convolutional discriminator over a channel-wise concatenated image pair.

    The first convolution takes ``in_channels * 2`` channels and the last one
    emits a single-channel map of logits.

    Args:
        in_channels: Channels of each image in the pair.
    """

    def __init__(self, in_channels=3):
        super().__init__()

        def discriminator_block(in_filters, out_filters, stride=2, normalization=True):
            """Return the layers of one discriminator stage as a list."""
            stage = [nn.Conv(in_filters, out_filters, 4, stride=stride, padding=1)]
            if normalization:
                stage.append(nn.BatchNorm2d(out_filters, eps=1e-05, momentum=0.1, affine=True))
            stage.append(nn.LeakyReLU(scale=0.2))
            return stage

        layers = []
        layers += discriminator_block(in_channels * 2, 64, normalization=False)
        layers += discriminator_block(64, 128)
        layers += discriminator_block(128, 256)
        layers += discriminator_block(256, 512, stride=1)
        layers.append(nn.Conv(512, 1, 4, padding=1, bias=False))
        self.model = nn.Sequential(*layers)

        for module in self.modules():
            weights_init_normal(module)

    def execute(self, input):
        """Return the logit map for ``input``."""
        return self.model(input)

View File

@ -0,0 +1,209 @@
import jittor as jt
from jittor import init
from jittor import nn
import jittor.transform as transform
import argparse
import os
import numpy as np
import math
import itertools
import time
import datetime
import sys
import cv2
import time
from models import *
from datasets import *
from tensorboardX import SummaryWriter
import warnings
warnings.filterwarnings("ignore")

# Enable the GPU only when this Jittor build reports CUDA support, so the
# script does not force CUDA on a CPU-only installation.
if jt.has_cuda:
    jt.flags.use_cuda = 1

# Command-line options for pix2pix training.
parser = argparse.ArgumentParser()
parser.add_argument("--epoch", type=int, default=0, help="epoch to start training from")
parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training")
parser.add_argument("--data_path", type=str, default="./jittor_landscape_200k")
parser.add_argument("--output_path", type=str, default="./results/flickr")
parser.add_argument("--batch_size", type=int, default=32, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
# b2 is Adam's second-moment decay; the help text used to repeat the b1 wording.
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient")
parser.add_argument("--decay_epoch", type=int, default=100, help="epoch from which to start lr decay")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_height", type=int, default=384, help="size of image height")
parser.add_argument("--img_width", type=int, default=512, help="size of image width")
parser.add_argument("--channels", type=int, default=3, help="number of image channels")
parser.add_argument(
    "--sample_interval", type=int, default=500, help="interval between sampling of images from generators"
)
parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between model checkpoints")
opt = parser.parse_args()
print(opt)
def save_image(img, path, nrow=10):
    """Tile a batch of images into one grid, write it with OpenCV and return it.

    The batch is laid out column by column: the grid has ``nrow`` columns and
    ``N // nrow`` rows, and consecutive images fill a column before moving to
    the next one. Values are min-max rescaled to ``[0, 255]``; a constant
    batch is written as all zeros instead of dividing by zero.

    Args:
        img: 4-D array indexed ``(N, C, rows, cols)``.
        path: Output file path passed to ``cv2.imwrite``.
        nrow: Number of images placed side by side; must divide ``N``.

    Returns:
        The grid as a ``(rows, cols, C)`` array exactly as written, or
        ``None`` when ``N`` is not a multiple of ``nrow``.
    """
    N, C, height, width = img.shape
    if N % nrow != 0:
        print("save_image error: N%nrow!=0")
        return
    ncol = N // nrow

    # (C, N * height, width): all images stacked vertically, per channel.
    stacked = img.transpose((1, 0, 2, 3)).reshape([C, -1, width])
    # Cut the stack into nrow strips of ncol images and put them side by side.
    strip = height * ncol
    grid = np.concatenate(
        [stacked[:, strip * k:strip * (k + 1), :] for k in range(nrow)],
        axis=2,
    )

    lo = grid.min()
    span = grid.max() - lo
    if span == 0:
        # Constant image: avoid 0/0 and emit an all-zero grid.
        grid = grid - lo
    else:
        grid = (grid - lo) / span * 255

    grid = grid.transpose((1, 2, 0))
    if C == 3:
        # cv2.imwrite expects the opposite channel order, so swap before writing.
        grid = cv2.cvtColor(grid, cv2.COLOR_BGR2RGB)
    cv2.imwrite(path, grid)
    return grid
os.makedirs(f"{opt.output_path}/images/", exist_ok=True)
os.makedirs(f"{opt.output_path}/saved_models/", exist_ok=True)
writer = SummaryWriter(opt.output_path)

# Loss functions
criterion_GAN = nn.BCEWithLogitsLoss()
criterion_pixelwise = nn.L1Loss()

# Loss weight of L1 pixel-wise loss between translated image and real image
lambda_pixel = 100

# Initialize generator and discriminator
generator = UnetGenerator(3, 3, 7, 64, norm_layer=nn.BatchNorm2d, use_dropout=True)
discriminator = Discriminator()

if opt.epoch != 0:
    # Resume from the checkpoints saved for epoch ``opt.epoch``
    generator.load(f"{opt.output_path}/saved_models/generator_{opt.epoch}.pkl")
    discriminator.load(f"{opt.output_path}/saved_models/discriminator_{opt.epoch}.pkl")

# Optimizers
optimizer_G = jt.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = jt.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

# Configure dataloaders
transforms = [
    transform.Resize(size=(opt.img_height, opt.img_width), mode=Image.BICUBIC),
    transform.ToTensor(),
    transform.ImageNormalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
]

dataloader = ImageDataset(opt.data_path, mode="train", transforms=transforms).set_attrs(
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.n_cpu,
)

# Evaluation is kept commented out until validation data is available;
# uncomment this section and the eval() call in the epoch loop to enable it.
# val_dataloader = ImageDataset(opt.data_path, mode="val", transforms=transforms).set_attrs(
#     batch_size=10,
#     shuffle=False,
#     num_workers=1,
# )

# @jt.single_process_scope()
# def eval(epoch, writer):
#     cnt = 1
#     os.makedirs(f"{opt.output_path}/images/test_fake_imgs/epoch_{epoch}", exist_ok=True)
#     for i, (_, real_A, photo_id) in enumerate(val_dataloader):
#         fake_B = generator(real_A)
#         if i == 0:
#             # visual image result
#             img_sample = np.concatenate([real_A.data, fake_B.data], -2)
#             img = save_image(img_sample, f"{opt.output_path}/images/epoch_{epoch}_sample.png", nrow=5)
#             writer.add_image('val/image', img.transpose(2,0,1), epoch)
#         fake_B = ((fake_B + 1) / 2 * 255).numpy().astype('uint8')
#         for idx in range(fake_B.shape[0]):
#             cv2.imwrite(f"{opt.output_path}/images/test_fake_imgs/epoch_{epoch}/{photo_id[idx]}.jpg", fake_B[idx].transpose(1,2,0)[:,:,::-1])
#             cnt += 1

# ----------
#  Training
# ----------

prev_time = time.time()
for epoch in range(opt.epoch, opt.n_epochs):
    # The dataset yields (photo, label map, id): the label map is the
    # generator input (A) and the photo is the target (B).
    for i, (real_B, real_A, _) in enumerate(dataloader):
        batches_done = epoch * len(dataloader) + i

        fake_B = generator(real_A)

        # ---------------------
        #  Train Discriminator
        # ---------------------
        start_grad(discriminator)
        fake_AB = jt.contrib.concat((real_A, fake_B), 1)
        # Detach so the discriminator step does not back-propagate into the generator.
        pred_fake = discriminator(fake_AB.detach())
        loss_D_fake = criterion_GAN(pred_fake, False)
        real_AB = jt.contrib.concat((real_A, real_B), 1)
        pred_real = discriminator(real_AB)
        loss_D_real = criterion_GAN(pred_real, True)
        loss_D = (loss_D_fake + loss_D_real) * 0.5
        optimizer_D.step(loss_D)
        writer.add_scalar('train/loss_D', loss_D.item(), batches_done)

        # ------------------
        #  Train Generators
        # ------------------
        stop_grad(discriminator)
        fake_AB = jt.contrib.concat((real_A, fake_B), 1)
        pred_fake = discriminator(fake_AB)
        loss_G_GAN = criterion_GAN(pred_fake, True)
        loss_G_L1 = criterion_pixelwise(fake_B, real_B)
        loss_G = loss_G_GAN + lambda_pixel * loss_G_L1
        optimizer_G.step(loss_G)
        writer.add_scalar('train/loss_G', loss_G.item(), batches_done)

        jt.sync_all(True)

        if jt.rank == 0:
            # --------------
            #  Log Progress
            # --------------

            # Determine approximate time left from the duration of the last batch
            batches_left = opt.n_epochs * len(dataloader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
            prev_time = time.time()

            # Print log
            jt.sync_all()
            if batches_done % 5 == 0:
                sys.stdout.write(
                    "\r[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f, pixel: %f, adv: %f] ETA: %s"
                    % (
                        epoch,
                        opt.n_epochs,
                        i,
                        len(dataloader),
                        loss_D.item(),
                        loss_G.item(),
                        loss_G_L1.item(),
                        loss_G_GAN.item(),
                        time_left,
                    )
                )

    # A non-positive interval disables checkpointing (-1 is the documented
    # "off" value; 0 would otherwise raise ZeroDivisionError).
    if jt.rank == 0 and opt.checkpoint_interval > 0 and epoch % opt.checkpoint_interval == 0:
        # eval(epoch, writer)
        # Save model checkpoints
        generator.save(f"{opt.output_path}/saved_models/generator_{epoch}.pkl")
        discriminator.save(f"{opt.output_path}/saved_models/discriminator_{epoch}.pkl")

View File

@ -0,0 +1,24 @@
# Jittor 第二届草图生成风景比赛 baseline
## Requirements
```
jittor
pillow
opencv-python
```
## Train
单卡训练,需要修改脚本里的数据路径
```
bash scripts/single_gpu.sh
```
多卡训练,需要修改脚本里的数据路径
```
bash scripts/multi_gpu.sh
```
代码中注释掉了 eval 的部分,等到测试数据发布之后,您可以取消注释进行评测。也可在训练阶段自动分配一部分数据集为测试集进行训练。

View File

@ -0,0 +1 @@
# Launch pix2pix.py under mpirun with 4 processes.
# Replace path_to_your_data with the dataset root before running.
mpirun -np 4 python pix2pix.py \
    --output_path ./results/multi_gpu \
    --batch_size 128 \
    --data_path path_to_your_data

View File

@ -0,0 +1 @@
# Launch pix2pix.py as a single process.
# Replace path_to_your_data with the dataset root before running.
python pix2pix.py \
    --output_path ./results/single_gpu \
    --batch_size 32 \
    --data_path path_to_your_data

View File

@ -0,0 +1,208 @@
import jittor as jt
from jittor import init
import argparse
import os
import numpy as np
import math
from jittor import nn
# Use the GPU when this Jittor build reports CUDA support.
if jt.has_cuda:
    jt.flags.use_cuda = 1

# Command-line options for the conditional GAN warm-up task.
parser = argparse.ArgumentParser()
parser.add_argument('--n_epochs', type=int, default=100, help='number of epochs of training')
parser.add_argument('--batch_size', type=int, default=64, help='size of the batches')
parser.add_argument('--lr', type=float, default=0.0002, help='adam: learning rate')
parser.add_argument('--b1', type=float, default=0.5, help='adam: decay of first order momentum of gradient')
# b2 is Adam's second-moment decay; the help text used to repeat the b1 wording.
parser.add_argument('--b2', type=float, default=0.999, help='adam: decay of second order momentum of gradient')
parser.add_argument('--n_cpu', type=int, default=8, help='number of cpu threads to use during batch generation')
parser.add_argument('--latent_dim', type=int, default=100, help='dimensionality of the latent space')
parser.add_argument('--n_classes', type=int, default=10, help='number of classes for dataset')
parser.add_argument('--img_size', type=int, default=32, help='size of each image dimension')
parser.add_argument('--channels', type=int, default=1, help='number of image channels')
parser.add_argument('--sample_interval', type=int, default=1000, help='interval between image sampling')
opt = parser.parse_args()
print(opt)

# (channels, height, width) of every generated image.
img_shape = (opt.channels, opt.img_size, opt.img_size)
class Generator(nn.Module):
    """Conditional generator: (noise, class label) -> image of shape ``img_shape``.

    The label is embedded into an ``opt.n_classes``-dimensional vector,
    concatenated with the noise and passed through a stack of fully connected
    layers ending in ``Tanh``.
    """

    def __init__(self):
        super().__init__()
        self.label_emb = nn.Embedding(opt.n_classes, opt.n_classes)

        def block(in_feat, out_feat, normalize=True):
            # nn.Linear(in_dim, out_dim) is a fully connected layer:
            #   in_dim  - size of the input vector
            #   out_dim - size of the output vector
            stage = [nn.Linear(in_feat, out_feat)]
            if normalize:
                # NOTE(review): 0.8 is passed as the second positional
                # argument of BatchNorm1d — confirm which parameter that is.
                stage.append(nn.BatchNorm1d(out_feat, 0.8))
            stage.append(nn.LeakyReLU(0.2))
            return stage

        layers = block(opt.latent_dim + opt.n_classes, 128, normalize=False)
        for fan_in, fan_out in ((128, 256), (256, 512), (512, 1024)):
            layers += block(fan_in, fan_out)
        layers += [nn.Linear(1024, int(np.prod(img_shape))), nn.Tanh()]
        self.model = nn.Sequential(*layers)

    def execute(self, noise, labels):
        """Generate one image per (noise, label) pair."""
        label_vec = self.label_emb(labels)
        flat = self.model(jt.contrib.concat((label_vec, noise), dim=1))
        # Reshape each flat output vector back to img_shape
        # (1024 values -> 1 x 32 x 32 with the default options).
        return flat.view((flat.shape[0], *img_shape))
class Discriminator(nn.Module):
    """Conditional discriminator over a flattened image and its label embedding.

    This class is an exercise template: the final layer of ``self.model`` and
    the body of ``execute`` are left as TODOs for the participant.
    """

    def __init__(self):
        super().__init__()
        self.label_embedding = nn.Embedding(opt.n_classes, opt.n_classes)
        in_features = opt.n_classes + int(np.prod(img_shape))
        self.model = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2),
            # TODO: add the last linear layer so the final output is a single real number
        )

    def execute(self, img, labels):
        """Build the discriminator input from ``img`` and ``labels`` (forward pass is a TODO)."""
        flat_img = img.view((img.shape[0], -1))
        d_in = jt.contrib.concat((flat_img, self.label_embedding(labels)), dim=1)
        # TODO: feed d_in into the model and return the result
# Loss function: squared error
# Usage: adversarial_loss(network_output_A, target_label_B)
# Result: (A - B)^2
adversarial_loss = nn.MSELoss()

generator = Generator()
discriminator = Discriminator()

# Load the MNIST dataset
from jittor.dataset.mnist import MNIST
import jittor.transform as transform

# The composed pipeline gets its own name so that the imported ``transform``
# module is not rebound to a ``Compose`` instance.
mnist_transform = transform.Compose([
    transform.Resize(opt.img_size),
    transform.Gray(),
    transform.ImageNormalize(mean=[0.5], std=[0.5]),
])
dataloader = MNIST(train=True, transform=mnist_transform).set_attrs(batch_size=opt.batch_size, shuffle=True)

optimizer_G = nn.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = nn.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
from PIL import Image
def save_image(img, path, nrow=10, padding=5):
    """Tile a batch of images into a padded grid and save it with PIL.

    Images fill the grid row by row, ``nrow`` per row. Every image gets
    ``padding`` zero-valued pixels on its right and bottom, and the whole
    grid gets the same padding on its top and left. Values (padding
    included) are min-max rescaled to ``[0, 255]``; a constant grid is saved
    as all zeros instead of dividing by zero.

    Args:
        img: 4-D array indexed ``(N, C, rows, cols)``.
        path: Output file path.
        nrow: Number of images per grid row; must divide ``N``.
        padding: Width in pixels of the zero border around each image.

    Returns:
        ``None``. When ``N`` is not a multiple of ``nrow`` a message is
        printed and nothing is saved.
    """
    N, C, height, width = img.shape
    if N % nrow != 0:
        print("N%nrow!=0")
        return
    ncol = N // nrow
    cell_h = height + padding
    cell_w = width + padding

    # Pad every image on its right and bottom edge. Work in float64, which is
    # what concatenating the images with np.zeros blocks would produce.
    cells = np.pad(
        np.asarray(img, dtype=np.float64),
        ((0, 0), (0, 0), (0, padding), (0, padding)),
    )
    # (N, C, h, w) -> (ncol, nrow, C, h, w) -> (C, ncol * h, nrow * w)
    grid = (
        cells.reshape(ncol, nrow, C, cell_h, cell_w)
        .transpose(2, 0, 3, 1, 4)
        .reshape(C, ncol * cell_h, nrow * cell_w)
    )
    # Outer border on the top and left.
    grid = np.pad(grid, ((0, 0), (padding, 0), (padding, 0)))

    lo = grid.min()
    span = grid.max() - lo
    if span == 0:
        # Constant grid: avoid 0/0.
        grid = np.zeros_like(grid)
    else:
        grid = (grid - lo) / span * 255

    grid = grid.transpose((1, 2, 0))
    if C == 3:
        # NOTE(review): the channel order is reversed before handing the
        # array to PIL — confirm this matches the channel order of the input.
        grid = grid[:, :, ::-1]
    elif C == 1:
        grid = grid[:, :, 0]
    Image.fromarray(np.uint8(grid)).save(path)
def sample_image(n_row, batches_done):
    """Sample ``n_row ** 2`` images from the generator and save them as ``<batches_done>.png``.

    Labels cycle through ``0 .. n_row - 1`` so that each grid row shows one
    image per label.
    """
    # Randomly sample the generator inputs.
    noise = np.random.normal(0, 1, (n_row ** 2, opt.latent_dim))
    z = jt.array(noise).float32().stop_grad()
    label_cycle = list(range(n_row)) * n_row
    labels = jt.array(np.array(label_cycle)).float32().stop_grad()
    # Generate and save the image grid.
    gen_imgs = generator(z, labels)
    save_image(gen_imgs.numpy(), "%d.png" % batches_done, nrow=n_row)
# ----------
#  Model training
# ----------

for epoch in range(opt.n_epochs):
    for i, (imgs, labels) in enumerate(dataloader):

        batch_size = imgs.shape[0]

        # Target tensors: valid=1 marks real images, fake=0 marks generated images
        valid = jt.ones([batch_size, 1]).float32().stop_grad()
        fake = jt.zeros([batch_size, 1]).float32().stop_grad()

        # Real images and their classes
        real_imgs = jt.array(imgs)
        labels = jt.array(labels)

        # -----------------
        #  Train the generator
        # -----------------

        # Sample random noise and digit classes as generator input
        z = jt.array(np.random.normal(0, 1, (batch_size, opt.latent_dim))).float32()
        gen_labels = jt.array(np.random.randint(0, opt.n_classes, batch_size)).float32()

        # Generate a batch of images
        gen_imgs = generator(z, gen_labels)
        # The loss measures how well the generator fools the discriminator,
        # i.e. the discriminator should classify generated images as valid
        validity = discriminator(gen_imgs, gen_labels)
        g_loss = adversarial_loss(validity, valid)
        g_loss.sync()
        optimizer_G.step(g_loss)

        # ---------------------
        #  Train the discriminator
        # ---------------------

        validity_real = discriminator(real_imgs, labels)
        # TODO: replace the placeholder argument with the loss inputs for real images
        d_real_loss = adversarial_loss("""TODO: 计算真实类别的损失函数""")

        validity_fake = discriminator(gen_imgs.stop_grad(), gen_labels)
        # TODO: replace the placeholder argument with the loss inputs for generated images
        d_fake_loss = adversarial_loss("""TODO: 计算虚假类别的损失函数""")

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2
        d_loss.sync()
        optimizer_D.step(d_loss)
        if i % 50 == 0:
            print(
                "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
                % (epoch, opt.n_epochs, i, len(dataloader), d_loss.data, g_loss.data)
            )

        batches_done = epoch * len(dataloader) + i
        if batches_done % opt.sample_interval == 0:
            sample_image(n_row=10, batches_done=batches_done)

    # Checkpoint every 10 epochs and also after the final epoch, so that the
    # "*_last.pkl" files reloaded after training hold the fully trained weights.
    if epoch % 10 == 0 or epoch == opt.n_epochs - 1:
        generator.save("generator_last.pkl")
        discriminator.save("discriminator_last.pkl")
# Switch both networks to evaluation mode and reload the checkpoints written during training.
generator.eval()
discriminator.eval()
generator.load('generator_last.pkl')
discriminator.load('discriminator_last.pkl')
# NOTE: exercise placeholder — the assignment below is incomplete until a value is filled in.
number = #TODO: fill in the phone number bound to your account at registration (as a string)
n_row = len(number)
# One latent vector per character of the number.
z = jt.array(np.random.normal(0, 1, (n_row, opt.latent_dim))).float32().stop_grad()
# Each character of the number becomes the class label of one generated image.
labels = jt.array(np.array([int(number[num]) for num in range(n_row)])).float32().stop_grad()
gen_imgs = generator(z,labels)
# Take index 0 of axis 1 (the channel axis of img_shape) and lay the images side by side along the width.
img_array = gen_imgs.data.transpose((1,2,0,3))[0].reshape((gen_imgs.shape[2], -1))
# Min-max rescale to [0, 255] before saving as an 8-bit image.
min_=img_array.min()
max_=img_array.max()
img_array=(img_array-min_)/(max_-min_)*255
Image.fromarray(np.uint8(img_array)).save("result.png")