深度煉丹:用 GAN 生成 CG 圖片,可以直接在瀏覽器運行的深度卷積網路生成模型
深度學習最重要的是什麼?最重要的是數據。昨天爬了幾萬張高質量 CG,拿最簡單的 DCGAN 跑了幾百個 epoch:
雖然不一定看得出在畫什麼,但是色彩確實已經有些藝術的感覺。如果把模型做細緻一點,生成更大的圖片,或許某些可以騙進現代藝術展。
我把它做成了 JS 程序,讀者可以點擊打開下面這個網頁,自己生成無窮無盡的圖片(建議在電腦打開,因為計算量較大):
深度煉丹:用 GAN 生成 CG 圖片 Use GAN to generate CG artworks
按目前的經驗,如果用 state-of-the-art 的網路來跑,同時配合更多的輸入數據,效果會好得多,可以在這個基礎上進一步達到「看得出在畫什麼」的程度。
那麼,未來 CG 畫師是否會受到衝擊?其實也有可能是 AI 提供輔助工具,讓作畫的過程更方便快捷,就像 「一鍵補充細節」,「一鍵轉換風格」,「一鍵生成3D」 等等。
如果覺得此文有幫助,請記得點個贊噢。如需轉載本文,請先與本人聯繫,謝謝。
說說過程:
- 先把圖片爬下來,剪切縮放到統一尺寸(64×64)。
- 然後用 MXNet 的 im2rec 做成可以直接讀取的數據文件。
- 然後在 jupyter notebook 運行代碼即可。
- 可以用 MXNet 的 amalgamation 做成可以在瀏覽器運行。
另外,如果有 MXNet 開發團隊的同學看到,很建議加上 amalgamation 後 WebGL 加速的支持。現在 TF 有 TensorFire 了,Keras 早就有 Keras.js 了,這確實是潮流,加個 weBLAS 之類並不會很難。或者也可以給大家一個 MXNet 轉 TF 模型的工具,會很方便許多事情。
訓練代碼,基本就是 MXNet 現成的:
from __future__ import print_functionnimport mxnet as mxnimport numpy as npnfrom matplotlib import pyplot as pltnimport loggingnimport cv2nfrom datetime import datetimennloadG = None #"imagenet_G_2017_08_11-20_24-0090.params"nloadD = None #"imagenet_D_2017_08_11-20_24-0090.params"nndef make_dcgan_sym(ngf, ndf, nc, no_bias=True, fix_gamma=True, eps=1e-5 + 1e-12):n BatchNorm = mx.sym.BatchNormn rand = mx.sym.Variable(rand)nn g1 = mx.sym.Deconvolution(rand, name=g1, kernel=(4,4), num_filter=ngf*8, no_bias=no_bias)n gbn1 = BatchNorm(g1, name=gbn1, fix_gamma=fix_gamma, eps=eps)n gact1 = mx.sym.Activation(gbn1, name=gact1, act_type=relu)nn g2 = mx.sym.Deconvolution(gact1, name=g2, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ngf*4, no_bias=no_bias)n gbn2 = BatchNorm(g2, name=gbn2, fix_gamma=fix_gamma, eps=eps)n gact2 = mx.sym.Activation(gbn2, name=gact2, act_type=relu)nn g3 = mx.sym.Deconvolution(gact2, name=g3, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ngf*2, no_bias=no_bias)n gbn3 = BatchNorm(g3, name=gbn3, fix_gamma=fix_gamma, eps=eps)n gact3 = mx.sym.Activation(gbn3, name=gact3, act_type=relu)nn g4 = mx.sym.Deconvolution(gact3, name=g4, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ngf, no_bias=no_bias)n gbn4 = BatchNorm(g4, name=gbn4, fix_gamma=fix_gamma, eps=eps)n gact4 = mx.sym.Activation(gbn4, name=gact4, act_type=relu)nn g5 = mx.sym.Deconvolution(gact4, name=g5, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=nc, no_bias=no_bias)n gout = mx.sym.Activation(g5, name=gact5, act_type=tanh)nn data = mx.sym.Variable(data)n label = mx.sym.Variable(label)nn d_fix_gamma = Falsenn d1 = mx.sym.Convolution(data, name=d1, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ndf, no_bias=no_bias)n dact1 = mx.sym.LeakyReLU(d1, name=dact1, act_type=leaky, slope=0.2)nn d2 = mx.sym.Convolution(dact1, name=d2, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ndf*2, no_bias=no_bias)n dbn2 = BatchNorm(d2, name=dbn2, fix_gamma=d_fix_gamma, eps=eps)n dact2 = 
mx.sym.LeakyReLU(dbn2, name=dact2, act_type=leaky, slope=0.2)nn d3 = mx.sym.Convolution(dact2, name=d3, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ndf*4, no_bias=no_bias)n dbn3 = BatchNorm(d3, name=dbn3, fix_gamma=d_fix_gamma, eps=eps)n dact3 = mx.sym.LeakyReLU(dbn3, name=dact3, act_type=leaky, slope=0.2)nn d4 = mx.sym.Convolution(dact3, name=d4, kernel=(4,4), stride=(2,2), pad=(1,1), num_filter=ndf*8, no_bias=no_bias)n dbn4 = BatchNorm(d4, name=dbn4, fix_gamma=d_fix_gamma, eps=eps)n dact4 = mx.sym.LeakyReLU(dbn4, name=dact4, act_type=leaky, slope=0.2)nn d5 = mx.sym.Convolution(dact4, name=d5, kernel=(4,4), num_filter=1, no_bias=no_bias)n d5 = mx.sym.Flatten(d5)nn dloss = mx.sym.LogisticRegressionOutput(data=d5, label=label, name=dloss)n return gout, dlossnnclass RandIter(mx.io.DataIter):n def __init__(self, batch_size, ndim):n self.batch_size = batch_sizen self.ndim = ndimn self.provide_data = [(rand, (batch_size, ndim, 1, 1))]n self.provide_label = []nn def iter_next(self):n return Truenn def getdata(self):n return [mx.random.normal(0, 1.0, shape=(self.batch_size, self.ndim, 1, 1))]nnclass ImagenetIter(mx.io.DataIter):n def __init__(self, path, batch_size, data_shape):n self.internal = mx.io.ImageRecordIter(n path_imgrec = path,n data_shape = data_shape,n batch_size = batch_size,n preprocess_threads = 1,n shuffle = True,n min_img_size = 64,n max_img_size = 64,n rand_crop = False,n rand_mirror = False)n self.provide_data = [(data, (batch_size,) + data_shape)]n self.provide_label = []nn def reset(self):n self.internal.reset()nn def iter_next(self):n return self.internal.iter_next()nn def getdata(self):n data = self.internal.getdata()n data = data * (2.0/255.0)n data -= 1n return [data]nndef fill_buf(buf, i, img, shape):n n = buf.shape[0]/shape[1]n m = buf.shape[1]/shape[0]nn sx = (i%m)*shape[0]n sy = (i/m)*shape[1]n buf[sy:sy+shape[1], sx:sx+shape[0], :] = imgnndef visual(title, X):n assert len(X.shape) == 4n X = X.transpose((0, 2, 3, 1))n X = 
np.clip((X+1.0)*(255.0/2.0), 0, 255).astype(np.uint8)n n = np.ceil(np.sqrt(X.shape[0]))n buff = np.zeros((int(n*X.shape[1]), int(n*X.shape[2]), int(X.shape[3])), dtype=np.uint8)n for i, img in enumerate(X):n fill_buf(buff, i, img, X.shape[1:3])n #buff = cv2.cvtColor(buff, cv2.COLOR_BGR2RGB)n plt.imshow(buff)n plt.title(title)nplt.show()nnif __name__ == __main__:n logging.basicConfig(level=logging.DEBUG)nn # =============setting============n dataset = imagenetn imgnet_path = img64.recn ndf = 64n ngf = 64n nc = 3n batch_size = 64n Z = 100n lr = 0.0002n beta1 = 0.5n ctx = mx.gpu(0)n check_point = Truenn symG, symD = make_dcgan_sym(ngf, ndf, nc)n #mx.viz.plot_network(symG, shape={rand: (batch_size, 100, 1, 1)}).view()n #mx.viz.plot_network(symD, shape={data: (batch_size, nc, 64, 64)}).view()nn # ==============data==============n train_iter = ImagenetIter(imgnet_path, batch_size, (3, 64, 64))n rand_iter = RandIter(batch_size, Z)n label = mx.nd.zeros((batch_size,), ctx=ctx)nn # =============module G=============n modG = mx.mod.Module(symbol=symG, data_names=(rand,), label_names=None, context=ctx)n modG.bind(data_shapes=rand_iter.provide_data)nn if loadG:n modG.init_params(initializer=mx.init.Load(loadG))n else:n modG.init_params(initializer=mx.init.Normal(0.02))nn modG.init_optimizer(n optimizer=adam,n optimizer_params={n learning_rate: lr,n wd: 0.,n beta1: beta1,n })n mods = [modG]nn # =============module D=============n modD = mx.mod.Module(symbol=symD, data_names=(data,), label_names=(label,), context=ctx)n modD.bind(data_shapes=train_iter.provide_data,n label_shapes=[(label, (batch_size,))],n inputs_need_grad=True)nn if loadD:n modD.init_params(initializer=mx.init.Load(loadD))n else:n modD.init_params(initializer=mx.init.Normal(0.02))nn modD.init_optimizer(n optimizer=adam,n optimizer_params={n learning_rate: lr,n wd: 0.,n beta1: beta1,n })n mods.append(modD)nn # ============printing==============n def norm_stat(d):n return mx.nd.norm(d)/np.sqrt(d.size)n mon = 
mx.mon.Monitor(10, norm_stat, pattern=".*output|d1_backward_data", sort=True)n mon = Nonen if mon is not None:n for mod in mods:n passnn def facc(label, pred):n pred = pred.ravel()n label = label.ravel()n return ((pred > 0.5) == label).mean()nn def fentropy(label, pred):n pred = pred.ravel()n label = label.ravel()n return -(label*np.log(pred+1e-12) + (1.-label)*np.log(1.-pred+1e-12)).mean()nn mG = mx.metric.CustomMetric(fentropy)n mD = mx.metric.CustomMetric(fentropy)n mACC0 = mx.metric.CustomMetric(facc)n mACC1 = mx.metric.CustomMetric(facc)nn print(Training...)n stamp = datetime.now().strftime(%Y_%m_%d-%H_%M)nnsymG.save(%s_G_%s.json%(dataset, stamp))nsymD.save(%s_D_%s.json%(dataset, stamp))nn # =============train===============n for epoch in range(500+1):n train_iter.reset()n for t, batch in enumerate(train_iter):n rbatch = rand_iter.next()nn if mon is not None:n mon.tic()nn modG.forward(rbatch, is_train=True)n outG = modG.get_outputs()nn # update discriminator on faken label[:] = 0n modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)n modD.backward()n #modD.update()n gradD = [[grad.copyto(grad.context) for grad in grads] for grads in modD._exec_group.grad_arrays]nn modD.update_metric(mD, [label])n modD.update_metric(mACC0, [label])nn # update discriminator on realn label[:] = 1n batch.label = [label]n modD.forward(batch, is_train=True)n modD.backward()n for gradsr, gradsf in zip(modD._exec_group.grad_arrays, gradD):n for gradr, gradf in zip(gradsr, gradsf):n gradr += gradfn modD.update()nn modD.update_metric(mD, [label])n modD.update_metric(mACC1, [label])nn # update generatorn label[:] = 1n modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)n modD.backward()n diffD = modD.get_input_grads()n modG.backward(diffD)n modG.update()nn mG.update([label], modD.get_outputs())nnn if mon is not None:n mon.toc_print()nn t += 1n if t % 300 == 0:n print(epoch:, epoch, iter:, t, metric:, mACC0.get(), mACC1.get(), mG.get(), mD.get())n mACC0.reset()n 
mACC1.reset()n mG.reset()n mD.reset()nn visual(gout, outG[0].asnumpy())n# diff = diffD[0].asnumpy()n# diff = (diff - diff.mean())/diff.std()n# visual(diff, diff)n visual(data, batch.data[0].asnumpy())nn if check_point and epoch % 10 == 0:n print(Saving...)n modG.save_params(%s_G_%s-%04d.params%(dataset, stamp, epoch))n modD.save_params(%s_D_%s-%04d.params%(dataset, stamp, epoch))n
如果覺得此文有幫助,請記得點個贊噢。如需轉載本文,請先與本人聯繫,謝謝。
歡迎關注我的專欄: 技術備忘錄 - 知乎專欄,其中有更多文章。
推薦閱讀:
※機器之心獨家解讀:華為首款手機端AI晶元麒麟970
※看看島國機器人格鬥大賽的選手們
TAG:深度学习DeepLearning | 生成对抗网络GAN | 人工智能 |