深度有趣 | 18 Anime Avatar Generation

Introduction

Similar to the CelebA project, we use an ACGAN to generate anime avatars, this time conditioned on multiple attributes.

Data

The images were crawled from getchu.com/, a Japanese game website that hosts a large number of character portraits; 31,970 images were collected in total.

Anime avatar example

Face Cropping

The dlib library introduced earlier can detect human faces, but it does not work well on anime faces.

Instead, we use OpenCV to crop the face region from each image, with the cascade file from the following project: github.com/nagadomi/lb…

The detected bounding box is enlarged appropriately so that the crop includes more of the character.

# -*- coding: utf-8 -*-

import cv2

cascade = cv2.CascadeClassifier('lbpcascade_animeface.xml')

image = cv2.imread('imgs/二次元头像示例.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.equalizeHist(gray)
faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64))
for i, (x, y, w, h) in enumerate(faces):
    # enlarge the detected box to 1.5x its size around the center
    cx = x + w // 2
    cy = y + h // 2
    x0 = cx - int(0.75 * w)
    x1 = cx + int(0.75 * w)
    y0 = cy - int(0.75 * h)
    y1 = cy + int(0.75 * h)
    # clip the box to the image boundary
    if x0 < 0:
        x0 = 0
    if y0 < 0:
        y0 = 0
    if x1 >= image.shape[1]:
        x1 = image.shape[1] - 1
    if y1 >= image.shape[0]:
        y1 = image.shape[0] - 1
    # make the crop square by trimming the longer side
    w = x1 - x0
    h = y1 - y0
    if w > h:
        x0 = x0 + w // 2 - h // 2
        x1 = x1 - w // 2 + h // 2
        w = h
    else:
        y0 = y0 + h // 2 - w // 2
        y1 = y1 - h // 2 + w // 2
        h = w
    # crop, resize to 128x128, and save
    face = image[y0: y0 + h, x0: x0 + w, :]
    face = cv2.resize(face, (128, 128))
    cv2.imwrite('face_%d.jpg' % i, face)

Example of cropped anime faces

Tag Extraction

We use Illustration2Vec to extract rich tags from anime images: github.com/rezoo/illus…

Illustration2Vec depends on the chainer deep learning framework and a few other libraries; install them if they are missing:

pip install chainer Pillow scikit-image

Illustration2Vec provides three capabilities:

  • represent each image as a 4096-dimensional feature vector
  • given a threshold, extract all tags whose probability exceeds it
  • given a list of tags, return the probability of each

For example, extract all plausible tags with a threshold of 0.5:

# -*- coding: utf-8 -*-

import i2v
from imageio import imread

illust2vec = i2v.make_i2v_with_chainer('illust2vec_tag_ver200.caffemodel', 'tag_list.json')

img = imread('imgs/二次元头像示例.jpg')
tags = illust2vec.estimate_plausible_tags([img], threshold=0.5)
print(tags)
tags = illust2vec.estimate_specific_tags([img], ['blue eyes', 'red hair'])
print(tags)

We can also query specific tags and obtain their probabilities; the second call above prints:

[{'blue eyes': 0.9488178491592407, 'red hair': 0.0025324225425720215}]
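
The first capability, the 4096-dimensional feature vector, uses a separate feature model rather than the tag model. A minimal sketch, based on the same extract_feature call used in the preprocessing step below:

# -*- coding: utf-8 -*-

import i2v
from imageio import imread

# the feature model only needs the caffemodel file, no tag list
illust2vec_feat = i2v.make_i2v_with_chainer('illust2vec_ver200.caffemodel')

img = imread('imgs/二次元头像示例.jpg')
vector = illust2vec_feat.extract_feature([img])[0]
print(vector.shape)  # a 4096-dimensional vector is expected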

Preprocessing

All images are processed on a server: crop the faces and extract the tags.

Load the libraries

# -*- coding: utf-8 -*-

import i2v
import cv2
import glob
import os
from imageio import imread
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pickle

Read the image paths

images = glob.glob('characters/*.jpg')
print(len(images))

Load the two models

illust2vec = i2v.make_i2v_with_chainer('illust2vec_tag_ver200.caffemodel', 'tag_list.json')
cascade = cv2.CascadeClassifier('lbpcascade_animeface.xml')
OUTPUT_DIR = 'faces/'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

Crop all the faces; 27,772 faces are detected in total

num = 0
for x in tqdm(range(len(images))):
    img_path = images[x]
    image = cv2.imread(img_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64))
    for (x, y, w, h) in faces:
        cx = x + w // 2
        cy = y + h // 2
        x0 = cx - int(0.75 * w)
        x1 = cx + int(0.75 * w)
        y0 = cy - int(0.75 * h)
        y1 = cy + int(0.75 * h)
        if x0 < 0:
            x0 = 0
        if y0 < 0:
            y0 = 0
        if x1 >= image.shape[1]:
            x1 = image.shape[1] - 1
        if y1 >= image.shape[0]:
            y1 = image.shape[0] - 1
        w = x1 - x0
        h = y1 - y0
        if w > h:
            x0 = x0 + w // 2 - h // 2
            x1 = x1 - w // 2 + h // 2
            w = h
        else:
            y0 = y0 + h // 2 - w // 2
            y1 = y1 - h // 2 + w // 2
            h = w
        
        face = image[y0: y0 + h, x0: x0 + w, :]
        face = cv2.resize(face, (128, 128))
        cv2.imwrite(os.path.join(OUTPUT_DIR, '%d.jpg' % num), face)
        num += 1
print(num)

We are interested in the following 34 tags:

  • 13 hair colors: blonde hair, brown hair, black hair, blue hair, pink hair, purple hair, green hair, red hair, silver hair, white hair, orange hair, aqua hair, grey hair
  • 5 hairstyles: long hair, short hair, twintails, drill hair, ponytail
  • 10 eye colors: blue eyes, red eyes, brown eyes, green eyes, purple eyes, yellow eyes, pink eyes, aqua eyes, black eyes, orange eyes
  • 6 other attributes: blush, smile, open mouth, hat, ribbon, glasses

For hair color, hairstyle, and eye color we keep only the tag with the highest probability; each of the other attributes is treated as present if its probability exceeds 0.25.

fw = open('face_tags.txt', 'w')
tags = ['blonde hair', 'brown hair', 'black hair', 'blue hair', 'pink hair', 'purple hair', 'green hair', 
        'red hair', 'silver hair', 'white hair', 'orange hair', 'aqua hair', 'grey hair',
        'long hair', 'short hair', 'twintails', 'drill hair', 'ponytail',
        'blue eyes', 'red eyes', 'brown eyes', 'green eyes', 'purple eyes', 'yellow eyes', 'pink eyes', 
        'aqua eyes', 'black eyes', 'orange eyes',
        'blush', 'smile', 'open mouth', 'hat', 'ribbon', 'glasses']
fw.write('id,' + ','.join(tags) + '\n')

images = glob.glob(os.path.join(OUTPUT_DIR, '*.jpg'))
for x in tqdm(range(len(images))):
    img_path = images[x]
    image = imread(img_path)
    result = illust2vec.estimate_specific_tags([image], tags)[0]
    
    hair_colors = [[h, result[h]] for h in tags[0:13]]
    hair_colors.sort(key=lambda x:x[1], reverse=True)
    for h in tags[0:13]:
        if h == hair_colors[0][0]:
            result[h] = 1
        else:
            result[h] = 0
    
    hair_styles = [[h, result[h]] for h in tags[13:18]]
    hair_styles.sort(key=lambda x:x[1], reverse=True)
    for h in tags[13:18]:
        if h == hair_styles[0][0]:
            result[h] = 1
        else:
            result[h] = 0
    
    eye_colors = [[h, result[h]] for h in tags[18:28]]
    eye_colors.sort(key=lambda x:x[1], reverse=True)
    for h in tags[18:28]:
        if h == eye_colors[0][0]:
            result[h] = 1
        else:
            result[h] = 0
    
    for h in tags[28:]:
        if result[h] > 0.25:
            result[h] = 1
        else:
            result[h] = 0
        
    fw.write(img_path + ',' + ','.join([str(result[t]) for t in tags]) + '\n')
    
fw.close()

We now have 27,772 anime face images, each with 34 tag values.

Next, compute the 4096-dimensional feature vector for each face

illust2vec = i2v.make_i2v_with_chainer("illust2vec_ver200.caffemodel")
img_all = []
vec_all = []
for x in tqdm(range(len(images))):
    img_path = images[x]
    image = imread(img_path)
    vector = illust2vec.extract_feature([image])[0]
    img_all.append(image / 255.)
    vec_all.append(vector)
img_all = np.array(img_all)
vec_all = np.array(vec_all)

Randomly select 2,000 faces and visualize them with t-SNE dimensionality reduction

from sklearn.manifold import TSNE
from imageio import imsave
data_index = np.arange(img_all.shape[0])
np.random.shuffle(data_index)
data_index = data_index[:2000]

tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
two_d_vectors = tsne.fit_transform(vec_all[data_index, :])
puzzles = np.ones((6400, 6400, 3))
xmin = np.min(two_d_vectors[:, 0])
xmax = np.max(two_d_vectors[:, 0])
ymin = np.min(two_d_vectors[:, 1])
ymax = np.max(two_d_vectors[:, 1])

for i, vector in enumerate(two_d_vectors):
    x, y = two_d_vectors[i, :]
    x = int((x - xmin) / (xmax - xmin) * (6400 - 128) + 64)
    y = int((y - ymin) / (ymax - ymin) * (6400 - 128) + 64)
    puzzles[y - 64: y + 64, x - 64: x + 64, :] = img_all[data_index[i]]
imsave('二次元头像降维可视化.png', puzzles)

The visualization below shows that similar faces are indeed clustered together.

t-SNE visualization of anime faces

Model

We use the ACGAN framework, but unlike the DCGAN used for CelebA, this time the generator and discriminator are implemented with deeper, more complex networks inspired by SRGAN: arxiv.org/abs/1609.04…

The generator has the following structure:

  • 16 residual blocks, following the shortcut idea from ResNet
  • sub-pixel CNN instead of deconvolution: arxiv.org/abs/1609.05…

Generator network architecture

The sub-pixel CNN works as illustrated below: several channels are rearranged into a single plane, which increases the height and width while reducing the depth. A minimal sketch follows the figure.

Sub-pixel CNN principle
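
In TensorFlow this channel-to-space rearrangement is what tf.depth_to_space does (the same call used in the generator code below); a minimal sketch, with shapes chosen only for illustration:

# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf

# a 16x16 feature map with 256 channels, where 256 = 64 * 2 * 2
x = tf.placeholder(tf.float32, [1, 16, 16, 256])
# rearrange blocks of channels into spatial positions: output shape [1, 32, 32, 64]
y = tf.depth_to_space(x, 2)

with tf.Session() as sess:
    out = sess.run(y, feed_dict={x: np.zeros([1, 16, 16, 256], np.float32)})
    print(out.shape)  # (1, 32, 32, 64)

The generator applies this operation three times, turning a 16x16 feature map into a 128x128 image.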

The discriminator, shown below, uses 10 residual blocks, and its output has two branches, one for real/fake discrimination and one for tag classification.

Discriminator network architecture

Implementation

Load the libraries

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from imageio import imread, imsave, mimsave
import glob
from tqdm import tqdm

Load the images

images = glob.glob('faces/*.jpg')
print(len(images))

Load the tags

tags = pd.read_csv('face_tags.txt')
tags.index = tags['id']
tags.head()

Define some constants, network tensors, and helper functions. A power-of-two batch size works well, and it is set to 64 here; learning rate decay is also used.

batch_size = 64
z_dim = 128
WIDTH = 128
HEIGHT = 128
LABEL = 34
LAMBDA = 0.05
BETA = 3

OUTPUT_DIR = 'samples'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

X = tf.placeholder(dtype=tf.float32, shape=[batch_size, HEIGHT, WIDTH, 3], name='X')
X_perturb = tf.placeholder(dtype=tf.float32, shape=[batch_size, HEIGHT, WIDTH, 3], name='X_perturb')
Y = tf.placeholder(dtype=tf.float32, shape=[batch_size, LABEL], name='Y')
noise = tf.placeholder(dtype=tf.float32, shape=[batch_size, z_dim], name='noise')
noise_y = tf.placeholder(dtype=tf.float32, shape=[batch_size, LABEL], name='noise_y')
is_training = tf.placeholder(dtype=tf.bool, name='is_training')

global_step = tf.Variable(0, trainable=False)
add_global = global_step.assign_add(1)
initial_learning_rate = 0.0002
learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step=global_step, decay_steps=20000, decay_rate=0.5)

def lrelu(x, leak=0.2):
    return tf.maximum(x, leak * x)

def sigmoid_cross_entropy_with_logits(x, y):
    return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)

def conv2d(inputs, kernel_size, filters, strides, padding='same', use_bias=True):
    return tf.layers.conv2d(inputs=inputs, kernel_size=kernel_size, filters=filters, strides=strides, padding=padding, use_bias=use_bias)
    
def batch_norm(inputs, is_training=is_training, decay=0.9):
    return tf.contrib.layers.batch_norm(inputs, is_training=is_training, decay=decay)

The discriminator

def d_block(inputs, filters):
    h0 = lrelu(conv2d(inputs, 3, filters, 1))
    h0 = conv2d(h0, 3, filters, 1)
    h0 = lrelu(tf.add(h0, inputs))
    return h0

def discriminator(image, reuse=None):
    with tf.variable_scope('discriminator', reuse=reuse):
        h0 = image
        
        f = 32
        for i in range(5):
            if i < 3:
                h0 = lrelu(conv2d(h0, 4, f, 2))
            else:
                h0 = lrelu(conv2d(h0, 3, f, 2))
            h0 = d_block(h0, f)
            h0 = d_block(h0, f)
            f = f * 2
        
        h0 = lrelu(conv2d(h0, 3, f, 2))
        h0 = tf.contrib.layers.flatten(h0)
        Y_ = tf.layers.dense(h0, units=LABEL)
        h0 = tf.layers.dense(h0, units=1)
        return h0, Y_

The generator

def g_block(inputs):
    h0 = tf.nn.relu(batch_norm(conv2d(inputs, 3, 64, 1, use_bias=False)))
    h0 = batch_norm(conv2d(h0, 3, 64, 1, use_bias=False))
    h0 = tf.add(h0, inputs)
    return h0

def generator(z, label):
    with tf.variable_scope('generator', reuse=None):
        d = 16
        z = tf.concat([z, label], axis=1)
        h0 = tf.layers.dense(z, units=d * d * 64)
        h0 = tf.reshape(h0, shape=[-1, d, d, 64])
        h0 = tf.nn.relu(batch_norm(h0))
        shortcut = h0
        
        for i in range(16):
            h0 = g_block(h0)
            
        h0 = tf.nn.relu(batch_norm(h0))
        h0 = tf.add(h0, shortcut)
        
        for i in range(3):
            h0 = conv2d(h0, 3, 256, 1, use_bias=False)
            h0 = tf.depth_to_space(h0, 2)
            h0 = tf.nn.relu(batch_norm(h0))
        
        h0 = tf.layers.conv2d(h0, kernel_size=9, filters=3, strides=1, padding='same', activation=tf.nn.tanh, name='g', use_bias=True)
        return h0

The loss functions. The gradient penalty (gp) term comes from DRAGAN, arxiv.org/abs/1705.07…: whereas WGAN interpolates between real and generated samples, DRAGAN interpolates between real and perturbed samples.

g = generator(noise, noise_y)
d_real, y_real = discriminator(X)
d_fake, y_fake = discriminator(g, reuse=True)

loss_d_real = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_real, tf.ones_like(d_real)))
loss_d_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake, tf.zeros_like(d_fake)))
loss_g_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake, tf.ones_like(d_fake)))

loss_c_real = tf.reduce_mean(sigmoid_cross_entropy_with_logits(y_real, Y))
loss_c_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(y_fake, noise_y))

loss_d = loss_d_real + loss_d_fake + BETA * loss_c_real
loss_g = loss_g_fake + BETA * loss_c_fake

alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1], minval=0., maxval=1.)
interpolates = alpha * X + (1 - alpha) * X_perturb
grad = tf.gradients(discriminator(interpolates, reuse=True)[0], [interpolates])[0]
slop = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1]))
gp = tf.reduce_mean((slop - 1.) ** 2)
loss_d += LAMBDA * gp

vars_g = [var for var in tf.trainable_variables() if var.name.startswith('generator')]
vars_d = [var for var in tf.trainable_variables() if var.name.startswith('discriminator')]

Define the optimizers

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer_d = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5).minimize(loss_d, var_list=vars_d)
    optimizer_g = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5).minimize(loss_g, var_list=vars_g)

A helper function that tiles images into a montage

def montage(images):    
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    if len(images.shape) == 4 and images.shape[3] == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5
    elif len(images.shape) == 4 and images.shape[3] == 1:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 1)) * 0.5
    elif len(images.shape) == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1)) * 0.5
    else:
        raise ValueError('Could not parse image shape of {}'.format(images.shape))
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m

Prepare the data

X_all = []
Y_all = []
for i in tqdm(range(len(images))):
    image = imread(images[i])
    image = (image / 255. - 0.5) * 2
    X_all.append(image)
    
    y = list(tags.loc[images[i]])
    Y_all.append(y[1:])

X_all = np.array(X_all)
Y_all = np.array(Y_all)
print(X_all.shape, Y_all.shape)

Define a function that generates random tags. The tag distribution in the original data is unbalanced, but we want the generator to learn every tag, so random tags are generated uniformly.

def get_random_tags():
    y = np.random.uniform(0.0, 1.0, [batch_size, LABEL]).astype(np.float32)
    y[y > 0.75] = 1
    y[y <= 0.75] = 0
    for i in range(batch_size):
        hc = np.random.randint(0, 13)
        hs = np.random.randint(13, 18)
        ec = np.random.randint(18, 28)
        y[i, :28] = 0
        y[i, hc] = 1 # hair color
        y[i, hs] = 1 # hair style
        y[i, ec] = 1 # eye color
    return y

Train the model. For CelebA the male/female ratio is balanced, so training on a randomly drawn batch each iteration was enough; here the tag distribution is unbalanced, so we iterate over the whole dataset in shuffled epochs.

sess = tf.Session()
sess.run(tf.global_variables_initializer())
z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
y_samples = get_random_tags()
for i in range(batch_size):
    y_samples[i, :28] = 0
    y_samples[i, i // 8 % 13] = 1 # hair color
    y_samples[i, i // 8 % 5 + 13] = 1 # hair style
    y_samples[i, i // 8 % 10 + 18] = 1 # eye color
samples = []
loss = {'d': [], 'g': []}

offset = 0
for i in tqdm(range(60000)):
    if offset + batch_size > X_all.shape[0]:
        offset = 0
    if offset == 0:
        data_index = np.arange(X_all.shape[0])
        np.random.shuffle(data_index)
        X_all = X_all[data_index, :, :, :]
        Y_all = Y_all[data_index, :]
    X_batch = X_all[offset: offset + batch_size, :, :, :]
    Y_batch = Y_all[offset: offset + batch_size, :]
    X_batch_perturb = X_batch + 0.5 * X_batch.std() * np.random.random(X_batch.shape)
    offset += batch_size
    
    n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
    ny = get_random_tags()
    _, d_ls = sess.run([optimizer_d, loss_d], feed_dict={X: X_batch, X_perturb: X_batch_perturb, Y: Y_batch, noise: n, noise_y: ny, is_training: True})    
    
    n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
    ny = get_random_tags()
    _, g_ls = sess.run([optimizer_g, loss_g], feed_dict={noise: n, noise_y: ny, is_training: True})
    
    loss['d'].append(d_ls)
    loss['g'].append(g_ls)
    
    _, lr = sess.run([add_global, learning_rate])
    
    if i % 500 == 0:
        print(i, d_ls, g_ls, lr)
        gen_imgs = sess.run(g, feed_dict={noise: z_samples, noise_y: y_samples, is_training: False})
        gen_imgs = (gen_imgs + 1) / 2
        imgs = [img[:, :, :] for img in gen_imgs]
        gen_imgs = montage(imgs)
        plt.axis('off')
        plt.imshow(gen_imgs)
        imsave(os.path.join(OUTPUT_DIR, 'sample_%d.jpg' % i), gen_imgs)
        plt.show()
        samples.append(gen_imgs)

plt.plot(loss['d'], label='Discriminator')
plt.plot(loss['g'], label='Generator')
plt.legend(loc='upper right')
plt.savefig('Loss.png')
plt.show()
mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), samples, fps=10)

The generated anime faces are shown below. Within each row the hair color, hairstyle, and eye color are the same while the other attributes are random. A few results look poor, probably due to particular noise vectors or label combinations.

Generated anime faces

Save the model

saver = tf.train.Saver()
saver.save(sess, './anime_acgan', global_step=60000)

Load the model on a local machine and run the following three experiments:

  • generate samples with random tags drawn from the original tag distribution
  • generate samples with specified tags
  • fix the noise and generate samples with random tags from the original distribution
# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from imageio import imsave

batch_size = 64
z_dim = 128
LABEL = 34

def montage(images):    
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    if len(images.shape) == 4 and images.shape[3] == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5
    elif len(images.shape) == 4 and images.shape[3] == 1:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 1)) * 0.5
    elif len(images.shape) == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1)) * 0.5
    else:
        raise ValueError('Could not parse image shape of {}'.format(images.shape))
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m

def get_random_tags():
    y = np.random.uniform(0.0, 1.0, [batch_size, LABEL]).astype(np.float32)
    p_other = [0.6, 0.6, 0.25, 0.04488882, 0.3, 0.05384738]
    for i in range(batch_size):
        for j in range(len(p_other)):
            if y[i, j + 28] < p_other[j]:
                y[i, j + 28] = 1
            else:
                y[i, j + 28] = 0

    phc = [0.15968645, 0.21305391, 0.15491921, 0.10523116, 0.07953927, 0.09508879, 0.03567429, 0.07733163, 0.03157895, 0.01833307, 0.02236442, 0.00537514, 0.00182371]
    phs = [0.52989922,  0.37101264,  0.12567589,  0.00291153,  0.00847864]
    pec = [0.28350664, 0.15760678, 0.17862742, 0.13412254, 0.14212126, 0.0543913, 0.01020637, 0.00617501, 0.03167493, 0.00156775]
    for i in range(batch_size):
        y[i, :28] = 0

        hc = np.random.random()
        for j in range(len(phc)):
            if np.sum(phc[:j]) < hc < np.sum(phc[:j + 1]):
                y[i, j] = 1
                break

        hs = np.random.random()
        for j in range(len(phs)):
            if np.sum(phs[:j]) < hs < np.sum(phs[:j + 1]):
                y[i, j + 13] = 1
                break

        ec = np.random.random()
        for j in range(len(pec)):
            if np.sum(pec[:j]) < ec < np.sum(pec[:j + 1]):
                y[i, j + 18] = 1
                break
    return y

sess = tf.Session()
sess.run(tf.global_variables_initializer())

saver = tf.train.import_meta_graph('./anime_acgan-60000.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))

graph = tf.get_default_graph()
g = graph.get_tensor_by_name('generator/g/Tanh:0')
noise = graph.get_tensor_by_name('noise:0')
noise_y = graph.get_tensor_by_name('noise_y:0')
is_training = graph.get_tensor_by_name('is_training:0')

# generate samples with random tags from the original distribution
z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
y_samples = get_random_tags()
gen_imgs = sess.run(g, feed_dict={noise: z_samples, noise_y: y_samples, is_training: False})
gen_imgs = (gen_imgs + 1) / 2
imgs = [img[:, :, :] for img in gen_imgs]
gen_imgs = montage(imgs)
gen_imgs = np.clip(gen_imgs, 0, 1)
imsave('1_二次元头像随机生成.jpg', gen_imgs)

# generate samples with specified tags
all_tags = ['blonde hair', 'brown hair', 'black hair', 'blue hair', 'pink hair', 'purple hair', 'green hair', 'red hair', 'silver hair', 'white hair', 'orange hair', 'aqua hair', 'grey hair', 'long hair', 'short hair', 'twintails', 'drill hair', 'ponytail', 'blue eyes', 'red eyes', 'brown eyes', 'green eyes', 'purple eyes', 'yellow eyes', 'pink eyes', 'aqua eyes', 'black eyes', 'orange eyes', 'blush', 'smile', 'open mouth', 'hat', 'ribbon', 'glasses']
for i, tags in enumerate([['blonde hair', 'twintails', 'blush', 'smile', 'ribbon', 'red eyes'], ['silver hair', 'long hair', 'blush', 'smile', 'open mouth', 'blue eyes']]):
    z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
    y_samples = np.zeros([1, LABEL])
    for tag in tags:
        y_samples[0, all_tags.index(tag)] = 1
    y_samples = np.repeat(y_samples, batch_size, 0)
    gen_imgs = sess.run(g, feed_dict={noise: z_samples, noise_y: y_samples, is_training: False})
    gen_imgs = (gen_imgs + 1) / 2
    imgs = [img[:, :, :] for img in gen_imgs]
    gen_imgs = montage(imgs)
    gen_imgs = np.clip(gen_imgs, 0, 1)
    imsave('%d_二次元头像指定标签.jpg' % (i + 2), gen_imgs)

# fixed noise, random tags
z_samples = np.random.uniform(-1.0, 1.0, [1, z_dim]).astype(np.float32)
z_samples = np.repeat(z_samples, batch_size, 0)
y_samples = get_random_tags()
gen_imgs = sess.run(g, feed_dict={noise: z_samples, noise_y: y_samples, is_training: False})
gen_imgs = (gen_imgs + 1) / 2
imgs = [img[:, :, :] for img in gen_imgs]
gen_imgs = montage(imgs)
gen_imgs = np.clip(gen_imgs, 0, 1)
imsave('4_二次元头像固定噪音.jpg', gen_imgs)

Samples generated with random tags drawn from the original distribution:

Randomly generated anime faces

Faces generated with blonde hair, twintails, blush, smile, ribbon, and red eyes:

Anime faces with specified tags

Faces generated with silver hair, long hair, blush, smile, open mouth, and blue eyes:

Anime faces with specified tags

With fixed noise and random tags, the faces share roughly the same identity while the details vary:

Anime faces with fixed noise

Once you have mastered the above, you can also train an ACGAN on CelebA controlled by its 40 binary attributes, which is actually simpler than the anime case; see the sketch below.
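
A minimal sketch of how the random-tag function might look in that case, assuming the same training code; get_random_celeba_tags is a hypothetical helper and 0.5 is just an illustrative probability. Since CelebA's 40 attributes are not grouped into mutually exclusive sets, they can be sampled as independent 0/1 values:

# -*- coding: utf-8 -*-

import numpy as np

batch_size = 64
LABEL = 40  # CelebA has 40 binary attributes

def get_random_celeba_tags():
    # sample each attribute independently as 0 or 1
    y = np.random.uniform(0.0, 1.0, [batch_size, LABEL]).astype(np.float32)
    y[y > 0.5] = 1
    y[y <= 0.5] = 0
    return y

print(get_random_celeba_tags().shape)  # (64, 40)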

References

Video course

深度有趣(一)
