-
Notifications
You must be signed in to change notification settings - Fork 1
/
styletransfer.py
215 lines (160 loc) · 7.17 KB
/
styletransfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# -*- coding: utf-8 -*-
"""styletransfer.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/10Id_gVH0a1zej8TUV64-mB2X5feuDCVZ
# Importing the Libraries
"""
# numpy is needed for dealing with large arrays
import numpy as np
# PIL Library is used to deal with images
from PIL import Image
# Handling External Requests
import requests
# Keras models
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras import backend
# Optimizing
from scipy.optimize import fmin_l_bfgs_b
# Involves in I/O to convert to bytes
from io import BytesIO
"""# Setting Hyperparameters"""
# Hyperparams
# Number of L-BFGS optimization rounds (the training loop below actually
# runs ITERATIONS+10 rounds — see NOTE there).
ITERATIONS = 10
# Color channels (RGB).
CHANNELS = 3
# Working resolution; IMAGE_SIZE duplicates IMAGE_WIDTH/IMAGE_HEIGHT (all 224,
# VGG16's native input size).
IMAGE_SIZE = 224
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
# Relative weights of the three loss terms in the total objective.
CONTENT_WEIGHT = 100
STYLE_WEIGHT = 5000
TOTAL_VARIATION_WEIGHT = 0.995
# Exponent applied to neighbor-pixel differences in the total-variation loss.
TOTAL_VARIATION_LOSS_FACTOR = 1.25
"""# Loading Images"""
# URLs of the content (input) image and the style reference image.
input_image_path = "https://cdn.mos.cms.futurecdn.net/SSsejZ9krDiV9N8TExmRGZ-1200-80.jpg"
style_image_path = "https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcR4835l1WsqLT3HannAC5L_dd6SVk1B1GQ7tA&usqp=CAU"
# Content image: download, resize to the working resolution, keep a copy on disk.
input_image = Image.open(BytesIO(requests.get(input_image_path).content))
input_image = input_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
input_image.save('input.png')
input_image
# Style image: same treatment.
# Bug fix: this was previously saved as 'output.png', which the final result
# later overwrites, so the plt.imread('style.png') at the end of the script
# failed with FileNotFoundError. Saved as 'style.png' to match that read.
style_image = Image.open(BytesIO(requests.get(style_image_path).content))
style_image = style_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
style_image.save('style.png')
style_image
"""# Image-Preprocessing"""
# Bug fix: the Keras symbolic API used below (backend.variable /
# backend.placeholder / backend.concatenate) requires TF1-style graph mode.
# The original script only disabled eager execution much later (after the
# graph ops were already built), so placeholder creation could fail under
# TF2's default eager mode. Disable it up front instead.
import tensorflow
tensorflow.compat.v1.disable_eager_execution()
# VGG16 expects a 4-D batch tensor (batch, height, width, channels), so each
# 3-D image array gets a leading batch axis.
input_image_array = np.asarray(input_image, dtype="float32")
input_image_array = np.expand_dims(input_image_array, axis=0)
print(input_image_array)  # debug dump of the raw pixel values
# Reverse the channel axis (RGB -> BGR), presumably to match the channel
# ordering VGG16's pretrained weights expect.
input_image_array = input_image_array[:, :, :, ::-1]
style_image_array = np.asarray(style_image, dtype="float32")
style_image_array = np.expand_dims(style_image_array, axis=0)
style_image_array = style_image_array[:, :, :, ::-1]
# Fixed images become backend variables; the combination image is a
# placeholder the optimizer feeds candidate images into.
input_image = backend.variable(input_image_array)
style_image = backend.variable(style_image_array)
combination_image = backend.placeholder((1, IMAGE_HEIGHT, IMAGE_WIDTH, 3))
print(combination_image)
# Stack content (batch index 0), style (1) and combination (2) so one VGG16
# forward pass evaluates all three images.
input_tensor = backend.concatenate([input_image, style_image, combination_image], axis=0)
print(input_tensor)
"""# Modeling"""
# Build VGG16 over the 3-image batch tensor; include_top=False drops the
# fully-connected classifier head so only convolutional feature maps remain.
model = VGG16(input_tensor=input_tensor, include_top=False)
model.summary()
"""# Computing Losses
## Getting Data from Model Layers
"""
# Map every VGG16 layer name to its symbolic output tensor for easy lookup.
layers = {layer.name: layer.output for layer in model.layers}
# Content features come from an early layer ("block2_conv2"): per the style
# transfer paper, lower layers retain the image's content.
layer_features = layers["block2_conv2"]
layer_features.shape
# Within the stacked batch: index 0 is the content image, index 2 the
# combination image being optimized.
content_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
"""## Content Loss"""
def content_loss(content, combination):
    """Sum of squared differences between content and combination features."""
    delta = combination - content
    return backend.sum(backend.square(delta))
# Running total of the objective; the style and total-variation terms are
# accumulated onto it further below.
loss = 0
# Weighted content term: feature distance between content and combination.
loss = loss + CONTENT_WEIGHT * content_loss(content_image_features, combination_features)
"""## Style Loss
Generally gram matrix is a matrix with dimension equal to no of channels * no of channels which has a dot product with it's transpose which forms a gram matrix
"""
def gram_matrix(x):
    """Return the Gram matrix (channel-by-channel correlations) of feature map x."""
    # Move channels to the front, then flatten each channel's activations
    # into one row.
    flat = backend.batch_flatten(backend.permute_dimensions(x, (2, 0, 1)))
    # Inner products between every pair of channel rows.
    return backend.dot(flat, backend.transpose(flat))
# Style loss: squared distance between the Gram matrices of the style and
# combination feature maps, with the normalization from the original paper.
def compute_style_loss(style, combination):
    """Return the normalized Gram-matrix distance between two feature maps."""
    style_gram = gram_matrix(style)
    combination_gram = gram_matrix(combination)
    size = IMAGE_HEIGHT * IMAGE_WIDTH
    normalizer = 4. * (CHANNELS ** 2) * (size ** 2)
    return backend.sum(backend.square(style_gram - combination_gram)) / normalizer
# Accumulate the style loss over five VGG16 layers (one per conv block),
# each contributing an equal share of STYLE_WEIGHT.
style_layers = ["block1_conv1", "block2_conv1", "block3_conv1", "block4_conv1", "block5_conv1"]
per_layer_weight = STYLE_WEIGHT / len(style_layers)
for layer_name in style_layers:
    features = layers[layer_name]
    # Batch index 1 holds the style image, index 2 the combination image.
    layer_style_loss = compute_style_loss(features[1, :, :, :], features[2, :, :, :])
    loss = loss + per_layer_weight * layer_style_loss
"""## Variation Loss
It was observed that optimization to reduce only the style and content losses led to highly pixelated and noisy outputs. To cover the same, total variation loss was introduced. The total variation loss is analogous to regularization loss. This is introduced for ensuring spatial continuity and smoothness in the generated image to avoid noisy and overly pixelated results. The same is defined in the function as follows:
"""
def total_variation_loss(x):
    """Smoothness regularizer penalizing differences between neighboring pixels."""
    # Squared differences against the image shifted by one row (vertical).
    vertical = backend.square(x[:, :IMAGE_HEIGHT-1, :IMAGE_WIDTH-1, :] - x[:, 1:, :IMAGE_WIDTH-1, :])
    # Squared differences against the image shifted by one column (horizontal).
    horizontal = backend.square(x[:, :IMAGE_HEIGHT-1, :IMAGE_WIDTH-1, :] - x[:, :IMAGE_HEIGHT-1, 1:, :])
    return backend.sum(backend.pow(vertical + horizontal, TOTAL_VARIATION_LOSS_FACTOR))
# Add the weighted total-variation term to complete the objective.
loss += TOTAL_VARIATION_WEIGHT * total_variation_loss(combination_image)
import tensorflow
# NOTE(review): eager execution is disabled only here, AFTER the symbolic
# graph above was already built; backend.placeholder normally requires graph
# mode, so this call likely needs to run before graph construction — verify.
tensorflow.compat.v1.disable_eager_execution()
# outputs[0] is the scalar loss; backend.gradients appends the gradient of
# the loss w.r.t. the combination image, so both are fetched in one call.
outputs = [loss]
outputs=outputs+backend.gradients(loss, combination_image)
"""# Using Optimization fmin_l_bfgs_b"""
def evaluate_loss_and_gradients(x):
    """Evaluate total loss and its gradient for a flat candidate image vector.

    fmin_l_bfgs_b works on a flat float vector, so x is reshaped into the
    (1, H, W, C) batch shape the graph expects before evaluation.
    """
    batch = x.reshape((1, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    fetched = backend.function([combination_image], outputs)([batch])
    # L-BFGS requires the gradient as a flat float64 vector.
    gradient_vector = fetched[1].flatten().astype("float64")
    return fetched[0], gradient_vector
# fmin_l_bfgs_b wants separate callables for the loss and its gradient, but
# our graph computes both in a single pass. Evaluator caches the gradient
# produced during loss() so gradients() can return it without re-evaluating.
class Evaluator:
    """Adapter exposing loss/gradient callbacks for scipy's fmin_l_bfgs_b."""

    def __init__(self):
        # Fix: initialize the cache so calling gradients() before loss()
        # returns None instead of raising AttributeError.
        self._gradients = None

    def loss(self, x):
        """Compute and return the loss for x; cache the matching gradient."""
        loss, gradients = evaluate_loss_and_gradients(x)
        self._gradients = gradients
        return loss

    def gradients(self, x):
        """Return the gradient cached by the most recent loss() call."""
        return self._gradients

evaluator = Evaluator()
"""# Training the Image"""
# Start from random noise roughly centered on zero (range about [-128, 127]).
x = np.random.uniform(0, 255, (1, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)) - 128.
# NOTE(review): the loop runs ITERATIONS+10 (= 20) rounds despite the
# ITERATIONS hyperparameter; kept as-is to preserve behavior — confirm intent.
for i in range(ITERATIONS + 10):
    # One L-BFGS round, capped at maxfun=20 loss evaluations.
    x, loss, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.gradients, maxfun=20)
    # Bug fix: the original printed the literal letter "i" instead of the
    # iteration number.
    print(f"Iteration {i} completed loss is ", loss)
# Undo preprocessing: restore spatial shape, reverse the channel flip, and
# clamp to valid byte pixel values.
x = x.reshape((IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
x = x[:, :, ::-1]
x = np.clip(x, 0, 255).astype("uint8")
output_image = Image.fromarray(x)
output_image.save("output.png")
output_image
"""# Output image"""
import matplotlib.pyplot as plt
# Show the content image saved earlier in the script.
plt.imshow(plt.imread('input.png'))
plt.show()
# NOTE(review): 'style.png' is not written by this script (the style image was
# saved as 'output.png' above, which the result then overwrites), so this read
# fails unless the file exists from elsewhere — verify.
plt.imshow(plt.imread('style.png'))
plt.show()
# Show the final stylized result.
plt.imshow(plt.imread('output.png'))
plt.show()