styletransfer.py

# -*- coding: utf-8 -*-
"""styletransfer.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/10Id_gVH0a1zej8TUV64-mB2X5feuDCVZ

# Importing the Libraries
"""

# numpy is needed for dealing with large arrays
import numpy as np
# PIL Library is used to deal with images
from PIL import Image
# Handling External Requests
import requests
# Keras models
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras import backend
# Optimizing 
from scipy.optimize import fmin_l_bfgs_b
# Involves in I/O to convert to bytes
from io import BytesIO

"""# Setting Hyperparameters"""

# Hyperparams
ITERATIONS = 10
CHANNELS = 3
IMAGE_SIZE = 224
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
CONTENT_WEIGHT = 100
STYLE_WEIGHT = 5000
TOTAL_VARIATION_WEIGHT = 0.995
TOTAL_VARIATION_LOSS_FACTOR = 1.25

"""# Loading Images"""

input_image_path = "https://cdn.mos.cms.futurecdn.net/SSsejZ9krDiV9N8TExmRGZ-1200-80.jpg"
style_image_path = "https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcR4835l1WsqLT3HannAC5L_dd6SVk1B1GQ7tA&usqp=CAU"

# Input image
input_image = Image.open(BytesIO(requests.get(input_image_path).content))
input_image = input_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
input_image.save('input.png')
input_image

# Style Image
style_image = Image.open(BytesIO(requests.get(style_image_path).content))
style_image = style_image.resize((224,224))
style_image.save('output.png')
style_image

"""# Image-Preprocessing"""

# Need to convert the images from 3D to 4 Dimensions
input_image_array = np.asarray(input_image, dtype="float32")
input_image_array = np.expand_dims(input_image_array, axis=0)
print(input_image_array)
input_image_array = input_image_array[:, :, :, ::-1]

style_image_array = np.asarray(style_image, dtype="float32")
style_image_array = np.expand_dims(style_image_array, axis=0)
style_image_array = style_image_array[:, :, :, ::-1]

# Convert numpy arrays to tensors

input_image = backend.variable(input_image_array)
style_image = backend.variable(style_image_array)
combination_image = backend.placeholder((1, IMAGE_HEIGHT, IMAGE_SIZE, 3))
print(combination_image)

# Making a iinput Tensor by concatenating the input images

input_tensor = backend.concatenate([input_image,style_image,combination_image], axis=0)
print(input_tensor)

"""# Modeling"""

# Passing Input Tensors to VGG16 Model 
model = VGG16(input_tensor=input_tensor, include_top=False)

model.summary()

"""# Computing Losses

## Getting Data from Model Layers
"""

# Loading the layer output from VGG16 Model
layers = dict([(layer.name, layer.output) for layer in model.layers])

# Extracting the layers of CONTENT from the above layers mostly from the front layers in the architecture because according to the paper 
# It is mentioned that front layers contain content of the image
layer_features = layers["block2_conv2"]

layer_features.shape

# Splitting layer data into content and output image
content_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]

"""## Content Loss"""

# We need to define a function i.e content loss which is computing the loss between output image and content image
def content_loss(content, combination):
    return backend.sum(backend.square(combination - content))

# Initializing total loss to zero
loss = 0
# Calculating the content loss and adding the loss to the total loss
loss =loss+CONTENT_WEIGHT * content_loss(content_image_features,combination_features)

"""## Style Loss

Generally gram matrix is a matrix with dimension equal to no of channels * no of channels which has a dot product with it's transpose which forms a gram matrix
"""

# Computing the gram matrix 
def gram_matrix(x):
    features = backend.batch_flatten(backend.permute_dimensions(x, (2, 0, 1)))
    gram = backend.dot(features, backend.transpose(features))
    return gram

# Style Loss function generally computes the loss between 2 gram matrices of the layers output for the style and combination images 
def compute_style_loss(style, combination):
    style = gram_matrix(style)
    combination = gram_matrix(combination)
    size = IMAGE_HEIGHT * IMAGE_WIDTH
    return backend.sum(backend.square(style - combination)) / (4. * (CHANNELS ** 2) * (size ** 2))

# Computing Loss over 5 different layers to find the style loss

style_layers = ["block1_conv1", "block2_conv1", "block3_conv1", "block4_conv1", "block5_conv1"]
for layer_name in style_layers:
    layer_features = layers[layer_name]
    style_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    style_loss = compute_style_loss(style_features, combination_features)
    # adding the style loss at each layer to total loss
    loss=loss+(STYLE_WEIGHT / len(style_layers)) * style_loss

"""## Variation Loss

It was observed that optimization to reduce only the style and content losses led to highly pixelated and noisy outputs. To cover the same, total variation loss was introduced. The total variation loss is analogous to regularization loss. This is introduced for ensuring spatial continuity and smoothness in the generated image to avoid noisy and overly pixelated results. The same is defined in the function as follows:
"""

# Adding the variation Loss makes the image noise free and smooth

def total_variation_loss(x):
    a = backend.square(x[:, :IMAGE_HEIGHT-1, :IMAGE_WIDTH-1, :] - x[:, 1:, :IMAGE_WIDTH-1, :])
    b = backend.square(x[:, :IMAGE_HEIGHT-1, :IMAGE_WIDTH-1, :] - x[:, :IMAGE_HEIGHT-1, 1:, :])
    return backend.sum(backend.pow(a + b, TOTAL_VARIATION_LOSS_FACTOR))

loss += TOTAL_VARIATION_WEIGHT * total_variation_loss(combination_image)

import tensorflow
tensorflow.compat.v1.disable_eager_execution()

# Adding the loss with gradients with respect to combination_image
outputs = [loss]
outputs=outputs+backend.gradients(loss, combination_image)

"""# Using Optimization fmin_l_bfgs_b"""

# Obtaining the loss and gradients  
def evaluate_loss_and_gradients(x):
    x = x.reshape((1, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    outs = backend.function([combination_image], outputs)([x])
    loss = outs[0]
    gradients = outs[1].flatten().astype("float64")
    return loss, gradients

# This function is used to make optimizer function work because here we use fmin_l_bfgs_b which optimizes the argument ehich is in the function 
class Evaluator:

    def loss(self, x):
        loss, gradients = evaluate_loss_and_gradients(x)
        self._gradients = gradients
        return loss

    def gradients(self, x):
        return self._gradients

evaluator = Evaluator()

"""# Training the Image"""

# Initialising a random image
x = np.random.uniform(0, 255, (1, IMAGE_HEIGHT, IMAGE_WIDTH, 3)) - 128.

for i in range(ITERATIONS+10):
    x, loss, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.gradients, maxfun=20)
    print("Iteration i completed loss is ",loss)
    
x = x.reshape((IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
x = x[:, :, ::-1]
x = np.clip(x, 0, 255).astype("uint8")
output_image = Image.fromarray(x)
output_image.save("output.png")
output_image

"""# Output image"""

import matplotlib.pyplot as plt
plt.imshow(plt.imread('input.png'))
plt.show()
plt.imshow(plt.imread('style.png'))
plt.show()
plt.imshow(plt.imread('output.png'))
plt.show()