Write example using CloudBucketMount with LoRAs (#635)
* Adds S3 mount LoRA example

* Format file

* Linting

* Fixes gpu type

* Changes search_loras() function to run remotely

* Improve call arguments

* Add frontmatter

* adds a Gradio UI, adds more text

---------

Co-authored-by: Charles Frye <charles@modal.com>
luiscape and charlesfrye authored Mar 14, 2024
1 parent 8762563 commit e954ef8
Showing 2 changed files with 307 additions and 0 deletions.
10_integrations/cloud_bucket_mount_loras.py (307 additions, 0 deletions)
# ---
# output-directory: "/tmp/stable-diffusion-xl"
# runtimes: ["runc", "gvisor"]
# ---
# # LoRAs Galore: Create a LoRA Playground with Modal, Gradio, and S3
#
# This example shows how to mount an S3 bucket in a Modal app using [`CloudBucketMount`](https://modal.com/docs/reference/modal.CloudBucketMount).
# We will download a number of LoRA adapters from the [Hugging Face Hub](https://huggingface.co/models) into our S3 bucket,
# then read from that bucket, on the fly, when doing inference.
#
# By default, we use the [IKEA instructions LoRA](https://huggingface.co/ostris/ikea-instructions-lora-sdxl) as an example,
# which produces the following image when prompted to generate "IKEA instructions for building a GPU rig for deep learning":
#
# ![IKEA instructions for building a GPU rig for deep learning](./ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png)
#
# By the end of this example, we've deployed a "playground" app where anyone with a browser can try
# out these custom models. That's the power of Modal: custom, autoscaling AI applications, deployed in seconds.
# You can try out our deployment [here](https://modal-labs--loras-galore-app.modal.run).
#
# ## Basic setup
#

import io
import os
from pathlib import Path

from modal import (
    CloudBucketMount,  # the star of the show
    Image,
    Secret,
    Stub,
    asgi_app,
    build,
    enter,
    method,
)

# You will need an S3 bucket and AWS credentials to run this example. Refer to the documentation
# for the detailed [IAM permissions](https://modal.com/docs/guide/cloud-bucket-mounts#iam-permissions) those credentials will need.
#
# Once you have created a bucket and configured the IAM settings,
# create a [Modal Secret](https://modal.com/docs/guide/secrets). Navigate to the "Secrets" tab,
# click on the AWS card, then fill in the fields with the access key ID and secret access key you created
# previously. Name the Secret `s3-bucket-secret`.
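# If you prefer the command line, you should also be able to create the same Secret with the `modal` CLI,
# e.g. `modal secret create s3-bucket-secret AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=...`
# (the exact syntax may differ across versions; see `modal secret create --help`).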

bucket_secret = Secret.from_name("s3-bucket-secret")

MOUNT_PATH: Path = Path("/mnt/bucket")
LORAS_PATH: Path = MOUNT_PATH / "loras/v5"

# Modal runs serverless functions inside containers.
# The environments those functions run in are defined by
# the container `Image`. The line below constructs an image
# with the dependencies we need -- no need to install them locally.

image = Image.debian_slim().pip_install(
    "huggingface_hub==0.21.4",
    "transformers==4.38.2",
    "diffusers==0.26.3",
    "peft==0.9.0",
    "accelerate==0.27.2",
)

with image.imports():
    # we import these dependencies only inside the container
    import diffusers
    import huggingface_hub
    import torch

# We attach the S3 bucket to all the Modal functions in this app by mounting it on the filesystem they see,
# passing a `CloudBucketMount` to the `volumes` dictionary argument. We can read and write to this mounted bucket
# (almost) as if it were a local directory.
stub = Stub(
    "loras-galore",
    image=image,
    volumes={
        MOUNT_PATH: CloudBucketMount(
            "modal-s3mount-test-bucket",
            secret=bucket_secret,
        )
    },
)
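
# As a quick sanity check, we can also register a small helper function (not used by the rest of
# the example) that simply lists what is in the mounted bucket. Invoking it ad hoc with `modal run`
# is an easy way to confirm that the Secret and the bucket name above are configured correctly.


@stub.function()
def check_mount() -> list[str]:
    """Optional helper: lists the top-level entries visible in the mounted S3 bucket."""
    return [path.name for path in MOUNT_PATH.iterdir()]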


# ## Acquiring LoRA weights
#
# `search_loras()` will use the Hub API to search for LoRAs. We limit LoRAs
# to a maximum size to avoid downloading very large model weights.
# The `max_model_size` argument caps that size at 1 GiB by default, but feel free to adapt it to what works best for you.
@stub.function()
def search_loras(limit: int, max_model_size: int = 1024 * 1024 * 1024):
    api = huggingface_hub.HfApi()

    model_ids: list[str] = []
    for model in api.list_models(
        tags=["lora", "base_model:stabilityai/stable-diffusion-xl-base-1.0"],
        library="diffusers",
        sort="downloads",  # sort by most downloaded
    ):
        try:
            model_size = 0
            for file in api.list_files_info(model.id):
                model_size += file.size

        except huggingface_hub.utils.GatedRepoError:
            print(f"gated model ({model.id}); skipping")
            continue

        # Skip models that are larger than the file limit.
        if model_size > max_model_size:
            print(f"model {model.id} is too large; skipping")
            continue

        model_ids.append(model.id)
        if len(model_ids) >= limit:
            return model_ids

    return model_ids


# Next, we download LoRA weights into the S3 mount. Writing files to this mount automatically
# uploads them to S3. We will run this function in parallel using Modal's [`map`](https://modal.com/docs/reference/modal.Function#map).
@stub.function()
def download_lora(repository_id: str) -> str:
    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

    # CloudBucketMounts will report 0 bytes of available space leading to many
    # unnecessary warnings, so we patch the method that emits those warnings.
    from huggingface_hub import file_download

    file_download._check_disk_space = lambda x, y: False

    repository_path = LORAS_PATH / repository_id
    try:
        # skip models we've already downloaded
        if not repository_path.exists():
            huggingface_hub.snapshot_download(
                repository_id,
                local_dir=repository_path.as_posix().replace(".", "_"),
                allow_patterns=["*.safetensors"],
            )
        downloaded_lora = len(list(repository_path.rglob("*.safetensors"))) > 0
    except OSError:
        downloaded_lora = False
    except FileNotFoundError:
        downloaded_lora = False
    if downloaded_lora:
        return repository_id


# The `StableDiffusionLoRA` class loads Stable Diffusion XL 1.0 as a base model. When doing inference,
# it will also load whichever LoRA you specify from the S3 bucket.
# For more on the decorators we use below to speed up building and booting,
# check out the [container lifecycle hooks guide](https://modal.com/docs/guide/lifecycle-hooks).
@stub.cls(gpu="a10g")  # A10G GPUs are great for inference
class StableDiffusionLoRA:
    pipe_id = "stabilityai/stable-diffusion-xl-base-1.0"

    @build()  # when we set up our image, we download the base model
    def build(self):
        diffusers.DiffusionPipeline.from_pretrained(
            self.pipe_id, torch_dtype=torch.float16
        )

    @enter()  # when a new container starts, we load the base model onto the GPU
    def load(self):
        self.pipe = diffusers.DiffusionPipeline.from_pretrained(
            self.pipe_id, torch_dtype=torch.float16
        ).to("cuda")

    @method()  # at inference time, we pull in the LoRA weights and pass the final model the prompt
    def run_inference_with_lora(
        self, lora_id: str, prompt: str, seed: int = 8888
    ) -> bytes:
        for file in (LORAS_PATH / lora_id).rglob("*.safetensors"):
            self.pipe.load_lora_weights(lora_id, weight_name=file.name)
            break

        lora_scale = 0.9
        image = self.pipe(
            prompt,
            num_inference_steps=30,
            cross_attention_kwargs={"scale": lora_scale},
            generator=torch.manual_seed(seed),
        ).images[0]

        buffer = io.BytesIO()
        image.save(buffer, format="PNG")

        return buffer.getvalue()


# To try out our program locally, we add a `local_entrypoint`.
# Run it using `modal run cloud_bucket_mount_loras.py`, and pass `--help`
# to see the available options.
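# Modal turns the entrypoint's parameters into CLI flags, so a run might look something like
# `modal run cloud_bucket_mount_loras.py --limit 10 --prompt "an IKEA-style diagram of a rocket"`.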
@stub.local_entrypoint()
def main(
    limit: int = 100,
    example_lora: str = "ostris/ikea-instructions-lora-sdxl",
    prompt: str = "IKEA instructions for building a GPU rig for deep learning",
    seed: int = 8888,
):
    # Download LoRAs in parallel.
    lora_model_ids = [example_lora]
    lora_model_ids += search_loras.remote(limit)

    downloaded_loras = []
    for model in download_lora.map(lora_model_ids):
        if model:
            downloaded_loras.append(model)

    print(f"downloaded {len(downloaded_loras)} loras => {downloaded_loras}")

    # Run inference using one of the downloaded LoRAs.
    byte_stream = StableDiffusionLoRA().run_inference_with_lora.remote(
        example_lora, prompt, seed
    )
    dir = Path("/tmp/stable-diffusion-xl")
    if not dir.exists():
        dir.mkdir(exist_ok=True, parents=True)

    output_path = dir / f"{as_slug(prompt.lower())}.png"
    print(f"Saving it to {output_path}")
    with open(output_path, "wb") as f:
        f.write(byte_stream)


# ## Trying out LoRAs
#
# Command line tools are cool, but we can do better!
# With the Gradio library by Hugging Face, we can create a simple web interface
# around our Python inference function, then use Modal to host it for anyone to try out.

from fastapi import FastAPI

web_app = FastAPI()
web_image = Image.debian_slim().pip_install("gradio~=3.50.2", "pillow~=10.2.0")


@stub.function(image=web_image)
@asgi_app()
def app():
    """A simple Gradio interface around our LoRA inference."""
    import io

    import gradio as gr
    from gradio.routes import mount_gradio_app
    from PIL import Image

    # determine which LoRAs are available
    lora_ids = [
        f"{lora_dir.parent.stem}/{lora_dir.stem}"
        for lora_dir in LORAS_PATH.glob("*/*")
    ]

    # pick one to be the default, and set a default prompt
    default_lora_id = (
        "ostris/ikea-instructions-lora-sdxl"
        if "ostris/ikea-instructions-lora-sdxl" in lora_ids
        else lora_ids[0]
    )
    default_prompt = (
        "IKEA instructions for building a GPU rig for deep learning"
        if default_lora_id == "ostris/ikea-instructions-lora-sdxl"
        else "text"
    )

    # the simplest path to making an app on Gradio is an Interface: a UI wrapped around a function.
    def go(lora_id: str, prompt: str, seed: int) -> Image:
        return Image.open(
            io.BytesIO(
                StableDiffusionLoRA().run_inference_with_lora.remote(
                    lora_id, prompt, seed
                )
            ),
        )

    iface = gr.Interface(
        go,
        inputs=[  # the inputs to go/our inference function
            gr.Dropdown(
                choices=lora_ids, value=default_lora_id, label="👉 LoRA ID"
            ),
            gr.Textbox(default_prompt, label="🎨 Prompt"),
            gr.Number(value=8888, label="🎲 Random Seed"),
        ],
        outputs=gr.Image(label="Generated Image"),
        # some extra bits to make it look nicer
        title="Try out some of the top custom SDXL models!",
        description="Pick a LoRA finetune of SDXL from the dropdown, then prompt it to generate an image."
        "\n\nCheck out [the code on GitHub](https://github.com/modal-labs/examples/blob/main/10_integrations/cloud_bucket_mount_loras.py)"
        " if you want to create your own version or just see how it works."
        "\n\nPowered by [Modal](https://modal.com) 🚀",
        theme="soft",
        allow_flagging="never",
    )

    return mount_gradio_app(app=web_app, blocks=iface, path="/")
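
# To put this playground in front of other people, deploy it with `modal deploy cloud_bucket_mount_loras.py`;
# the `app` web endpoint then gets a stable public URL like the one linked at the top of this example.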


def as_slug(name):
    """Converts a string, e.g. a prompt, into something we can use as a filename."""
    import re

    s = str(name).strip().replace(" ", "-")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    return s