Commit e954ef8: Write example using `CloudBucketMount` with LoRAs (#635)

* Adds S3 mount LoRA example
* Format file
* Linting
* Fixes gpu type
* Changes search_loras() function to run remotely
* Improve call arguments
* Add frontmatter
* Adds a Gradio UI, adds more text

Co-authored-by: Charles Frye <charles@modal.com>

1 parent 8762563. Showing 2 changed files with 307 additions and 0 deletions.
New file: 10_integrations/cloud_bucket_mount_loras.py (+307 lines)
# ---
# output-directory: "/tmp/stable-diffusion-xl"
# runtimes: ["runc", "gvisor"]
# ---
# # LoRAs Galore: Create a LoRA Playground with Modal, Gradio, and S3
#
# This example shows how to mount an S3 bucket in a Modal app using [`CloudBucketMount`](https://modal.com/docs/reference/modal.CloudBucketMount).
# We will download a bunch of LoRA adapters from the [HuggingFace Hub](https://huggingface.co/models) into our S3 bucket,
# then read from that bucket, on the fly, when doing inference.
#
# By default, we use the [IKEA instructions LoRA](https://huggingface.co/ostris/ikea-instructions-lora-sdxl) as an example,
# which produces the following image when prompted to generate "IKEA instructions for building a GPU rig for deep learning":
#
# ![IKEA instructions for building a GPU rig for deep learning](./ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png)
#
# By the end of this example, we'll have deployed a "playground" app where anyone with a browser can try
# out these custom models. That's the power of Modal: custom, autoscaling AI applications, deployed in seconds.
# You can try out our deployment [here](https://modal-labs--loras-galore-app.modal.run).
#
# ## Basic setup
#

import io
import os
from pathlib import Path

from modal import (
    CloudBucketMount,  # the star of the show
    Image,
    Secret,
    Stub,
    asgi_app,
    build,
    enter,
    method,
)

# You will need to have an S3 bucket and AWS credentials to run this example. Refer to the documentation
# for the detailed [IAM permissions](https://modal.com/docs/guide/cloud-bucket-mounts#iam-permissions) those credentials will need.
#
# After you are done creating a bucket and configuring IAM settings,
# you now need to create a [Modal Secret](https://modal.com/docs/guide/secrets). Navigate to the "Secrets" tab and
# click on the AWS card, then fill in the fields with the AWS key and secret created
# previously. Name the Secret `s3-bucket-secret`.

bucket_secret = Secret.from_name("s3-bucket-secret")

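# If you prefer to keep everything in code, a Secret can also be built inline from
# environment variables rather than through the dashboard. This is just a sketch of that
# alternative (it assumes `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are set in your
# local shell); the rest of this example sticks with the named Secret above:
#
#     bucket_secret = Secret.from_dict(
#         {
#             "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
#             "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
#         }
#     )
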
MOUNT_PATH: Path = Path("/mnt/bucket")
LORAS_PATH: Path = MOUNT_PATH / "loras/v5"

# Modal runs serverless functions inside containers.
# The environments those functions run in are defined by
# the container `Image`. The line below constructs an image
# with the dependencies we need -- no need to install them locally.

image = Image.debian_slim().pip_install(
    "huggingface_hub==0.21.4",
    "transformers==4.38.2",
    "diffusers==0.26.3",
    "peft==0.9.0",
    "accelerate==0.27.2",
)

with image.imports():
    # we import these dependencies only inside the container
    import diffusers
    import huggingface_hub
    import torch

# We attach the S3 bucket to all the Modal functions in this app by mounting it on the filesystem they see,
# passing a `CloudBucketMount` to the `volumes` dictionary argument. We can read and write to this mounted bucket
# (almost) as if it were a local directory.
stub = Stub(
    "loras-galore",
    image=image,
    volumes={
        MOUNT_PATH: CloudBucketMount(
            "modal-s3mount-test-bucket",
            secret=bucket_secret,
        )
    },
)
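
# For instance, any function on this Stub can treat the bucket like part of its local
# filesystem. A minimal sketch, not used by the rest of the app (`hello.txt` is just a
# made-up object key):
#
#     @stub.function()
#     def poke_bucket():
#         (MOUNT_PATH / "hello.txt").write_text("hi from Modal")  # written through to S3
#         print(list(MOUNT_PATH.iterdir()))  # lists objects under the mount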


# ## Acquiring LoRA weights
#
# `search_loras()` will use the Hub API to search for LoRAs. We limit LoRAs
# to a maximum size to avoid downloading very large model weights.
# We went with 1 GiB, but feel free to adapt to what works best for you.
@stub.function()
def search_loras(limit: int, max_model_size: int = 1024 * 1024 * 1024):
    api = huggingface_hub.HfApi()

    model_ids: list[str] = []
    for model in api.list_models(
        tags=["lora", "base_model:stabilityai/stable-diffusion-xl-base-1.0"],
        library="diffusers",
        sort="downloads",  # sort by most downloaded
    ):
        try:
            model_size = 0
            for file in api.list_files_info(model.id):
                model_size += file.size

        except huggingface_hub.utils.GatedRepoError:
            print(f"gated model ({model.id}); skipping")
            continue

        # Skip models that are larger than the file limit.
        if model_size > max_model_size:
            print(f"model {model.id} is too large; skipping")
            continue

        model_ids.append(model.id)
        if len(model_ids) >= limit:
            return model_ids

    return model_ids


# Download LoRA weights to the S3 mount. Writing files into this mount will automatically
# upload them to S3. We will run this function in parallel using Modal's [`map`](https://modal.com/docs/reference/modal.Function#map).
@stub.function()
def download_lora(repository_id: str) -> str:
    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

    # CloudBucketMounts will report 0 bytes of available space, leading to many
    # unnecessary warnings, so we patch the method that emits those warnings.
    from huggingface_hub import file_download

    file_download._check_disk_space = lambda x, y: False

    repository_path = LORAS_PATH / repository_id
    try:
        # skip models we've already downloaded
        if not repository_path.exists():
            huggingface_hub.snapshot_download(
                repository_id,
                local_dir=repository_path.as_posix().replace(".", "_"),
                allow_patterns=["*.safetensors"],
            )
        downloaded_lora = len(list(repository_path.rglob("*.safetensors"))) > 0
    except OSError:
        downloaded_lora = False
    except FileNotFoundError:
        downloaded_lora = False
    if downloaded_lora:
        return repository_id


# The `StableDiffusionLoRA` class loads Stable Diffusion XL 1.0 as a base model. When doing inference,
# it will also load whichever LoRA you specify from the S3 bucket.
# For more on the decorators we use below to speed up building and booting,
# check out the [container lifecycle hooks guide](https://modal.com/docs/guide/lifecycle-hooks).
@stub.cls(gpu="a10g")  # A10G GPUs are great for inference
class StableDiffusionLoRA:
    pipe_id = "stabilityai/stable-diffusion-xl-base-1.0"

    @build()  # when we set up our image, we download the base model
    def build(self):
        diffusers.DiffusionPipeline.from_pretrained(
            self.pipe_id, torch_dtype=torch.float16
        )

    @enter()  # when a new container starts, we load the base model onto the GPU
    def load(self):
        self.pipe = diffusers.DiffusionPipeline.from_pretrained(
            self.pipe_id, torch_dtype=torch.float16
        ).to("cuda")

    @method()  # at inference time, we pull in the LoRA weights and pass the final model the prompt
    def run_inference_with_lora(
        self, lora_id: str, prompt: str, seed: int = 8888
    ) -> bytes:
        for file in (LORAS_PATH / lora_id).rglob("*.safetensors"):
            self.pipe.load_lora_weights(lora_id, weight_name=file.name)
            break

        lora_scale = 0.9
        image = self.pipe(
            prompt,
            num_inference_steps=30,
            cross_attention_kwargs={"scale": lora_scale},
            generator=torch.manual_seed(seed),
        ).images[0]

        buffer = io.BytesIO()
        image.save(buffer, format="PNG")

        return buffer.getvalue()

# To try out our program locally, we add a `local_entrypoint`.
# Run it using `modal run cloud_bucket_mount_loras.py`, and pass `--help`
# to see the available options.
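# Modal derives those command-line flags from the keyword arguments of `main` below, so a
# customized run looks roughly like this (flag values here are just illustrative):
#
#     modal run cloud_bucket_mount_loras.py --limit 10 \
#         --prompt "IKEA instructions for building a GPU rig for deep learning"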
@stub.local_entrypoint()
def main(
    limit: int = 100,
    example_lora: str = "ostris/ikea-instructions-lora-sdxl",
    prompt: str = "IKEA instructions for building a GPU rig for deep learning",
    seed: int = 8888,
):
    # Download LoRAs in parallel.
    lora_model_ids = [example_lora]
    lora_model_ids += search_loras.remote(limit)

    downloaded_loras = []
    for model in download_lora.map(lora_model_ids):
        if model:
            downloaded_loras.append(model)

    print(f"downloaded {len(downloaded_loras)} loras => {downloaded_loras}")

    # Run inference using one of the downloaded LoRAs.
    byte_stream = StableDiffusionLoRA().run_inference_with_lora.remote(
        example_lora, prompt, seed
    )
    dir = Path("/tmp/stable-diffusion-xl")
    if not dir.exists():
        dir.mkdir(exist_ok=True, parents=True)

    output_path = dir / f"{as_slug(prompt.lower())}.png"
    print(f"Saving it to {output_path}")
    with open(output_path, "wb") as f:
        f.write(byte_stream)


# ## Trying out LoRAs
#
# Command-line tools are cool, but we can do better!
# With the Gradio library by Hugging Face, we can create a simple web interface
# around our Python inference function, then use Modal to host it for anyone to try out.

from fastapi import FastAPI

web_app = FastAPI()
web_image = Image.debian_slim().pip_install("gradio~=3.50.2", "pillow~=10.2.0")


@stub.function(image=web_image)
@asgi_app()
def app():
    """A simple Gradio interface around our LoRA inference."""
    import io

    import gradio as gr
    from gradio.routes import mount_gradio_app
    from PIL import Image

    # determine which LoRAs are available
    lora_ids = [
        f"{lora_dir.parent.stem}/{lora_dir.stem}"
        for lora_dir in LORAS_PATH.glob("*/*")
    ]

    # pick one to be the default, and set a default prompt
    default_lora_id = (
        "ostris/ikea-instructions-lora-sdxl"
        if "ostris/ikea-instructions-lora-sdxl" in lora_ids
        else lora_ids[0]
    )
    default_prompt = (
        "IKEA instructions for building a GPU rig for deep learning"
        if default_lora_id == "ostris/ikea-instructions-lora-sdxl"
        else "text"
    )

    # the simple path to making an app on Gradio is an Interface: a UI wrapped around a function.
    def go(lora_id: str, prompt: str, seed: int) -> Image:
        return Image.open(
            io.BytesIO(
                StableDiffusionLoRA().run_inference_with_lora.remote(
                    lora_id, prompt, seed
                )
            ),
        )

    iface = gr.Interface(
        go,
        inputs=[  # the inputs to go/our inference function
            gr.Dropdown(
                choices=lora_ids, value=default_lora_id, label="👉 LoRA ID"
            ),
            gr.Textbox(default_prompt, label="🎨 Prompt"),
            gr.Number(value=8888, label="🎲 Random Seed"),
        ],
        outputs=gr.Image(label="Generated Image"),
        # some extra bits to make it look nicer
        title="Try out some of the top custom SDXL models!",
        description="Pick a LoRA finetune of SDXL from the dropdown, then prompt it to generate an image."
        "\n\nCheck out [the code on GitHub](https://github.com/modal-labs/examples/blob/main/10_integrations/cloud_bucket_mount_loras.py)"
        " if you want to create your own version or just see how it works."
        "\n\nPowered by [Modal](https://modal.com) 🚀",
        theme="soft",
        allow_flagging="never",
    )

    return mount_gradio_app(app=web_app, blocks=iface, path="/")
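
# To host this playground for others, deploying the app should be all that's needed --
# roughly the following (the exact URL Modal assigns depends on your workspace):
#
#     modal deploy cloud_bucket_mount_loras.py
#
# which serves the Gradio UI at a stable `*.modal.run` URL like the one linked at the top
# of this example.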


def as_slug(name):
    """Converts a string, e.g. a prompt, into something we can use as a filename."""
    import re

    s = str(name).strip().replace(" ", "-")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    return s
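
# For instance, the default prompt (lowercased in `main`) becomes the filename of the image
# shown at the top of this example:
#
#     >>> as_slug("ikea instructions for building a gpu rig for deep learning")
#     'ikea-instructions-for-building-a-gpu-rig-for-deep-learning'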
Binary file added (+190 KB): 10_integrations/ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png