From 216e90ca927869169f2c5b2483005ef8623fbf6d Mon Sep 17 00:00:00 2001
From: cocktailpeanut
Date: Wed, 2 Aug 2023 18:37:24 -0400
Subject: [PATCH 1/5] no xformers

---
 requirements-no-xformers.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 requirements-no-xformers.txt

diff --git a/requirements-no-xformers.txt b/requirements-no-xformers.txt
new file mode 100644
index 00000000..5095ebb4
--- /dev/null
+++ b/requirements-no-xformers.txt
@@ -0,0 +1,21 @@
+# please make sure you have already a pytorch install that is cuda enabled!
+av
+einops
+flashy>=0.0.1
+hydra-core>=1.1
+hydra_colorlog
+julius
+num2words
+numpy
+sentencepiece
+spacy==3.5.2
+torch>=2.0.0
+torchaudio>=2.0.0
+huggingface_hub
+tqdm
+transformers>=4.31.0 # need Encodec there.
+demucs
+librosa
+gradio
+torchmetrics
+encodec

From 1f92d7f4276cc3c610b76573f15bbefd1b25ec6e Mon Sep 17 00:00:00 2001
From: cocktailpeanut
Date: Fri, 4 Aug 2023 05:27:19 -0400
Subject: [PATCH 2/5] audiogen

---
 demos/audiogen_app.py | 254 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 254 insertions(+)
 create mode 100644 demos/audiogen_app.py

diff --git a/demos/audiogen_app.py b/demos/audiogen_app.py
new file mode 100644
index 00000000..3d8c6889
--- /dev/null
+++ b/demos/audiogen_app.py
@@ -0,0 +1,254 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Updated to account for UI changes from https://github.com/rkfg/audiocraft/blob/long/app.py
+# also released under the MIT license.
+
+import argparse
+from concurrent.futures import ProcessPoolExecutor
+import os
+from pathlib import Path
+import subprocess as sp
+from tempfile import NamedTemporaryFile
+import time
+import typing as tp
+import warnings
+
+import torch
+import gradio as gr
+
+from audiocraft.data.audio_utils import convert_audio
+from audiocraft.data.audio import audio_write
+from audiocraft.models import AudioGen, MultiBandDiffusion
+
+
+MODEL = None # Last used model
+INTERRUPTING = False
+# We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
+_old_call = sp.call
+
+
+def _call_nostderr(*args, **kwargs):
+    # Avoid ffmpeg vomiting on the logs.
+    kwargs['stderr'] = sp.DEVNULL
+    kwargs['stdout'] = sp.DEVNULL
+    _old_call(*args, **kwargs)
+
+
+sp.call = _call_nostderr
+# Preallocating the pool of processes.
+pool = ProcessPoolExecutor(4)
+pool.__enter__()
+
+
+def interrupt():
+    global INTERRUPTING
+    INTERRUPTING = True
+
+
+class FileCleaner:
+    def __init__(self, file_lifetime: float = 3600):
+        self.file_lifetime = file_lifetime
+        self.files = []
+
+    def add(self, path: tp.Union[str, Path]):
+        self._cleanup()
+        self.files.append((time.time(), Path(path)))
+
+    def _cleanup(self):
+        now = time.time()
+        for time_added, path in list(self.files):
+            if now - time_added > self.file_lifetime:
+                if path.exists():
+                    path.unlink()
+                self.files.pop(0)
+            else:
+                break
+
+
+file_cleaner = FileCleaner()
+
+
+def make_waveform(*args, **kwargs):
+    # Further remove some warnings.
+    be = time.time()
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        out = gr.make_waveform(*args, **kwargs)
+        print("Make a video took", time.time() - be)
+        return out
+
+
+def load_model(version='facebook/audiogen-medium'):
+    global MODEL
+    print("Loading model", version)
+    if MODEL is None or MODEL.name != version:
+        MODEL = AudioGen.get_pretrained(version)
+
+
+def load_diffusion():
+    global MBD
+    print("loading MBD")
+    MBD = MultiBandDiffusion.get_mbd_musicgen()
+
+
+def _do_predictions(texts, duration, progress=False, **gen_kwargs):
+    MODEL.set_generation_params(duration=duration, **gen_kwargs)
+    be = time.time()
+    target_sr = 32000
+    target_ac = 1
+
+    outputs = MODEL.generate(texts, progress=progress)
+    if USE_DIFFUSION:
+        outputs_diffusion = MBD.tokens_to_wav(outputs[1])
+        outputs = torch.cat([outputs[0], outputs_diffusion], dim=0)
+    outputs = outputs.detach().cpu().float()
+    pending_videos = []
+    out_wavs = []
+    for output in outputs:
+        with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+            audio_write(
+                file.name, output, MODEL.sample_rate, strategy="loudness",
+                loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+            pending_videos.append(pool.submit(make_waveform, file.name))
+            out_wavs.append(file.name)
+            file_cleaner.add(file.name)
+    out_videos = [pending_video.result() for pending_video in pending_videos]
+    for video in out_videos:
+        file_cleaner.add(video)
+    print("batch finished", len(texts), time.time() - be)
+    print("Tempfiles currently stored: ", len(file_cleaner.files))
+    return out_videos, out_wavs
+
+
+
+def predict_full(model, decoder, text, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
+    global INTERRUPTING
+    global USE_DIFFUSION
+    INTERRUPTING = False
+    if temperature < 0:
+        raise gr.Error("Temperature must be >= 0.")
+    if topk < 0:
+        raise gr.Error("Topk must be non-negative.")
+    if topp < 0:
+        raise gr.Error("Topp must be non-negative.")
+
+    topk = int(topk)
+    if decoder == "MultiBand_Diffusion":
+        USE_DIFFUSION = True
+        load_diffusion()
+    else:
+        USE_DIFFUSION = False
+    load_model(model)
+
+    def _progress(generated, to_generate):
+        progress((min(generated, to_generate), to_generate))
+        if INTERRUPTING:
+            raise gr.Error("Interrupted.")
+    MODEL.set_custom_progress_callback(_progress)
+
+    videos, wavs = _do_predictions(
+        [text], duration, progress=True,
+        top_k=topk, top_p=topp, temperature=temperature, cfg_coef=cfg_coef)
+    if USE_DIFFUSION:
+        return videos[0], wavs[0], videos[1], wavs[1]
+    return videos[0], wavs[0], None, None
+
+
+
+def toggle_diffusion(choice):
+    if choice == "MultiBand_Diffusion":
+        return [gr.update(visible=True)] * 2
+    else:
+        return [gr.update(visible=False)] * 2
+
+
+def ui_full(launch_kwargs):
+    with gr.Blocks() as interface:
+        gr.Markdown(
+            """
+            # AudioGen
+            This is your private demo for [AudioGen](https://github.com/facebookresearch/audiocraft/blob/main/docs/AUDIOGEN.md),
+            a simple and controllable model for music generation
+            """
+        )
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    text = gr.Text(label="Input Text", interactive=True)
+                with gr.Row():
+                    submit = gr.Button("Submit")
+                    # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
+                    _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
+                with gr.Row():
+                    model = gr.Radio(["facebook/audiogen-medium"], label="Model", value="facebook/audiogen-medium", interactive=True)
+                with gr.Row():
+                    decoder = gr.Radio(["Default", "MultiBand_Diffusion"],
+                                       label="Decoder", value="Default", interactive=True)
+                with gr.Row():
+                    duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
+                with gr.Row():
+                    topk = gr.Number(label="Top-k", value=250, interactive=True)
+                    topp = gr.Number(label="Top-p", value=0, interactive=True)
+                    temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
+                    cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
+            with gr.Column():
+                output = gr.Video(label="Generated Music")
+                audio_output = gr.Audio(label="Generated Music (wav)", type='filepath')
+                diffusion_output = gr.Video(label="MultiBand Diffusion Decoder")
+                audio_diffusion = gr.Audio(label="MultiBand Diffusion Decoder (wav)", type='filepath')
+        submit.click(toggle_diffusion, decoder, [diffusion_output, audio_diffusion], queue=False,
+                     show_progress=False).then(predict_full, inputs=[model, decoder, text, duration, topk, topp,
+                                                                     temperature, cfg_coef],
+                                               outputs=[output, audio_output, diffusion_output, audio_diffusion])
+
+    interface.queue().launch(**launch_kwargs)
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--listen',
+        type=str,
+        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
+        help='IP to listen on for connections to Gradio',
+    )
+    parser.add_argument(
+        '--username', type=str, default='', help='Username for authentication'
+    )
+    parser.add_argument(
+        '--password', type=str, default='', help='Password for authentication'
+    )
+    parser.add_argument(
+        '--server_port',
+        type=int,
+        default=0,
+        help='Port to run the server listener on',
+    )
+    parser.add_argument(
+        '--inbrowser', action='store_true', help='Open in browser'
+    )
+    parser.add_argument(
+        '--share', action='store_true', help='Share the gradio UI'
+    )
+
+    args = parser.parse_args()
+
+    launch_kwargs = {}
+    launch_kwargs['server_name'] = args.listen
+
+    if args.username and args.password:
+        launch_kwargs['auth'] = (args.username, args.password)
+    if args.server_port:
+        launch_kwargs['server_port'] = args.server_port
+    if args.inbrowser:
+        launch_kwargs['inbrowser'] = args.inbrowser
+    if args.share:
+        launch_kwargs['share'] = args.share
+
+    # Show the interface
+    ui_full(launch_kwargs)

From 0a75e715f6fee73ebf92c1f8105658ace0cf782b Mon Sep 17 00:00:00 2001
From: cocktailpeanut
Date: Fri, 4 Aug 2023 14:22:34 -0400
Subject: [PATCH 3/5] remove multiband for now

---
 demos/audiogen_app.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/demos/audiogen_app.py b/demos/audiogen_app.py
index 3d8c6889..8b6e814b 100644
--- a/demos/audiogen_app.py
+++ b/demos/audiogen_app.py
@@ -156,6 +156,7 @@ def _progress(generated, to_generate):
     if USE_DIFFUSION:
         return videos[0], wavs[0], videos[1], wavs[1]
     return videos[0], wavs[0], None, None
+    return videos[0], wavs[0]
 
 
 
@@ -186,8 +187,7 @@ def ui_full(launch_kwargs):
                 with gr.Row():
                     model = gr.Radio(["facebook/audiogen-medium"], label="Model", value="facebook/audiogen-medium", interactive=True)
                 with gr.Row():
-                    decoder = gr.Radio(["Default", "MultiBand_Diffusion"],
-                                       label="Decoder", value="Default", interactive=True)
+                    decoder = gr.Radio(["Default"], label="Decoder", value="Default", interactive=False)
                 with gr.Row():
                     duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
                 with gr.Row():
@@ -198,12 +198,7 @@ def ui_full(launch_kwargs):
             with gr.Column():
                 output = gr.Video(label="Generated Music")
                 audio_output = gr.Audio(label="Generated Music (wav)", type='filepath')
-                diffusion_output = gr.Video(label="MultiBand Diffusion Decoder")
-                audio_diffusion = gr.Audio(label="MultiBand Diffusion Decoder (wav)", type='filepath')
-        submit.click(toggle_diffusion, decoder, [diffusion_output, audio_diffusion], queue=False,
-                     show_progress=False).then(predict_full, inputs=[model, decoder, text, duration, topk, topp,
-                                                                     temperature, cfg_coef],
-                                               outputs=[output, audio_output, diffusion_output, audio_diffusion])
+        submit.click(predict_full, inputs=[model, decoder, text, duration, topk, topp, temperature, cfg_coef], outputs=[output, audio_output])
 
     interface.queue().launch(**launch_kwargs)
 

From 852db42b2cef298e5474c98c481f4936a2ed5d3f Mon Sep 17 00:00:00 2001
From: cocktailpeanut
Date: Fri, 4 Aug 2023 16:18:53 -0400
Subject: [PATCH 4/5] music => audio

---
 demos/audiogen_app.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/demos/audiogen_app.py b/demos/audiogen_app.py
index 8b6e814b..0193bad7 100644
--- a/demos/audiogen_app.py
+++ b/demos/audiogen_app.py
@@ -173,7 +173,7 @@ def ui_full(launch_kwargs):
             """
             # AudioGen
             This is your private demo for [AudioGen](https://github.com/facebookresearch/audiocraft/blob/main/docs/AUDIOGEN.md),
-            a simple and controllable model for music generation
+            a simple and controllable model for audio generation
             """
         )
         with gr.Row():
@@ -196,8 +196,8 @@ def ui_full(launch_kwargs):
                     temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
                     cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
-                output = gr.Video(label="Generated Music")
-                audio_output = gr.Audio(label="Generated Music (wav)", type='filepath')
+                output = gr.Video(label="Generated Audio")
+                audio_output = gr.Audio(label="Generated Audio (wav)", type='filepath')
         submit.click(predict_full, inputs=[model, decoder, text, duration, topk, topp, temperature, cfg_coef], outputs=[output, audio_output])
 
     interface.queue().launch(**launch_kwargs)

From a6b772f77e272a04b84a0db0f3b5d9b2f54e62cc Mon Sep 17 00:00:00 2001
From: cocktailpeanut
Date: Sun, 6 Aug 2023 20:58:45 -0400
Subject: [PATCH 5/5] remove requirements-no-xformers.txt

---
 requirements-no-xformers.txt | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 requirements-no-xformers.txt

diff --git a/requirements-no-xformers.txt b/requirements-no-xformers.txt
deleted file mode 100644
index 5095ebb4..00000000
--- a/requirements-no-xformers.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# please make sure you have already a pytorch install that is cuda enabled!
-av
-einops
-flashy>=0.0.1
-hydra-core>=1.1
-hydra_colorlog
-julius
-num2words
-numpy
-sentencepiece
-spacy==3.5.2
-torch>=2.0.0
-torchaudio>=2.0.0
-huggingface_hub
-tqdm
-transformers>=4.31.0 # need Encodec there.
-demucs
-librosa
-gradio
-torchmetrics
-encodec
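
Usage note (an editor's addition, not part of the patch series): assuming these patches are applied on an audiocraft checkout with the package and its requirements installed, the Gradio demo added in demos/audiogen_app.py can be started from the repository root with something like:

    python demos/audiogen_app.py --listen 127.0.0.1 --server_port 7860 --inbrowser

The --listen, --server_port and --inbrowser flags correspond to the argparse options defined at the bottom of the script. Leaving --server_port at its default of 0 simply omits the option so Gradio falls back to its own port selection, and --share requests a public Gradio link instead of a purely local one.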