"""Generate ``README.md`` from the ``bento.yaml`` files under ``bentoml/bentos``.

Every ``bento.yaml`` found below ``root_dir`` is parsed and reduced to a
``{"name", "version", "hf_model"}`` entry.  Entries are grouped by name,
de-duplicated, sorted by version, and the groups are ordered by
``model_priority`` before being rendered through the ``readme_md.tpl``
Jinja template.
"""

import glob
import os
from collections import defaultdict

root_dir = "bentoml/bentos"

# Display name used for each model family (passed to the template).
model_display_names = {
    "llama3": "Llama-3",
    "phi3": "Phi-3",
    "mistral": "Mistral",
    "qwen2": "Qwen-2",
    "gemma": "Gemma",
    "llama2": "Llama-2",
}

# Lower number = listed earlier; families not listed here sort last.
model_priority = {
    "llama3": 1,
    "phi3": 2,
    "mistral": 3,
    "qwen2": 4,
    "gemma": 5,
    "llama2": 6,
}


def _extract_hf_model(data):
    """Return the first ``model`` default declared by any route, or ``None``.

    Looks at ``schema.routes[*].input.properties.model.default``.  Missing
    or null intermediate keys are treated as empty instead of raising.
    """
    for route in (data.get("schema") or {}).get("routes") or []:
        if not isinstance(route, dict):
            continue
        properties = (route.get("input") or {}).get("properties") or {}
        details = properties.get("model")
        if isinstance(details, dict) and "default" in details:
            # Stop at the first route that declares a default so later
            # routes cannot silently overwrite it.
            return details["default"]
    return None


def _build_entry(data):
    """Reduce one parsed ``bento.yaml`` to a README entry.

    Returns ``None`` when the document is not a mapping (for example an
    empty file, which ``yaml.safe_load`` parses to ``None``) or when it
    lacks a name, a version, or a usable (non-null) model id.
    """
    if not isinstance(data, dict):
        return None
    hf_model = _extract_hf_model(data)
    if hf_model is None:
        # Honour an ``hf_model`` key already present at the top level.
        hf_model = data.get("hf_model")
    if "name" not in data or "version" not in data or hf_model is None:
        return None
    return {
        "name": data["name"],
        "version": data["version"],
        "hf_model": hf_model,
    }


def _dedupe_and_sort(items):
    """Drop repeated (name, version, hf_model) entries and sort by version.

    The first occurrence of each combination is kept; the sort is stable,
    so entries with equal versions keep their first-seen order.
    """
    unique = {}
    for item in items:
        unique.setdefault((item["name"], item["version"], item["hf_model"]), item)
    return sorted(unique.values(), key=lambda item: item["version"])


def group_bentos(documents):
    """Group parsed ``bento.yaml`` documents for rendering.

    Args:
        documents: iterable of objects as returned by ``yaml.safe_load``.

    Returns:
        A list of ``(name, entries)`` tuples ordered by ``model_priority``
        (unknown names last, in first-seen order).  ``entries`` is the
        de-duplicated, version-sorted list of entry dicts for that name.
    """
    grouped = defaultdict(list)
    for data in documents:
        entry = _build_entry(data)
        if entry is not None:
            grouped[entry["name"]].append(entry)

    cleaned = {name: _dedupe_and_sort(items) for name, items in grouped.items()}
    return sorted(
        cleaned.items(),
        key=lambda pair: model_priority.get(pair[0], float("inf")),
    )


def _load_documents(paths):
    """Yield the parsed content of each YAML file in ``paths``."""
    # Imported here so the grouping helpers stay importable without PyYAML.
    import yaml

    for path in paths:
        with open(path, "r", encoding="utf-8") as f:
            yield yaml.safe_load(f)


def main():
    """Scan ``root_dir``, render ``readme_md.tpl`` and write ``README.md``."""
    # Imported here so the grouping helpers stay importable without Jinja2.
    from jinja2 import Environment, FileSystemLoader

    # glob order depends on the filesystem; sort for reproducible output.
    yaml_files = sorted(
        glob.glob(os.path.join(root_dir, "**/bento.yaml"), recursive=True)
    )
    sorted_grouped_data = group_bentos(_load_documents(yaml_files))

    env = Environment(loader=FileSystemLoader("."))
    template = env.get_template("readme_md.tpl")
    readme_content = template.render(
        grouped_data=sorted_grouped_data, model_display_names=model_display_names
    )

    # FileSystemLoader reads the template as UTF-8; write the result the same way.
    with open("README.md", "w", encoding="utf-8") as readme_file:
        readme_file.write(readme_content)


if __name__ == "__main__":
    main()