From a06574e0cf12eda7d2374b3ba01dc7a0a3bb3ae6 Mon Sep 17 00:00:00 2001 From: hacobe <91226467+hacobe@users.noreply.github.com> Date: Fri, 6 Jan 2023 18:28:30 -0800 Subject: [PATCH 01/13] Document running the entire benchmarking suite and testing if the results differ from the paper. --- benchmarking/README.md | 81 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index 3f5114545..26b70a417 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -4,16 +4,93 @@ This directory contains sacred configuration files for benchmarking imitation's Configuration files can be loaded either from the CLI or from the Python API. The examples below assume that your current working directory is the root of the `imitation` repository. This is not necessarily the case and you should adjust your paths accordingly. -## CLI +To run a single benchmark from the command line: ```bash python -m imitation.scripts.<train_script> <algo> with benchmarking/<config_name>.json ``` + `train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`. -## Python +To run a single benchmark from Python add the config to your experiment: ```python ... ex.add_config('benchmarking/<config_name>.json') ``` + +To generate the commands to run the entire benchmarking suite with multiple random seeds: + +```bash +python experiments/commands.py \ + --name=run0 \ + --cfg_pattern=benchmarking/example_*.json \ + --seeds 0,1,2 \ + --output_dir=output +``` + +To run those commands in parallel: + +```bash +python experiments/commands.py ... | parallel -j 8 +``` + +To generate the commands for the Hofvarpnir cluster: + +```bash +python experiments/commands.py \ + --name=run0 \ + --cfg_pattern=benchmarking/example_*.json \ + --seeds 0,1,2 \ + --output_dir=/data/output \ + --remote +``` + +To run those commands pipe them into bash: + +```bash +python experiments/commands.py ... 
| bash +``` + +To produce a table with all the results: + +```bash +python -m imitation.scripts.analyze analyze_imitation with \ + source_dir_str="output/sacred" table_verbosity=0 \ + csv_output_path=results.csv \ + run_name="run0" +``` + +To compute a p-value to test whether the differences from the paper are statistically significant: + +```python +import pandas as pd +import numpy as np +import scipy + +data = pd.read_csv("results.csv") +data["imit_return"] = data["imit_return_summary"].apply(lambda x: float(x.split(" ")[0])) +summary = data[["algo", "env_name", "imit_return"]].groupby(["algo", "env_name"]).describe() +summary.columns = summary.columns.get_level_values(1) +summary = summary.reset_index() + +# Table 2 (https://arxiv.org/pdf/2211.11972.pdf) +paper = pd.DataFrame.from_records([ + {"algo": "BC", "env_name": "seals/Ant-v0", "mean": 1953, "margin": 123}, + {"algo": "BC", "env_name": "seals/HalfCheetah-v0", "mean": 3446, "margin": 130}, +]) +paper["count"] = 5 +paper["confidence_level"] = 0.95 +# Back out the standard deviation from the margin of error. +paper["std"] = (paper["margin"] * paper["count"]) / scipy.stats.t.ppf(1-((1-paper["confidence_level"])/2), paper["count"] -1) + +comparison = pd.merge(summary, paper, on=["algo", "env_name"]) + +comparison["pvalue"] = scipy.stats.ttest_ind_from_stats( + comparison["mean_x"], + comparison["std_x"], + comparison["count_x"], + comparison["mean_y"], + comparison["std_y"], + comparison["count_y"]).pvalue +``` From d8a82dc87160782b32c9fe5ae0fa3d13e4d763cd Mon Sep 17 00:00:00 2001 From: hacobe <91226467+hacobe@users.noreply.github.com> Date: Sat, 7 Jan 2023 13:22:21 -0800 Subject: [PATCH 02/13] Fix seeds flag. 
--- benchmarking/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index 26b70a417..032522554 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -25,7 +25,7 @@ To generate the commands to run the entire benchmarking suite with multiple rand python experiments/commands.py \ --name=run0 \ --cfg_pattern=benchmarking/example_*.json \ - --seeds 0,1,2 \ + --seeds 0 1 2 \ --output_dir=output ``` @@ -41,7 +41,7 @@ To generate the commands for the Hofvarpnir cluster: python experiments/commands.py \ --name=run0 \ --cfg_pattern=benchmarking/example_*.json \ - --seeds 0,1,2 \ + --seeds 0 1 2 \ --output_dir=/data/output \ --remote ``` From d5587d54ed32dd94aa40d604368f02a37e27c775 Mon Sep 17 00:00:00 2001 From: hacobe <91226467+hacobe@users.noreply.github.com> Date: Sun, 8 Jan 2023 09:26:27 -0800 Subject: [PATCH 03/13] Fix margin of error formula --- benchmarking/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index 032522554..e5ca16184 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -82,7 +82,7 @@ paper = pd.DataFrame.from_records([ paper["count"] = 5 paper["confidence_level"] = 0.95 # Back out the standard deviation from the margin of error. 
-paper["std"] = (paper["margin"] * paper["count"]) / scipy.stats.t.ppf(1-((1-paper["confidence_level"])/2), paper["count"] -1) +paper["std"] = (paper["margin"] * np.sqrt(paper["count"])) / scipy.stats.t.ppf(1-((1-paper["confidence_level"])/2), paper["count"] -1) comparison = pd.merge(summary, paper, on=["algo", "env_name"]) From d9f63a8357d598cb3c8449f326024443c53cc20b Mon Sep 17 00:00:00 2001 From: timbauman Date: Mon, 8 May 2023 14:25:32 -0700 Subject: [PATCH 04/13] format --- benchmarking/README.md | 81 ++++++++-------- src/imitation/scripts/compare_to_baseline.py | 98 ++++++++++++++++++++ 2 files changed, 140 insertions(+), 39 deletions(-) create mode 100644 src/imitation/scripts/compare_to_baseline.py diff --git a/benchmarking/README.md b/benchmarking/README.md index e5ca16184..d75c1b6f7 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -4,27 +4,43 @@ This directory contains sacred configuration files for benchmarking imitation's Configuration files can be loaded either from the CLI or from the Python API. The examples below assume that your current working directory is the root of the `imitation` repository. This is not necessarily the case and you should adjust your paths accordingly. +## Single benchmark + To run a single benchmark from the command line: ```bash -python -m imitation.scripts.<train_script> <algo> with benchmarking/<config_name>.json +python -m imitation.scripts.<train_script> <algo> \ + --name=<name> with benchmarking/<config_name>.json ``` `train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`. -To run a single benchmark from Python add the config to your experiment: +To view the results: + +```bash +python -m imitation.scripts.analyze analyze_imitation with \ + source_dir_str="output/sacred" table_verbosity=0 \ + csv_output_path=results.csv \ + run_name="<name>" +``` + +To run a single benchmark from Python add the config to your experiment: (what does this mean?? do we have an example Python experiment?) 
```python ... ex.add_config('benchmarking/<config_name>.json') ``` +## Entire benchmark suite + +### Running locally + To generate the commands to run the entire benchmarking suite with multiple random seeds: ```bash python experiments/commands.py \ - --name=run0 \ - --cfg_pattern=benchmarking/example_*.json \ + --name=<name> \ + --cfg_pattern "benchmarking/example_*.json" \ --seeds 0 1 2 \ --output_dir=output ``` @@ -32,15 +48,23 @@ python experiments/commands.py \ To run those commands in parallel: ```bash -python experiments/commands.py ... | parallel -j 8 +python experiments/commands.py \ + --name=<name> \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir=output | parallel -j 8 ``` +(You may need to `brew install parallel` to get this to work on Mac) + +### Running on Hofvarpnir + To generate the commands for the Hofvarpnir cluster: ```bash python experiments/commands.py \ - --name=run0 \ - --cfg_pattern=benchmarking/example_*.json \ + --name=<name> \ + --cfg_pattern "benchmarking/example_*.json" \ --seeds 0 1 2 \ --output_dir=/data/output \ --remote @@ -49,48 +73,27 @@ python experiments/commands.py \ To run those commands pipe them into bash: ```bash -python experiments/commands.py ... 
| bash +python experiments/commands.py \ + --name <name> \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir /data/output \ + --remote | bash ``` +### Results + To produce a table with all the results: ```bash python -m imitation.scripts.analyze analyze_imitation with \ source_dir_str="output/sacred" table_verbosity=0 \ csv_output_path=results.csv \ - run_name="run0" + run_name="<name>" ``` To compute a p-value to test whether the differences from the paper are statistically significant: -```python -import pandas as pd -import numpy as np -import scipy - -data = pd.read_csv("results.csv") -data["imit_return"] = data["imit_return_summary"].apply(lambda x: float(x.split(" ")[0])) -summary = data[["algo", "env_name", "imit_return"]].groupby(["algo", "env_name"]).describe() -summary.columns = summary.columns.get_level_values(1) -summary = summary.reset_index() - -# Table 2 (https://arxiv.org/pdf/2211.11972.pdf) -paper = pd.DataFrame.from_records([ - {"algo": "BC", "env_name": "seals/Ant-v0", "mean": 1953, "margin": 123}, - {"algo": "BC", "env_name": "seals/HalfCheetah-v0", "mean": 3446, "margin": 130}, -]) -paper["count"] = 5 -paper["confidence_level"] = 0.95 -# Back out the standard deviation from the margin of error. 
-paper["std"] = (paper["margin"] * np.sqrt(paper["count"])) / scipy.stats.t.ppf(1-((1-paper["confidence_level"])/2), paper["count"] -1) - -comparison = pd.merge(summary, paper, on=["algo", "env_name"]) - -comparison["pvalue"] = scipy.stats.ttest_ind_from_stats( - comparison["mean_x"], - comparison["std_x"], - comparison["count_x"], - comparison["mean_y"], - comparison["std_y"], - comparison["count_y"]).pvalue +```bash +python -m imitation.scripts.compare_to_baseline results.csv ``` diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py new file mode 100644 index 000000000..92b4f97e8 --- /dev/null +++ b/src/imitation/scripts/compare_to_baseline.py @@ -0,0 +1,98 @@ +"""Compare experiment results to baseline results. + +This script compares experiment results to the results reported in the +[paper](https://arxiv.org/pdf/2211.11972.pdf). It takes as input a CSV file +containing experiment results, and outputs a table of p-values comparing the experiment +results to the baseline results. + +Usage: + $ python compare_to_baseline.py + +The results file should be a CSV file containing the following columns: + * algo: The name of the imitation algorithm. + * env_name: The name of the environment. + * imit_return_summary: A string containing the mean and standard deviation of the + experiment returns, as reported by `imitation.scripts.analyze`. +""" + +import numpy as np +import pandas as pd +import scipy + +from imitation.data import types + + +def compare_results_to_baseline(results_file: types.AnyPath) -> str: + """Compare benchmark results to baseline results. + + Args: + results_file: Path to a CSV file containing experiment results. + + Returns: + A string containing a table of p-values comparing the experiment results to + the baseline results. 
+ """ + data = pd.read_csv(results_file) + data["imit_return"] = data["imit_return_summary"].apply( + lambda x: float(x.split(" ")[0]), + ) + summary = ( + data[["algo", "env_name", "imit_return"]] + .groupby(["algo", "env_name"]) + .describe() + ) + summary.columns = summary.columns.get_level_values(1) + summary = summary.reset_index() + + # Table 2 (https://arxiv.org/pdf/2211.11972.pdf) + # todo: store results in this repo outside this file + paper = pd.DataFrame.from_records( + [ + { + "algo": "??exp_command=bc", + "env_name": "seals/Ant-v0", + "mean": 1953, + "margin": 123, + }, + { + "algo": "??exp_command=bc", + "env_name": "seals/HalfCheetah-v0", + "mean": 3446, + "margin": 130, + }, + ], + ) + paper["count"] = 5 + paper["confidence_level"] = 0.95 + # Back out the standard deviation from the margin of error. + paper["std"] = (paper["margin"] * np.sqrt(paper["count"])) / scipy.stats.t.ppf( + 1 - ((1 - paper["confidence_level"]) / 2), + paper["count"] - 1, + ) + + comparison = pd.merge(summary, paper, on=["algo", "env_name"]) + + comparison["pvalue"] = scipy.stats.ttest_ind_from_stats( + comparison["mean_x"], + comparison["std_x"], + comparison["count_x"], + comparison["mean_y"], + comparison["std_y"], + comparison["count_y"], + ).pvalue + + return comparison[["algo", "env_name", "pvalue"]].to_string() + + +def main() -> None: # pragma: no cover + """Run the script.""" + import sys + + if len(sys.argv) != 2: + print("Supply a path to a results file") + else: + print(compare_results_to_baseline(sys.argv[1])) + + +if __name__ == "__main__": + main() From 54094eb39690c318b8cd55a1ef8ed9e1b3d64fe2 Mon Sep 17 00:00:00 2001 From: timbauman Date: Mon, 8 May 2023 14:37:50 -0700 Subject: [PATCH 05/13] minor edits --- benchmarking/README.md | 6 +++--- src/imitation/scripts/compare_to_baseline.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index d75c1b6f7..b3722ca22 100644 --- 
a/benchmarking/README.md +++ b/benchmarking/README.md @@ -1,8 +1,8 @@ # Benchmarking imitation -This directory contains sacred configuration files for benchmarking imitation's algorithms. For v0.3.2, these correspond to the hyperparameters used in the paper [imitation: Clean Imitation Learning Implementations](https://www.rocamonde.com/publication/gleave-imitation-2022/). +This directory contains Sacred configuration files for benchmarking imitation's algorithms. For v0.3.2, these correspond to the hyperparameters used in the paper [imitation: Clean Imitation Learning Implementations](https://www.rocamonde.com/publication/gleave-imitation-2022/). -Configuration files can be loaded either from the CLI or from the Python API. The examples below assume that your current working directory is the root of the `imitation` repository. This is not necessarily the case and you should adjust your paths accordingly. +Configuration files can be loaded either from the CLI or from the Python API. The examples below assume that your current working directory is the root of the `imitation` repository. ## Single benchmark @@ -24,7 +24,7 @@ python -m imitation.scripts.analyze analyze_imitation with \ run_name="<name>" ``` -To run a single benchmark from Python add the config to your experiment: (what does this mean?? do we have an example Python experiment?) +To run a single benchmark from Python add the config to your experiment: ```python ... 
diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py index 92b4f97e8..9ceb411da 100644 --- a/src/imitation/scripts/compare_to_baseline.py +++ b/src/imitation/scripts/compare_to_baseline.py @@ -46,7 +46,7 @@ def compare_results_to_baseline(results_file: types.AnyPath) -> str: # Table 2 (https://arxiv.org/pdf/2211.11972.pdf) # todo: store results in this repo outside this file - paper = pd.DataFrame.from_records( + baseline = pd.DataFrame.from_records( [ { "algo": "??exp_command=bc", @@ -62,15 +62,17 @@ def compare_results_to_baseline(results_file: types.AnyPath) -> str: }, ], ) - paper["count"] = 5 - paper["confidence_level"] = 0.95 + baseline["count"] = 5 + baseline["confidence_level"] = 0.95 # Back out the standard deviation from the margin of error. - paper["std"] = (paper["margin"] * np.sqrt(paper["count"])) / scipy.stats.t.ppf( - 1 - ((1 - paper["confidence_level"]) / 2), - paper["count"] - 1, + baseline["std"] = ( + baseline["margin"] * np.sqrt(baseline["count"]) + ) / scipy.stats.t.ppf( + 1 - ((1 - baseline["confidence_level"]) / 2), + baseline["count"] - 1, ) - comparison = pd.merge(summary, paper, on=["algo", "env_name"]) + comparison = pd.merge(summary, baseline, on=["algo", "env_name"]) comparison["pvalue"] = scipy.stats.ttest_ind_from_stats( comparison["mean_x"], From 25cc78f52842ad0aa576e1a98330962add33f313 Mon Sep 17 00:00:00 2001 From: timbauman Date: Mon, 8 May 2023 15:27:23 -0700 Subject: [PATCH 06/13] adding tests --- benchmarking/README.md | 2 ++ src/imitation/scripts/compare_to_baseline.py | 6 ++--- tests/scripts/test_scripts.py | 27 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index b3722ca22..a5e33afb6 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -97,3 +97,5 @@ To compute a p-value to test whether the differences from the paper are statisti ```bash python -m 
imitation.scripts.compare_to_baseline results.csv ``` + +TODO: Updating reference benchmark data diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py index 9ceb411da..c60812b20 100644 --- a/src/imitation/scripts/compare_to_baseline.py +++ b/src/imitation/scripts/compare_to_baseline.py @@ -22,7 +22,7 @@ from imitation.data import types -def compare_results_to_baseline(results_file: types.AnyPath) -> str: +def compare_results_to_baseline(results_file: types.AnyPath) -> pd.DataFrame: """Compare benchmark results to baseline results. Args: @@ -83,7 +83,7 @@ def compare_results_to_baseline(results_file: types.AnyPath) -> str: comparison["count_y"], ).pvalue - return comparison[["algo", "env_name", "pvalue"]].to_string() + return comparison[["algo", "env_name", "pvalue"]] def main() -> None: # pragma: no cover @@ -93,7 +93,7 @@ def main() -> None: # pragma: no cover if len(sys.argv) != 2: print("Supply a path to a results file") else: - print(compare_results_to_baseline(sys.argv[1])) + print(compare_results_to_baseline(sys.argv[1]).to_string()) if __name__ == "__main__": diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index a815c8a01..0387a3f5e 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -33,6 +33,7 @@ from imitation.rewards import reward_nets from imitation.scripts import ( analyze, + compare_to_baseline, convert_trajs, eval_policy, parallel, @@ -1075,3 +1076,29 @@ def test_convert_trajs_from_current_format_is_idempotent( assert ( filecmp.dircmp(converted_path, original_path).diff_files == [] ), "convert_trajs not idempotent" + +@pytest.mark.parametrize( + "imit_returns,p_value", + [( + [2000, 1900, 2100], + 0.6, + ), ( + [1000, 900, 1100], + 0.05, +)], +) +def test_compare_to_baseline_p_values(tmpdir: str, imit_returns: List[float], p_value: float): + comparison = pd.DataFrame.from_records( + [ + { + "algo": "??exp_command=bc", + "env_name": 
"seals/Ant-v0", + "imit_return_summary": f"{imit_return} +/- 0.0", + } + for imit_return in imit_returns + ], + ) + tmpfile = pathlib.Path(tmpdir) / "comparison.csv" + comparison.to_csv(tmpfile) + + assert compare_to_baseline.compare_results_to_baseline(results_file=tmpfile)["pvalue"][0] < p_value From 6a378d8d2b72c04dd327409e4921461949bbd1eb Mon Sep 17 00:00:00 2001 From: timbauman Date: Mon, 8 May 2023 15:30:02 -0700 Subject: [PATCH 07/13] Add test --- benchmarking/README.md | 2 -- tests/scripts/test_scripts.py | 31 ++++++++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index a5e33afb6..b3722ca22 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -97,5 +97,3 @@ To compute a p-value to test whether the differences from the paper are statisti ```bash python -m imitation.scripts.compare_to_baseline results.csv ``` - -TODO: Updating reference benchmark data diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 0387a3f5e..f704e628f 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -1077,17 +1077,25 @@ def test_convert_trajs_from_current_format_is_idempotent( filecmp.dircmp(converted_path, original_path).diff_files == [] ), "convert_trajs not idempotent" + @pytest.mark.parametrize( "imit_returns,p_value", - [( - [2000, 1900, 2100], - 0.6, - ), ( - [1000, 900, 1100], - 0.05, -)], + [ + ( + [2000, 1900, 2100], + 0.6, + ), + ( + [1000, 900, 1100], + 0.05, + ), + ], ) -def test_compare_to_baseline_p_values(tmpdir: str, imit_returns: List[float], p_value: float): +def test_compare_to_baseline_p_values( + tmpdir: str, + imit_returns: List[float], + p_value: float, +): comparison = pd.DataFrame.from_records( [ { @@ -1101,4 +1109,9 @@ def test_compare_to_baseline_p_values(tmpdir: str, imit_returns: List[float], p_ tmpfile = pathlib.Path(tmpdir) / "comparison.csv" comparison.to_csv(tmpfile) - assert 
compare_to_baseline.compare_results_to_baseline(results_file=tmpfile)["pvalue"][0] < p_value + assert ( + compare_to_baseline.compare_results_to_baseline(results_file=tmpfile)["pvalue"][ + 0 + ] + < p_value + ) From b890b4e06399200ac5b314a90f5e0b26e3cfc737 Mon Sep 17 00:00:00 2001 From: Tim Bauman Date: Tue, 9 May 2023 08:59:45 -0700 Subject: [PATCH 08/13] Update benchmarking/README.md Co-authored-by: Adam Gleave --- benchmarking/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index b3722ca22..b6e247839 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -24,7 +24,7 @@ python -m imitation.scripts.analyze analyze_imitation with \ run_name="<name>" ``` -To run a single benchmark from Python add the config to your experiment: +To run a single benchmark from Python add the config to your Sacred experiment `ex`: ```python ... From a23740535acbba1cad77f1a4ad218499cfc36cf9 Mon Sep 17 00:00:00 2001 From: Tim Bauman Date: Tue, 9 May 2023 08:59:53 -0700 Subject: [PATCH 09/13] Update benchmarking/README.md Co-authored-by: Adam Gleave --- benchmarking/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index b6e247839..7f05c7017 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -55,7 +55,7 @@ python experiments/commands.py \ --output_dir=output | parallel -j 8 ``` -(You may need to `brew install parallel` to get this to work on Mac) +(You may need to `brew install parallel` to get this to work on Mac.) 
### Running on Hofvarpnir From 27b9d82cb6d6a04be8953a313c992f2d942b0fe7 Mon Sep 17 00:00:00 2001 From: timbauman Date: Tue, 9 May 2023 10:04:20 -0700 Subject: [PATCH 10/13] expand expression --- src/imitation/scripts/compare_to_baseline.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py index c60812b20..97595373c 100644 --- a/src/imitation/scripts/compare_to_baseline.py +++ b/src/imitation/scripts/compare_to_baseline.py @@ -65,12 +65,14 @@ def compare_results_to_baseline(results_file: types.AnyPath) -> pd.DataFrame: baseline["count"] = 5 baseline["confidence_level"] = 0.95 # Back out the standard deviation from the margin of error. - baseline["std"] = ( - baseline["margin"] * np.sqrt(baseline["count"]) - ) / scipy.stats.t.ppf( + + t_score = scipy.stats.t.ppf( 1 - ((1 - baseline["confidence_level"]) / 2), baseline["count"] - 1, ) + std_err = baseline["margin"] / t_score + + baseline["std"] = std_err * np.sqrt(baseline["count"]) comparison = pd.merge(summary, baseline, on=["algo", "env_name"]) From 761a7e494b907dea707eed7c35cf4952e952ea2b Mon Sep 17 00:00:00 2001 From: timbauman Date: Thu, 11 May 2023 11:26:32 -0700 Subject: [PATCH 11/13] handle command names --- src/imitation/scripts/analyze.py | 22 +++--- src/imitation/scripts/compare_to_baseline.py | 74 ++++++++------------ tests/scripts/test_scripts.py | 6 +- 3 files changed, 45 insertions(+), 57 deletions(-) diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index df8ad6b79..9aa4fa4e0 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -152,16 +152,20 @@ def _get_exp_command(sd: sacred_util.SacredDicts) -> str: def _get_algo_name(sd: sacred_util.SacredDicts) -> str: exp_command = _get_exp_command(sd) - if exp_command == "gail": - return "GAIL" - elif exp_command == "airl": - return "AIRL" - elif exp_command == "train_bc": - return 
"BC" - elif exp_command == "train_dagger": - return "DAgger" + COMMAND_TO_ALGO = { + "train_bc": "BC", + "bc": "BC", + "train_dagger": "DAgger", + "dagger": "DAgger", + "gail": "GAIL", + "airl": "AIRL", + "preference_comparisons": "Preference Comparisons", + } + + if exp_command.lower() in COMMAND_TO_ALGO.keys(): + return COMMAND_TO_ALGO[exp_command.lower()] else: - return f"??exp_command={exp_command}" + raise ValueError(f"Unknown command: {exp_command}") def _return_summaries(sd: sacred_util.SacredDicts) -> dict: diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py index 97595373c..ce3b510eb 100644 --- a/src/imitation/scripts/compare_to_baseline.py +++ b/src/imitation/scripts/compare_to_baseline.py @@ -15,66 +15,26 @@ experiment returns, as reported by `imitation.scripts.analyze`. """ -import numpy as np import pandas as pd import scipy from imitation.data import types -def compare_results_to_baseline(results_file: types.AnyPath) -> pd.DataFrame: +def compare_results_to_baseline(results_filename: types.AnyPath) -> pd.DataFrame: """Compare benchmark results to baseline results. Args: - results_file: Path to a CSV file containing experiment results. + results_filename: Path to a CSV file containing experiment results. Returns: A string containing a table of p-values comparing the experiment results to the baseline results. 
""" - data = pd.read_csv(results_file) - data["imit_return"] = data["imit_return_summary"].apply( - lambda x: float(x.split(" ")[0]), - ) - summary = ( - data[["algo", "env_name", "imit_return"]] - .groupby(["algo", "env_name"]) - .describe() - ) - summary.columns = summary.columns.get_level_values(1) - summary = summary.reset_index() - - # Table 2 (https://arxiv.org/pdf/2211.11972.pdf) - # todo: store results in this repo outside this file - baseline = pd.DataFrame.from_records( - [ - { - "algo": "??exp_command=bc", - "env_name": "seals/Ant-v0", - "mean": 1953, - "margin": 123, - }, - { - "algo": "??exp_command=bc", - "env_name": "seals/HalfCheetah-v0", - "mean": 3446, - "margin": 130, - }, - ], - ) - baseline["count"] = 5 - baseline["confidence_level"] = 0.95 - # Back out the standard deviation from the margin of error. + results_summary = load_and_summarize_csv(results_filename) + baseline_summary = load_and_summarize_csv("baseline.csv") - t_score = scipy.stats.t.ppf( - 1 - ((1 - baseline["confidence_level"]) / 2), - baseline["count"] - 1, - ) - std_err = baseline["margin"] / t_score - - baseline["std"] = std_err * np.sqrt(baseline["count"]) - - comparison = pd.merge(summary, baseline, on=["algo", "env_name"]) + comparison = pd.merge(results_summary, baseline_summary, on=["algo", "env_name"]) comparison["pvalue"] = scipy.stats.ttest_ind_from_stats( comparison["mean_x"], @@ -88,6 +48,30 @@ def compare_results_to_baseline(results_file: types.AnyPath) -> pd.DataFrame: return comparison[["algo", "env_name", "pvalue"]] +def load_and_summarize_csv(results_filename: types.AnyPath) -> pd.DataFrame: + """Load a results CSV file and summarize the statistics. + + Args: + results_filename: Path to a CSV file containing experiment results. + + Returns: + A DataFrame containing the mean and standard deviation of the experiment + returns, grouped by algorithm and environment. 
+ """ + data = pd.read_csv(results_filename) + data["imit_return"] = data["imit_return_summary"].apply( + lambda x: float(x.split(" ")[0]), + ) + summary = ( + data[["algo", "env_name", "imit_return"]] + .groupby(["algo", "env_name"]) + .describe() + ) + summary.columns = summary.columns.get_level_values(1) + summary = summary.reset_index() + return summary + + def main() -> None: # pragma: no cover """Run the script.""" import sys diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index f704e628f..18bb5635d 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -1110,8 +1110,8 @@ def test_compare_to_baseline_p_values( comparison.to_csv(tmpfile) assert ( - compare_to_baseline.compare_results_to_baseline(results_file=tmpfile)["pvalue"][ - 0 - ] + compare_to_baseline.compare_results_to_baseline(results_filename=tmpfile)[ + "pvalue" + ][0] < p_value ) From b2e20141cfdf4a45bccf88067c50791cfa6050ea Mon Sep 17 00:00:00 2001 From: timbauman Date: Fri, 12 May 2023 17:49:17 -0700 Subject: [PATCH 12/13] add baseline --- benchmarking/baseline.csv | 111 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 benchmarking/baseline.csv diff --git a/benchmarking/baseline.csv b/benchmarking/baseline.csv new file mode 100644 index 000000000..5f44f427a --- /dev/null +++ b/benchmarking/baseline.csv @@ -0,0 +1,111 @@ 
+agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_711915,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),123.476 ± 2.16606 (n=56) 
+,0,100,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082120_c540b2,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-378.377 ± 60.6063 (n=56) +,0,102,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_ba94a1,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-314.108 ± 19.2371 (n=56) +,0,104,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_8c6aba,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-0.402349 ± 19.7147 (n=56) +,0,103,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_47f04c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),18.9413 ± 1.1345 (n=56) +,0,100,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115006_924cb4,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),1674.29 ± 581.622 (n=56) +,0,104,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_b838f5,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3652.14 ± 648.766 (n=56) +,0,102,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_23f6ee,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3491.62 ± 368.717 (n=56) +,0,101,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_ae2f97,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),4441.25 ± 87.8795 (n=56) +,0,103,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_1ae278,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3960.15 ± 108.134 (n=56) +,0,103,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_a8cbd6,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2600.12 ± 155.143 (n=56) +,0,101,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_299f28,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2663.1 ± 121.83 (n=56) +,0,104,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_1607e3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2740.77 ± 107.306 (n=56) +,0,100,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223305_7116b9,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2758.67 ± 121.298 (n=56) +,0,102,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_23fde3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2613.26 ± 128.037 (n=56) +,0,101,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00001_1_seed=101_2022-11-09_06-28-20', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062825_03facf']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),234.52 ± 7.61457 (n=50) +,0,102,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00002_2_seed=102_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_74ab85']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),302.529 ± 7.31652 (n=50) +,0,100,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00000_0_seed=100_2022-11-09_06-28-14', 'output', 'airl', 
'seals_Swimmer-v0', '20221109_062824_6fee49']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),248.793 ± 2.30907 (n=50) +,0,103,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00003_3_seed=103_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_72d6bf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),299.295 ± 4.40014 (n=50) +,0,104,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 
'inner_be60f_00004_4_seed=104_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_1570e5']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.572 ± 9.13404 (n=50) +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00001_1_seed=101_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_bb5442']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1044.57 ± 1.01596 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00000_0_seed=100_2022-11-09_06-28-18', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062827_6b454c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),651.678 ± 12.0014 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00003_3_seed=103_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_c4eb91']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1021 ± 68.6611 (n=50) 
+,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00002_2_seed=102_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_cfc95c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),-8.05116 ± 5.70636 (n=50) +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00004_4_seed=104_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_1fdf14']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 
64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),898.306 ± 320.022 (n=50) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115141_f4ca8b,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),1499.7 ± 600.606 (n=56) +,101,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_e1e72c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2253.66 ± 633.442 (n=56) +,103,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_259744,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2079.42 ± 731.222 (n=56) +,104,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_245d4a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2059.98 ± 699.001 (n=56) +,102,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_2e76df,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),1870.14 ± 737.307 (n=56) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,104,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00004_4_seed=104_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_52931f']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3540.47 ± 777.394 (n=50) +,102,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00002_2_seed=102_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_0631bc']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3308.07 ± 833.261 (n=50) +,101,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00001_1_seed=101_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150327_909529']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3517.64 ± 766.922 (n=50) +,103,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00003_3_seed=103_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_50e30b']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3405.94 ± 584.32 (n=50) +,100,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00000_0_seed=100_2022-11-14_15-03-17', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150326_6096c1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3458.01 ± 693.419 (n=50) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_43f19c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 
112.582 (n=104),2239.95 ± 111.425 (n=56) +,100,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184630_ecee3d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2241.96 ± 133.666 (n=56) +,102,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_35a53f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2194.95 ± 129.698 (n=56) +,101,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_bbc6dd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2253.88 ± 120.151 (n=56) +,104,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_f55a65,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2283.27 ± 95.716 (n=56) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00001_1_seed=101_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_bfbf99']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),282.727 ± 6.70404 (n=50) +,103,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00003_3_seed=103_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_c2bdfa']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.162 ± 5.94572 (n=50) +,104,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00004_4_seed=104_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_56ab19']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),283.323 ± 5.90963 (n=50) +,102,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00002_2_seed=102_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_2c49ca']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.825 ± 7.64087 (n=50) +,100,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00000_0_seed=100_2022-11-02_15-25-48', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152556_144f05']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),287.85 ± 5.57297 (n=50) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00001_1_seed=101_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_4b9bf0']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2493.52 ± 505.612 (n=50) +,103,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00003_3_seed=103_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_ec9b99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2546.03 ± 503.795 (n=50) +,100,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00000_0_seed=100_2022-11-03_09-52-11', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095218_db6ebb']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2431.11 ± 561.489 (n=50) +,102,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00002_2_seed=102_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_37dfa7']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2497.32 ± 432.525 (n=50) +,104,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00004_4_seed=104_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_46e144']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2593.77 ± 424.954 (n=50) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_04cd1f,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2578.98 ± 683.531 (n=104),2153.93 ± 859.53 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_769813,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2536.22 ± 697.961 (n=104),2714.79 ± 537.801 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_26539c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2497.44 ± 707.042 (n=104),2167.93 ± 788.897 (n=56) 
+,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173716_4a49f4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2573.21 ± 620.486 (n=104),2392.39 ± 680.058 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_99afba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2557.62 ± 702.379 (n=104),2173.82 ± 730.654 (n=56) 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_82aa93,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4213.44 ± 631.818 (n=64),4080.58 ± 631.88 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_ea6184,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4030.78 ± 842.851 (n=64),4261.82 ± 624.333 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_45b32a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4233.26 ± 608.398 (n=64),4107.19 ± 692.207 (n=56) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192909_39894f,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4051.72 ± 822.611 (n=64),4342.39 ± 443.082 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_67ef85,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4129.05 ± 746.065 (n=64),4068.88 ± 645.629 (n=56) 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204020_dd6a68,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,413.908 ± 56.9172 (n=112),477.137 ± 42.7627 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_79244e,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,444.837 ± 61.4541 (n=112),434.321 ± 37.8565 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_525a87,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,443.562 ± 67.9984 (n=112),385.186 ± 34.7564 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_69c197,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,456.621 ± 46.3077 (n=112),453.07 ± 31.3048 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_6a8cab,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,427.62 ± 67.3483 (n=112),462.591 ± 43.0062 (n=56) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00000_0_seed=100_2022-11-14_01-38-47', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013855_ec142d']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,290.223 ± 10.3715 (n=102),288.126 ± 8.34982 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00002_2_seed=102_2022-11-14_01-39-00', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013910_914b23']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,289.066 ± 10.4919 (n=102),291.563 ± 9.90896 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00003_3_seed=103_2022-11-14_01-39-08', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013916_06a767']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,289.483 ± 9.59645 (n=102),289.95 ± 10.0327 (n=50) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00001_1_seed=101_2022-11-14_01-38-53', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013902_596a0a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,287.93 ± 8.76716 (n=102),285.815 ± 10.1058 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00004_4_seed=104_2022-11-14_01-39-14', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013924_fd5c0e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,288.178 ± 8.95568 (n=102),289.018 ± 9.15658 (n=50) +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00001_1_seed=101_2022-11-14_00-45-42', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010211_f3e6f1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2617.43 ± 644.979 (n=100),2603.29 ± 615.705 (n=50) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00000_0_seed=100_2022-11-14_00-09-05', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_004544_4d5105']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2603.97 ± 698.661 (n=100),2696.42 ± 584.967 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00002_2_seed=102_2022-11-14_01-02-09', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010930_87aa1e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2703.58 ± 755.159 (n=100),2643.04 ± 621.008 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00004_4_seed=104_2022-11-14_01-12-44', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011416_4f858c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2750.23 ± 552.364 (n=100),2656.56 ± 683.49 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00003_3_seed=103_2022-11-14_01-09-28', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011246_b62527']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2672.75 ± 510.027 (n=100),2744.53 ± 607.177 (n=50) +,0,102,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_170527_c06945,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1649.79 ± 447.102 (n=56) +,0,104,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171143_0c5a14,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2377.56 ± 615.104 (n=56) +,0,101,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_164822_bf165a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2494.22 ± 494.241 (n=56) +,0,103,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171019_da32dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1789.58 ± 825.401 (n=56) +,0,100,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_154828_32c746,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2320.07 ± 571.159 (n=56) +,0,102,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002918_4b9b24,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3174.02 ± 940.62 (n=56) +,0,103,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002959_f202b3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3165.06 ± 819.894 (n=56) +,0,101,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002637_97ec09,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2917.02 ± 998.621 (n=56) +,0,104,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_003011_9f8d5c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2840.81 ± 1171.5 (n=56) +,0,100,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_001643_0ab3dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2952.95 ± 650.494 (n=56) +,0,100,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081326_aaa4d4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2539.6 ± 160.69 (n=56) +,0,101,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081328_1544bd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2681.16 ± 121.442 (n=56) +,0,102,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_67142d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2765.27 ± 134.75 (n=56) +,0,103,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_5c0a51,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2679.83 ± 133.841 (n=56) +,0,104,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_641c89,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2691.2 ± 148.312 (n=56) +,0,103,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00003_3_seed=103_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_8d85d9']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.837 ± 7.8336 (n=50) +,0,100,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00000_0_seed=100_2022-11-03_07-52-05', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075214_8ceb71']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),292.676 ± 11.1014 (n=50) +,0,102,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00002_2_seed=102_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', 
'20221103_075221_d6a329']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),291.416 ± 10.8008 (n=50) +,0,101,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00001_1_seed=101_2022-11-03_07-52-10', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075215_2cc4e0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),297.635 ± 8.87094 (n=50) +,0,104,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 
'inner_14cf1_00004_4_seed=104_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_65562a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),293.427 ± 10.7178 (n=50) +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00004_4_seed=104_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_84fd94']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2720.91 ± 466.367 (n=50) +,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 
'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00002_2_seed=102_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_471aeb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2600.9 ± 565.618 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00000_0_seed=100_2022-11-09_06-21-22', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062128_c33939']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2539.56 ± 651.114 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 
'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00003_3_seed=103_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_1ac751']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2712.06 ± 608.339 (n=50) +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00001_1_seed=101_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062129_262d36']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2642.98 ± 454.699 (n=50) From 
8ccbdb0402e57a53c3f6afedbac8fd217c0da3a5 Mon Sep 17 00:00:00 2001 From: timbauman Date: Fri, 12 May 2023 18:39:47 -0700 Subject: [PATCH 13/13] fix tests --- benchmarking/baseline.csv | 111 ------------------ .../logs_example_airl_seals_ant_bhp.csv | 6 + ...gs_example_airl_seals_half_cheetah_bhp.csv | 6 + .../logs_example_airl_seals_hopper_bhp.csv | 6 + .../logs_example_airl_seals_swimmer_bhp.csv | 6 + .../logs_example_airl_seals_walker_bhp.csv | 6 + .../results/logs_example_bc_seals_ant_bhp.csv | 6 + ...logs_example_bc_seals_half_cheetah_bhp.csv | 6 + .../logs_example_bc_seals_hopper_bhp.csv | 6 + .../logs_example_bc_seals_swimmer_bhp.csv | 6 + .../logs_example_bc_seals_walker_bhp.csv | 6 + .../logs_example_dagger_seals_ant_bhp.csv | 6 + ..._example_dagger_seals_half_cheetah_bhp.csv | 6 + .../logs_example_dagger_seals_hopper_bhp.csv | 6 + .../logs_example_dagger_seals_swimmer_bhp.csv | 6 + .../logs_example_dagger_seals_walker_bhp.csv | 6 + .../logs_example_gail_seals_ant_bhp.csv | 6 + ...gs_example_gail_seals_half_cheetah_bhp.csv | 6 + .../logs_example_gail_seals_hopper_bhp.csv | 6 + .../logs_example_gail_seals_swimmer_bhp.csv | 6 + .../logs_example_gail_seals_walker_bhp.csv | 6 + src/imitation/scripts/compare_to_baseline.py | 7 +- tests/scripts/test_scripts.py | 4 +- 23 files changed, 128 insertions(+), 114 deletions(-) delete mode 100644 benchmarking/baseline.csv create mode 100644 benchmarking/results/logs_example_airl_seals_ant_bhp.csv create mode 100644 benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv create mode 100644 benchmarking/results/logs_example_airl_seals_hopper_bhp.csv create mode 100644 benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv create mode 100644 benchmarking/results/logs_example_airl_seals_walker_bhp.csv create mode 100644 benchmarking/results/logs_example_bc_seals_ant_bhp.csv create mode 100644 benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv create mode 100644 
benchmarking/results/logs_example_bc_seals_hopper_bhp.csv create mode 100644 benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv create mode 100644 benchmarking/results/logs_example_bc_seals_walker_bhp.csv create mode 100644 benchmarking/results/logs_example_dagger_seals_ant_bhp.csv create mode 100644 benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv create mode 100644 benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv create mode 100644 benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv create mode 100644 benchmarking/results/logs_example_dagger_seals_walker_bhp.csv create mode 100644 benchmarking/results/logs_example_gail_seals_ant_bhp.csv create mode 100644 benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv create mode 100644 benchmarking/results/logs_example_gail_seals_hopper_bhp.csv create mode 100644 benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv create mode 100644 benchmarking/results/logs_example_gail_seals_walker_bhp.csv diff --git a/benchmarking/baseline.csv b/benchmarking/baseline.csv deleted file mode 100644 index 5f44f427a..000000000 --- a/benchmarking/baseline.csv +++ /dev/null @@ -1,111 +0,0 @@ 
-agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,0,101,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_711915,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),123.476 ± 2.16606 (n=56) 
-,0,100,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082120_c540b2,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-378.377 ± 60.6063 (n=56) -,0,102,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_ba94a1,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-314.108 ± 19.2371 (n=56) -,0,104,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_8c6aba,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-0.402349 ± 19.7147 (n=56) -,0,103,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_47f04c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),18.9413 ± 1.1345 (n=56) -,0,100,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115006_924cb4,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),1674.29 ± 581.622 (n=56) -,0,104,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_b838f5,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3652.14 ± 648.766 (n=56) -,0,102,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_23f6ee,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3491.62 ± 368.717 (n=56) -,0,101,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_ae2f97,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),4441.25 ± 87.8795 (n=56) -,0,103,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_1ae278,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3960.15 ± 108.134 (n=56) -,0,103,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_a8cbd6,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2600.12 ± 155.143 (n=56) -,0,101,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_299f28,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2663.1 ± 121.83 (n=56) -,0,104,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_1607e3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2740.77 ± 107.306 (n=56) -,0,100,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223305_7116b9,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2758.67 ± 121.298 (n=56) -,0,102,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_23fde3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2613.26 ± 128.037 (n=56) -,0,101,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00001_1_seed=101_2022-11-09_06-28-20', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062825_03facf']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),234.52 ± 7.61457 (n=50) -,0,102,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00002_2_seed=102_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_74ab85']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),302.529 ± 7.31652 (n=50) -,0,100,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00000_0_seed=100_2022-11-09_06-28-14', 'output', 'airl', 
'seals_Swimmer-v0', '20221109_062824_6fee49']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),248.793 ± 2.30907 (n=50) -,0,103,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00003_3_seed=103_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_72d6bf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),299.295 ± 4.40014 (n=50) -,0,104,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 
'inner_be60f_00004_4_seed=104_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_1570e5']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.572 ± 9.13404 (n=50) -,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00001_1_seed=101_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_bb5442']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1044.57 ± 1.01596 (n=50) -,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00000_0_seed=100_2022-11-09_06-28-18', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062827_6b454c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),651.678 ± 12.0014 (n=50) -,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00003_3_seed=103_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_c4eb91']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1021 ± 68.6611 (n=50) 
-,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00002_2_seed=102_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_cfc95c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),-8.05116 ± 5.70636 (n=50) -,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00004_4_seed=104_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_1fdf14']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 
64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),898.306 ± 320.022 (n=50) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,100,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115141_f4ca8b,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),1499.7 ± 600.606 (n=56) -,101,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_e1e72c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2253.66 ± 633.442 (n=56) -,103,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_259744,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2079.42 ± 731.222 (n=56) -,104,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_245d4a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),2059.98 ± 699.001 (n=56) -,102,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_2e76df,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Ant-v0,2408.22 ± 665.201 (n=104),1870.14 ± 737.307 (n=56) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,104,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00004_4_seed=104_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_52931f']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3540.47 ± 777.394 (n=50) -,102,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00002_2_seed=102_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_0631bc']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3308.07 ± 833.261 (n=50) -,101,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00001_1_seed=101_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150327_909529']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3517.64 ± 766.922 (n=50) -,103,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00003_3_seed=103_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_50e30b']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3405.94 ± 584.32 (n=50) -,100,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00000_0_seed=100_2022-11-14_15-03-17', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150326_6096c1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3458.01 ± 693.419 (n=50) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,103,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_43f19c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 
112.582 (n=104),2239.95 ± 111.425 (n=56) -,100,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184630_ecee3d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2241.96 ± 133.666 (n=56) -,102,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_35a53f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2194.95 ± 129.698 (n=56) -,101,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_bbc6dd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2253.88 ± 120.151 (n=56) -,104,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_f55a65,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2283.27 ± 95.716 (n=56) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,101,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00001_1_seed=101_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_bfbf99']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),282.727 ± 6.70404 (n=50) -,103,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00003_3_seed=103_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_c2bdfa']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.162 ± 5.94572 (n=50) -,104,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00004_4_seed=104_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_56ab19']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),283.323 ± 5.90963 (n=50) -,102,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00002_2_seed=102_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_2c49ca']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.825 ± 7.64087 (n=50) -,100,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00000_0_seed=100_2022-11-02_15-25-48', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152556_144f05']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),287.85 ± 5.57297 (n=50) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,101,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00001_1_seed=101_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_4b9bf0']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2493.52 ± 505.612 (n=50) -,103,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00003_3_seed=103_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_ec9b99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2546.03 ± 503.795 (n=50) -,100,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00000_0_seed=100_2022-11-03_09-52-11', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095218_db6ebb']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2431.11 ± 561.489 (n=50) -,102,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00002_2_seed=102_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_37dfa7']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2497.32 ± 432.525 (n=50) -,104,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00004_4_seed=104_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_46e144']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=bc,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2593.77 ± 424.954 (n=50) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_04cd1f,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2578.98 ± 683.531 (n=104),2153.93 ± 859.53 (n=56) -,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_769813,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2536.22 ± 697.961 (n=104),2714.79 ± 537.801 (n=56) -,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_26539c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2497.44 ± 707.042 (n=104),2167.93 ± 788.897 (n=56) 
-,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173716_4a49f4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2573.21 ± 620.486 (n=104),2392.39 ± 680.058 (n=56) -,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_99afba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Ant-v0,2557.62 ± 702.379 (n=104),2173.82 ± 730.654 (n=56) 
-agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_82aa93,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4213.44 ± 631.818 (n=64),4080.58 ± 631.88 (n=56) -,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_ea6184,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4030.78 ± 842.851 (n=64),4261.82 ± 624.333 (n=56) -,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_45b32a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4233.26 ± 608.398 (n=64),4107.19 ± 692.207 (n=56) -,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192909_39894f,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4051.72 ± 822.611 (n=64),4342.39 ± 443.082 (n=56) -,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_67ef85,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/HalfCheetah-v0,4129.05 ± 746.065 (n=64),4068.88 ± 645.629 (n=56) 
-agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204020_dd6a68,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,413.908 ± 56.9172 (n=112),477.137 ± 42.7627 (n=56) -,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_79244e,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,444.837 ± 61.4541 (n=112),434.321 ± 37.8565 (n=56) -,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_525a87,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,443.562 ± 67.9984 (n=112),385.186 ± 34.7564 (n=56) -,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_69c197,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,456.621 ± 46.3077 (n=112),453.07 ± 31.3048 (n=56) -,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_6a8cab,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Hopper-v0,427.62 ± 67.3483 (n=112),462.591 ± 43.0062 (n=56) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00000_0_seed=100_2022-11-14_01-38-47', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013855_ec142d']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,290.223 ± 10.3715 (n=102),288.126 ± 8.34982 (n=50) -,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00002_2_seed=102_2022-11-14_01-39-00', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013910_914b23']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,289.066 ± 10.4919 (n=102),291.563 ± 9.90896 (n=50) -,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00003_3_seed=103_2022-11-14_01-39-08', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013916_06a767']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,289.483 ± 9.59645 (n=102),289.95 ± 10.0327 (n=50) -,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00001_1_seed=101_2022-11-14_01-38-53', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013902_596a0a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,287.93 ± 8.76716 (n=102),285.815 ± 10.1058 (n=50) -,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00004_4_seed=104_2022-11-14_01-39-14', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013924_fd5c0e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Swimmer-v0,288.178 ± 8.95568 (n=102),289.018 ± 9.15658 (n=50) -agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary -,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00001_1_seed=101_2022-11-14_00-45-42', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010211_f3e6f1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2617.43 ± 644.979 (n=100),2603.29 ± 615.705 (n=50) -,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00000_0_seed=100_2022-11-14_00-09-05', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_004544_4d5105']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2603.97 ± 698.661 (n=100),2696.42 ± 584.967 (n=50) -,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00002_2_seed=102_2022-11-14_01-02-09', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010930_87aa1e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2703.58 ± 755.159 (n=100),2643.04 ± 621.008 (n=50) -,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00004_4_seed=104_2022-11-14_01-12-44', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011416_4f858c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2750.23 ± 552.364 (n=100),2656.56 ± 683.49 (n=50) -,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00003_3_seed=103_2022-11-14_01-09-28', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011246_b62527']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,??exp_command=dagger,seals/Walker2d-v0,2672.75 ± 510.027 (n=100),2744.53 ± 607.177 (n=50) -,0,102,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_170527_c06945,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1649.79 ± 447.102 (n=56) -,0,104,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171143_0c5a14,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2377.56 ± 615.104 (n=56) -,0,101,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_164822_bf165a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2494.22 ± 494.241 (n=56) -,0,103,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171019_da32dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1789.58 ± 825.401 (n=56) -,0,100,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_154828_32c746,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2320.07 ± 571.159 (n=56) -,0,102,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002918_4b9b24,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3174.02 ± 940.62 (n=56) -,0,103,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002959_f202b3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3165.06 ± 819.894 (n=56) -,0,101,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002637_97ec09,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2917.02 ± 998.621 (n=56) -,0,104,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_003011_9f8d5c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2840.81 ± 1171.5 (n=56) -,0,100,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_001643_0ab3dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2952.95 ± 650.494 (n=56) -,0,100,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081326_aaa4d4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2539.6 ± 160.69 (n=56) -,0,101,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081328_1544bd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2681.16 ± 121.442 (n=56) -,0,102,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_67142d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2765.27 ± 134.75 (n=56) -,0,103,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_5c0a51,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2679.83 ± 133.841 (n=56) -,0,104,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_641c89,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2691.2 ± 148.312 (n=56) -,0,103,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00003_3_seed=103_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_8d85d9']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.837 ± 7.8336 (n=50) -,0,100,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00000_0_seed=100_2022-11-03_07-52-05', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075214_8ceb71']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),292.676 ± 11.1014 (n=50) -,0,102,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00002_2_seed=102_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', 
'20221103_075221_d6a329']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),291.416 ± 10.8008 (n=50) -,0,101,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00001_1_seed=101_2022-11-03_07-52-10', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075215_2cc4e0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),297.635 ± 8.87094 (n=50) -,0,104,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 
'inner_14cf1_00004_4_seed=104_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_65562a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),293.427 ± 10.7178 (n=50) -,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00004_4_seed=104_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_84fd94']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2720.91 ± 466.367 (n=50) -,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 
'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00002_2_seed=102_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_471aeb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2600.9 ± 565.618 (n=50) -,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00000_0_seed=100_2022-11-09_06-21-22', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062128_c33939']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2539.56 ± 651.114 (n=50) -,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 
'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00003_3_seed=103_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_1ac751']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2712.06 ± 608.339 (n=50) -,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00001_1_seed=101_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062129_262d36']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2642.98 ± 454.699 (n=50) diff 
--git a/benchmarking/results/logs_example_airl_seals_ant_bhp.csv b/benchmarking/results/logs_example_airl_seals_ant_bhp.csv new file mode 100644 index 000000000..7c0e3ba8b --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_ant_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_711915,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),123.476 ± 2.16606 (n=56) +,0,100,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082120_c540b2,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-378.377 ± 60.6063 (n=56) +,0,102,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_ba94a1,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-314.108 ± 19.2371 (n=56) +,0,104,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_8c6aba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-0.402349 ± 19.7147 (n=56) +,0,103,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_47f04c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),18.9413 ± 1.1345 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..012bcfaaa --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_ex
tractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,100,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115006_924cb4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),1674.29 ± 581.622 (n=56) +,0,104,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_b838f5,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3652.14 ± 648.766 (n=56) +,0,102,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_23f6ee,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3491.62 ± 368.717 (n=56) +,0,101,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_ae2f97,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),4441.25 ± 87.8795 (n=56) +,0,103,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_1ae278,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3960.15 ± 108.134 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv b/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv new file mode 100644 index 000000000..e52fb44e9 --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/
type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,103,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_a8cbd6,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2600.12 ± 155.143 (n=56) +,0,101,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_299f28,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2663.1 ± 121.83 (n=56) +,0,104,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_1607e3,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2740.77 ± 107.306 (n=56) +,0,100,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223305_7116b9,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2758.67 ± 121.298 (n=56) +,0,102,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_23fde3,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2613.26 ± 128.037 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv new file mode 100644 index 000000000..c4afcb6af --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_e
pochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00001_1_seed=101_2022-11-09_06-28-20', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062825_03facf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),234.52 ± 7.61457 (n=50) +,0,102,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00002_2_seed=102_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_74ab85']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),302.529 ± 7.31652 (n=50) +,0,100,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00000_0_seed=100_2022-11-09_06-28-14', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062824_6fee49']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),248.793 ± 2.30907 (n=50) +,0,103,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00003_3_seed=103_2022-11-09_06-28-21', 'output', 'airl', 
'seals_Swimmer-v0', '20221109_062833_72d6bf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),299.295 ± 4.40014 (n=50) +,0,104,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00004_4_seed=104_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_1570e5']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.572 ± 9.13404 (n=50) diff --git a/benchmarking/results/logs_example_airl_seals_walker_bhp.csv b/benchmarking/results/logs_example_airl_seals_walker_bhp.csv new file mode 100644 index 000000000..cb8ffbf5b --- /dev/null 
+++ b/benchmarking/results/logs_example_airl_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00001_1_seed=101_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_bb5442']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1044.57 ± 1.01596 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00000_0_seed=100_2022-11-09_06-28-18', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062827_6b454c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),651.678 ± 12.0014 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00003_3_seed=103_2022-11-09_06-28-23', 'output', 'airl', 
'seals_Walker2d-v0', '20221109_062829_c4eb91']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1021 ± 68.6611 (n=50) +,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00002_2_seed=102_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_cfc95c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),-8.05116 ± 5.70636 (n=50) +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 
'inner_c0abc_00004_4_seed=104_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_1fdf14']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),898.306 ± 320.022 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_ant_bhp.csv b/benchmarking/results/logs_example_bc_seals_ant_bhp.csv new file mode 100644 index 000000000..bafa893de --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_ant_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115141_f4ca8b,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),1499.7 ± 600.606 (n=56) +,101,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_e1e72c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2253.66 ± 633.442 (n=56) +,103,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_259744,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2079.42 ± 731.222 (n=56) +,104,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_245d4a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2059.98 ± 699.001 (n=56) +,102,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_2e76df,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),1870.14 ± 737.307 (n=56) diff --git a/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..4a8f116ec --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,104,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00004_4_seed=104_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', 
'20221114_150328_52931f']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3540.47 ± 777.394 (n=50) +,102,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00002_2_seed=102_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_0631bc']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3308.07 ± 833.261 (n=50) +,101,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00001_1_seed=101_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150327_909529']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3517.64 ± 766.922 (n=50) +,103,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00003_3_seed=103_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_50e30b']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3405.94 ± 584.32 (n=50) +,100,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00000_0_seed=100_2022-11-14_15-03-17', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150326_6096c1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3458.01 ± 693.419 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv b/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv new file mode 100644 index 000000000..6634655ea --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_43f19c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2239.95 ± 111.425 (n=56) +,100,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184630_ecee3d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2241.96 ± 133.666 (n=56) +,102,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_35a53f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2194.95 ± 129.698 (n=56) +,101,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_bbc6dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2253.88 ± 120.151 (n=56) +,104,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_f55a65,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2283.27 ± 95.716 (n=56) diff --git a/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv new file mode 100644 index 000000000..62721aead --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00001_1_seed=101_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_bfbf99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),282.727 ± 6.70404 (n=50) +,103,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00003_3_seed=103_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_c2bdfa']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.162 ± 5.94572 (n=50) +,104,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00004_4_seed=104_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_56ab19']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),283.323 ± 5.90963 (n=50) +,102,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00002_2_seed=102_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_2c49ca']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.825 ± 7.64087 (n=50) +,100,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00000_0_seed=100_2022-11-02_15-25-48', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152556_144f05']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),287.85 ± 5.57297 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_walker_bhp.csv b/benchmarking/results/logs_example_bc_seals_walker_bhp.csv new file mode 100644 index 000000000..17adb911d --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_walker_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00001_1_seed=101_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_4b9bf0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2493.52 ± 505.612 (n=50) +,103,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00003_3_seed=103_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_ec9b99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2546.03 ± 503.795 (n=50) +,100,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00000_0_seed=100_2022-11-03_09-52-11', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095218_db6ebb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2431.11 ± 561.489 (n=50) +,102,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00002_2_seed=102_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_37dfa7']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2497.32 ± 432.525 (n=50) +,104,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00004_4_seed=104_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_46e144']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2593.77 ± 424.954 (n=50) diff --git a/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv b/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv new file mode 100644 index 000000000..a3df80cc5 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_04cd1f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2578.98 ± 683.531 (n=104),2153.93 ± 859.53 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_769813,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2536.22 ± 697.961 (n=104),2714.79 ± 537.801 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_26539c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2497.44 ± 707.042 (n=104),2167.93 ± 788.897 (n=56) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173716_4a49f4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2573.21 ± 620.486 (n=104),2392.39 ± 680.058 (n=56) 
+,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_99afba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2557.62 ± 702.379 (n=104),2173.82 ± 730.654 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..927af4e98 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_re
turn_summary +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_82aa93,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4213.44 ± 631.818 (n=64),4080.58 ± 631.88 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_ea6184,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4030.78 ± 842.851 (n=64),4261.82 ± 624.333 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_45b32a,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4233.26 ± 608.398 (n=64),4107.19 ± 692.207 (n=56) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192909_39894f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4051.72 ± 822.611 (n=64),4342.39 ± 443.082 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_67ef85,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4129.05 ± 746.065 (n=64),4068.88 ± 
645.629 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv b/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv new file mode 100644 index 000000000..be56fe927 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204020_dd6a68,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,413.908 ± 56.9172 (n=112),477.137 ± 42.7627 (n=56) 
+,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_79244e,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,444.837 ± 61.4541 (n=112),434.321 ± 37.8565 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_525a87,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,443.562 ± 67.9984 (n=112),385.186 ± 34.7564 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_69c197,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,456.621 ± 46.3077 (n=112),453.07 ± 31.3048 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_6a8cab,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,427.62 ± 67.3483 (n=112),462.591 ± 43.0062 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv new file mode 100644 index 000000000..3fd3e1d50 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 
'inner_239d1_00000_0_seed=100_2022-11-14_01-38-47', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013855_ec142d']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,290.223 ± 10.3715 (n=102),288.126 ± 8.34982 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00002_2_seed=102_2022-11-14_01-39-00', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013910_914b23']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,289.066 ± 10.4919 (n=102),291.563 ± 9.90896 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00003_3_seed=103_2022-11-14_01-39-08', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013916_06a767']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,289.483 ± 9.59645 (n=102),289.95 ± 10.0327 (n=50) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00001_1_seed=101_2022-11-14_01-38-53', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013902_596a0a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,287.93 ± 8.76716 (n=102),285.815 ± 10.1058 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00004_4_seed=104_2022-11-14_01-39-14', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013924_fd5c0e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,288.178 ± 8.95568 (n=102),289.018 ± 9.15658 (n=50) diff --git a/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv b/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv new file mode 100644 index 000000000..305b56898 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00001_1_seed=101_2022-11-14_00-45-42', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010211_f3e6f1']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2617.43 ± 644.979 (n=100),2603.29 ± 615.705 (n=50) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00000_0_seed=100_2022-11-14_00-09-05', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_004544_4d5105']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2603.97 ± 698.661 (n=100),2696.42 ± 584.967 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00002_2_seed=102_2022-11-14_01-02-09', 'output', 'dagger', 'seals_Walker2d-v0', 
'20221114_010930_87aa1e']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2703.58 ± 755.159 (n=100),2643.04 ± 621.008 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00004_4_seed=104_2022-11-14_01-12-44', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011416_4f858c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2750.23 ± 552.364 (n=100),2656.56 ± 683.49 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00003_3_seed=103_2022-11-14_01-09-28', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011246_b62527']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2672.75 ± 510.027 (n=100),2744.53 ± 607.177 (n=50) diff --git a/benchmarking/results/logs_example_gail_seals_ant_bhp.csv b/benchmarking/results/logs_example_gail_seals_ant_bhp.csv new file mode 100644 index 000000000..0c538e889 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_ant_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,e
xpert_return_summary,imit_return_summary +,0,102,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_170527_c06945,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1649.79 ± 447.102 (n=56) +,0,104,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171143_0c5a14,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2377.56 ± 615.104 (n=56) +,0,101,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_164822_bf165a,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2494.22 ± 494.241 (n=56) +,0,103,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171019_da32dd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1789.58 ± 825.401 (n=56) +,0,100,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_154828_32c746,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2320.07 ± 571.159 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..0a1f3e7c6 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extrac
tor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,102,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002918_4b9b24,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3174.02 ± 940.62 (n=56) +,0,103,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002959_f202b3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3165.06 ± 819.894 (n=56) +,0,101,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002637_97ec09,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2917.02 ± 998.621 (n=56) +,0,104,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_003011_9f8d5c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2840.81 ± 1171.5 (n=56) +,0,100,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_001643_0ab3dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2952.95 ± 650.494 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv b/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv new file mode 100644 index 000000000..1674d6508 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,t
rain.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,100,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081326_aaa4d4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2539.6 ± 160.69 (n=56) +,0,101,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081328_1544bd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2681.16 ± 121.442 (n=56) +,0,102,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_67142d,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2765.27 ± 134.75 (n=56) +,0,103,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_5c0a51,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2679.83 ± 133.841 (n=56) +,0,104,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_641c89,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2691.2 ± 148.312 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv new file mode 100644 index 000000000..15119028d --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,
rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,103,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00003_3_seed=103_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_8d85d9']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.837 ± 7.8336 (n=50) +,0,100,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00000_0_seed=100_2022-11-03_07-52-05', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075214_8ceb71']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),292.676 ± 11.1014 (n=50) +,0,102,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00002_2_seed=102_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_d6a329']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),291.416 ± 10.8008 (n=50) +,0,101,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00001_1_seed=101_2022-11-03_07-52-10', 'output', 'gail', 'seals_Swimmer-v0', 
'20221103_075215_2cc4e0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),297.635 ± 8.87094 (n=50) +,0,104,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00004_4_seed=104_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_65562a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),293.427 ± 10.7178 (n=50) diff --git a/benchmarking/results/logs_example_gail_seals_walker_bhp.csv b/benchmarking/results/logs_example_gail_seals_walker_bhp.csv new file mode 100644 index 000000000..818a6a7b0 --- /dev/null +++ 
b/benchmarking/results/logs_example_gail_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00004_4_seed=104_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_84fd94']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2720.91 ± 466.367 (n=50) +,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00002_2_seed=102_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_471aeb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2600.9 ± 565.618 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00000_0_seed=100_2022-11-09_06-21-22', 'output', 'gail', 
'seals_Walker2d-v0', '20221109_062128_c33939']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2539.56 ± 651.114 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00003_3_seed=103_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_1ac751']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2712.06 ± 608.339 (n=50) +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 
'inner_c8027_00001_1_seed=101_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062129_262d36']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2642.98 ± 454.699 (n=50) diff --git a/src/imitation/scripts/compare_to_baseline.py b/src/imitation/scripts/compare_to_baseline.py index ce3b510eb..59436a621 100644 --- a/src/imitation/scripts/compare_to_baseline.py +++ b/src/imitation/scripts/compare_to_baseline.py @@ -15,6 +15,8 @@ experiment returns, as reported by `imitation.scripts.analyze`. """ +import glob + import pandas as pd import scipy @@ -32,7 +34,10 @@ def compare_results_to_baseline(results_filename: types.AnyPath) -> pd.DataFrame the baseline results. 
""" results_summary = load_and_summarize_csv(results_filename) - baseline_summary = load_and_summarize_csv("baseline.csv") + + baseline_filenames = glob.glob("benchmarking/results/*.csv") + baseline_dfs = [load_and_summarize_csv(filename) for filename in baseline_filenames] + baseline_summary = pd.concat(baseline_dfs) comparison = pd.merge(results_summary, baseline_summary, on=["algo", "env_name"]) diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 18bb5635d..2a9caa2bd 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -1083,7 +1083,7 @@ def test_convert_trajs_from_current_format_is_idempotent( [ ( [2000, 1900, 2100], - 0.6, + 0.8, ), ( [1000, 900, 1100], @@ -1099,7 +1099,7 @@ def test_compare_to_baseline_p_values( comparison = pd.DataFrame.from_records( [ { - "algo": "??exp_command=bc", + "algo": "BC", "env_name": "seals/Ant-v0", "imit_return_summary": f"{imit_return} +/- 0.0", }