diff --git a/Assets/Scripts/Agents/WormSimpleAgent.cs b/Assets/Scripts/Agents/WormSimpleAgent.cs index d785ce1..655c7e3 100644 --- a/Assets/Scripts/Agents/WormSimpleAgent.cs +++ b/Assets/Scripts/Agents/WormSimpleAgent.cs @@ -1,27 +1,25 @@ -using CommandTerminal; using Unity.MLAgents; using Unity.MLAgents.Actuators; using Unity.MLAgents.Sensors; -using UnityEngine; namespace DialogosEngine { public class WormSimpleAgent : Agent { - CommandLogger Logger; + //CommandLogger Logger; bool _IsInitialized = false; string _ExpectedString = "echo hello "; string _CachedString; public override void Initialize() { - Logger = new CommandLogger("WormSimpleAgent_log.txt", 1000); - Logger.Log($"[{StepCount}] Initialize"); + //Logger = new CommandLogger("WormSimpleAgent_log.txt"); + //Logger.Log($"[{StepCount}] Initialize"); } public override void OnEpisodeBegin() { - Logger.Log($"[{StepCount}] OnEpisodeBegin"); + //Logger.Log($"[{StepCount}] OnEpisodeBegin"); _IsInitialized = true; } @@ -54,12 +52,12 @@ public override void OnActionReceived(ActionBuffers actions) float[] _actionArray = actions.ContinuousActions.Array; float _lengthControlValue = _actionArray[0]; - Logger.Log($"[{StepCount}] Length Control Value: {_lengthControlValue}"); + //Logger.Log($"[{StepCount}] Length Control Value: {_lengthControlValue}"); int outputLength = Transformer.RoundMax(ref _lengthControlValue); - Logger.Log($"[{StepCount}] Rounded Output Length: {outputLength}"); - + //Logger.Log($"[{StepCount}] Rounded Output Length: {outputLength}"); + for (int i = 1; i < _actionArray.Length; i++) { _actionArray[i] = Transformer.Transform(ref _actionArray[i]); @@ -69,9 +67,9 @@ public override void OnActionReceived(ActionBuffers actions) //Logger.Log($"[{StepCount}] Processed Action Array: {string.Join(", ", _actionArray)}"); - _CachedString = Lexer.QuantizeUTF8(_actionArray); + //_CachedString = Lexer.QuantizeUTF8(_actionArray); - Logger.Log($"[{StepCount}] Quantized String: {_CachedString}"); + //Logger.Log($"[{StepCount}] Quantized String: {_CachedString}"); } void FixedUpdate() @@ -81,9 +79,30 @@ void FixedUpdate() return; } - var reward = Random.value; - AddReward(reward); - Logger.Log($"[{StepCount}] FixedUpdate.reward: {reward}"); + if (_CachedString != null) + { + float reward = AgentUtils.CalculateEchoReward(_ExpectedString, _CachedString); + + if (_CachedString.EndsWith(AgentUtils.k_EndOfSequence)) + { + string _commandLine = _CachedString.Replace(AgentUtils.k_EndOfSequence, ""); + //Logger.Log($"[{StepCount}] COMMAND: " + _commandLine); + } + + if (_CachedString == _ExpectedString) + { + //Logger.Log($"[{StepCount}] MATCH: " + _CachedString); + _CachedString = null; + SetReward(1f); + EndEpisode(); + } + else + { + //Logger.Log($"[{StepCount}] {reward} | {_CachedString}"); + _CachedString = null; + SetReward(reward); + } + } } } } \ No newline at end of file diff --git a/config/sac/WormSimple.yaml b/config/sac/WormSimple.yaml index 01cdc74..0fdb999 100644 --- a/config/sac/WormSimple.yaml +++ b/config/sac/WormSimple.yaml @@ -3,25 +3,35 @@ behaviors: trainer_type: sac hyperparameters: learning_rate: 0.0003 - learning_rate_schedule: constant - batch_size: 256 - buffer_size: 500000 + learning_rate_schedule: linear + batch_size: 1000 + buffer_size: 1000000 buffer_init_steps: 0 tau: 0.005 - steps_per_update: 20.0 + steps_per_update: 30.0 save_replay_buffer: false init_entcoef: 1.0 - reward_signal_steps_per_update: 20.0 + reward_signal_steps_per_update: 30.0 network_settings: normalize: true - hidden_units: 512 + hidden_units: 1024 num_layers: 3 vis_encode_type: simple + memory: + sequence_length: 1000 + memory_size: 1024 reward_signals: extrinsic: gamma: 0.995 strength: 1.0 - keep_checkpoints: 5 - max_steps: 5000000 + curiosity: + strength: 0.1 + gamma: 0.995 + network_settings: + hidden_units: 1024 + learning_rate: 0.0001 + keep_checkpoints: 100 + checkpoint_interval: 10000 + max_steps: 1000000 time_horizon: 1000 - summary_freq: 30000 + summary_freq: 10000 diff --git a/results/WormAlpha1/Worm.onnx b/results/WormAlpha1/Worm.onnx deleted file mode 100644 index c1e5acb..0000000 Binary files a/results/WormAlpha1/Worm.onnx and /dev/null differ diff --git a/results/WormAlpha1/Worm/Worm-1000.onnx b/results/WormAlpha1/Worm/Worm-1000.onnx deleted file mode 100644 index c1e5acb..0000000 Binary files a/results/WormAlpha1/Worm/Worm-1000.onnx and /dev/null differ diff --git a/results/WormAlpha1/Worm/Worm-1000.pt b/results/WormAlpha1/Worm/Worm-1000.pt deleted file mode 100644 index 2485fa6..0000000 Binary files a/results/WormAlpha1/Worm/Worm-1000.pt and /dev/null differ diff --git a/results/WormAlpha1/Worm/checkpoint.pt b/results/WormAlpha1/Worm/checkpoint.pt deleted file mode 100644 index 0719661..0000000 Binary files a/results/WormAlpha1/Worm/checkpoint.pt and /dev/null differ diff --git a/results/WormAlpha1/Worm/events.out.tfevents.1712854294.VVI-Ii73P1II.29152.0 b/results/WormAlpha1/Worm/events.out.tfevents.1712854294.VVI-Ii73P1II.29152.0 deleted file mode 100644 index 46eca01..0000000 Binary files a/results/WormAlpha1/Worm/events.out.tfevents.1712854294.VVI-Ii73P1II.29152.0 and /dev/null differ diff --git a/results/WormAlpha1/configuration.yaml b/results/WormAlpha1/configuration.yaml deleted file mode 100644 index 2a30825..0000000 --- a/results/WormAlpha1/configuration.yaml +++ /dev/null @@ -1,78 +0,0 @@ -default_settings: null -behaviors: - Worm: - trainer_type: sac - hyperparameters: - learning_rate: 0.0003 - learning_rate_schedule: constant - batch_size: 256 - buffer_size: 500000 - buffer_init_steps: 0 - tau: 0.005 - steps_per_update: 20.0 - save_replay_buffer: false - init_entcoef: 1.0 - reward_signal_steps_per_update: 20.0 - checkpoint_interval: 500000 - network_settings: - normalize: true - hidden_units: 512 - num_layers: 3 - vis_encode_type: simple - memory: null - goal_conditioning_type: hyper - deterministic: false - reward_signals: - extrinsic: - gamma: 0.995 - strength: 1.0 - network_settings: - normalize: false - hidden_units: 128 - num_layers: 2 - vis_encode_type: simple - memory: null - goal_conditioning_type: hyper - deterministic: false - init_path: null - keep_checkpoints: 5 - even_checkpoints: false - max_steps: 5000000 - time_horizon: 1000 - summary_freq: 30000 - threaded: false - self_play: null - behavioral_cloning: null -env_settings: - env_path: null - env_args: null - base_port: 5005 - num_envs: 1 - num_areas: 1 - timeout_wait: 60 - seed: -1 - max_lifetime_restarts: 10 - restarts_rate_limit_n: 1 - restarts_rate_limit_period_s: 60 -engine_settings: - width: 84 - height: 84 - quality_level: 5 - time_scale: 1.0 - target_frame_rate: -1 - capture_frame_rate: 60 - no_graphics: false - no_graphics_monitor: false -environment_parameters: null -checkpoint_settings: - run_id: WormAlpha1 - initialize_from: null - load_model: false - resume: false - force: true - train_model: false - inference: false - results_dir: results -torch_settings: - device: null -debug: false diff --git a/results/WormAlpha1/run_logs/timers.json b/results/WormAlpha1/run_logs/timers.json deleted file mode 100644 index 3ac7794..0000000 --- a/results/WormAlpha1/run_logs/timers.json +++ /dev/null @@ -1,174 +0,0 @@ -{ - "name": "root", - "metadata": { - "timer_format_version": "0.1.0", - "start_time_seconds": "1712854262", - "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:34:57) [MSC v.1936 64 bit (AMD64)]", - "command_line_arguments": "C:\\Users\\3nigma\\Unity\\Dialogos\\miniconda\\envs\\Dialogos\\Scripts\\mlagents-learn config/sac/Worm2.yaml --run-id=WormAlpha1 --time-scale 1 --quality-level 5 --force", - "mlagents_version": "1.1.0.dev0", - "mlagents_envs_version": "1.1.0.dev0", - "communication_protocol_version": "1.5.0", - "pytorch_version": "2.2.1+cpu", - "numpy_version": "1.23.5", - "end_time_seconds": "1712854348" - }, - "total": 85.72575959999813, - "count": 1, - "self": 0.006982099992455915, - "children": { - "run_training.setup": { - "total": 0.13349279999965802, - "count": 1, - "self": 0.13349279999965802 - }, - "TrainerController.start_learning": { - "total": 85.58528470000601, - "count": 1, - "self": 0.039516099699540064, - "children": { - "TrainerController._reset_env": { - "total": 32.579916700022295, - "count": 1, - "self": 32.579916700022295 - }, - "TrainerController.advance": { - "total": 52.71392080030637, - "count": 1717, - "self": 0.03680989984422922, - "children": { - "env_step": { - "total": 49.13399119989481, - "count": 1717, - "self": 47.100704200944165, - "children": { - "SubprocessEnvManager._take_step": { - "total": 2.0078564995492343, - "count": 1717, - "self": 0.1212072994094342, - "children": { - "TorchPolicy.evaluate": { - "total": 1.8866492001398, - "count": 1717, - "self": 1.8866492001398 - } - } - }, - "workers": { - "total": 0.02543049940140918, - "count": 1716, - "self": 0.0, - "children": { - "worker_root": { - "total": 54.09848149988102, - "count": 1716, - "is_parallel": true, - "self": 8.901214599609375, - "children": { - "steps_from_proto": { - "total": 0.0021652000141330063, - "count": 1, - "is_parallel": true, - "self": 0.00011900000390596688, - "children": { - "_process_rank_one_or_two_observation": { - "total": 0.0020462000102270395, - "count": 2, - "is_parallel": true, - "self": 0.0020462000102270395 - } - } - }, - "UnityEnvironment.step": { - "total": 45.19510170025751, - "count": 1716, - "is_parallel": true, - "self": 0.1761415013752412, - "children": { - "UnityEnvironment._generate_step_input": { - "total": 0.13446629943791777, - "count": 1716, - "is_parallel": true, - "self": 0.13446629943791777 - }, - "communicator.exchange": { - "total": 44.52655239921296, - "count": 1716, - "is_parallel": true, - "self": 44.52655239921296 - }, - "steps_from_proto": { - "total": 0.3579415002313908, - "count": 1716, - "is_parallel": true, - "self": 0.14428380085155368, - "children": { - "_process_rank_one_or_two_observation": { - "total": 0.21365769937983714, - "count": 3432, - "is_parallel": true, - "self": 0.21365769937983714 - } - } - } - } - } - } - } - } - } - } - }, - "trainer_advance": { - "total": 3.543119700567331, - "count": 1716, - "self": 0.04404330183751881, - "children": { - "process_trajectory": { - "total": 0.07365829896298237, - "count": 1716, - "self": 0.07365829896298237 - }, - "_update_policy": { - "total": 3.42541809976683, - "count": 717, - "self": 0.0047104001569096, - "children": { - "OffPolicyTrainer._update_policy": { - "total": 3.4207076996099204, - "count": 717, - "self": 0.8939943996374495, - "children": { - "TorchSACOptimizer.update": { - "total": 2.526713299972471, - "count": 49, - "self": 2.526713299972471 - } - } - } - } - } - } - } - } - }, - "trainer_threads": { - "total": 1.9999861251562834e-06, - "count": 1, - "self": 1.9999861251562834e-06 - }, - "TrainerController._save_models": { - "total": 0.2519290999916848, - "count": 1, - "self": 0.03625050000846386, - "children": { - "RLTrainer._checkpoint": { - "total": 0.21567859998322092, - "count": 1, - "self": 0.21567859998322092 - } - } - } - } - } - } -} \ No newline at end of file diff --git a/results/WormAlpha1/run_logs/training_status.json b/results/WormAlpha1/run_logs/training_status.json deleted file mode 100644 index f263e55..0000000 --- a/results/WormAlpha1/run_logs/training_status.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "Worm": { - "checkpoints": [ - { - "steps": 1000, - "file_path": "results\\WormAlpha1\\Worm\\Worm-1000.onnx", - "reward": null, - "creation_time": 1712854348.647669, - "auxillary_file_paths": [ - "results\\WormAlpha1\\Worm\\Worm-1000.pt" - ] - } - ], - "final_checkpoint": { - "steps": 1000, - "file_path": "results\\WormAlpha1\\Worm.onnx", - "reward": null, - "creation_time": 1712854348.647669, - "auxillary_file_paths": [ - "results\\WormAlpha1\\Worm\\Worm-1000.pt" - ] - } - }, - "metadata": { - "stats_format_version": "0.3.0", - "mlagents_version": "1.1.0.dev0", - "torch_version": "2.2.1+cpu" - } -} \ No newline at end of file