Adding new examples
jloveric committed Jun 11, 2024
1 parent 92d82f3 commit 1faf5a3
Showing 4 changed files with 57 additions and 5 deletions.
22 changes: 20 additions & 2 deletions README.md
@@ -209,7 +209,7 @@ With polynomial using similar number of parameters.
## MNIST (convolutional)

```python
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d,discontinuous2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=40 batch_size=1024
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d,discontinuous2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=40 batch_size=1024 channels=[12,32] kernel_size=5 optimizer=sophia optimizer.lr=1e-4
```
Below, using max_abs normalization and the sophia optimizer with kernel_size=5 and channels=[12,32]. The output layer was a standard linear
layer, so only the two convolutions were high order. Nothing particularly interesting here, except
@@ -236,7 +236,7 @@ reduce the size of the network, same as above but channels=[3,8]
Creating a deeper network with 4 convolutions, max_abs normalization between layers, 2 segments, and average pooling after each pair
of convolutions (a rough sketch of this architecture follows the results table below).
```
python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d,polynomial2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3
python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d,polynomial2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3 optimizer=sophia optimizer.lr=1e-3
```
| n | test (polynomial) | test (continuous) | test (discontinuous) |
|--------------|----------------------|------------------|------------------|
@@ -249,6 +249,24 @@ python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d polynomi
|8 | 0.989 | | |
|9 | 0.988 | | |
|10| 0.989 | | |
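
For orientation, here is a rough PyTorch-style sketch of the deeper network described above: four convolutions with max_abs normalization between them, average pooling after each pair, and a standard linear output layer, using channels=[16,32] and kernel_size=3 as in the command. This is only a sketch under assumptions: plain `nn.Conv2d` layers stand in for this repository's high-order convolutions (so `n` and `segments` have no counterpart here), and the layer names and exact shapes are illustrative rather than the model built by `examples/mnist.py`.

```python
import torch
from torch import nn


class MaxAbsNorm(nn.Module):
    """Scale each sample so its largest absolute activation equals 1."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        scale = x.flatten(1).abs().amax(dim=1).clamp_min(1e-12)
        return x / scale.view(-1, *([1] * (x.dim() - 1)))


class SketchNet(nn.Module):
    """Stand-in for the 4-convolution MNIST model; not the repo's high-order layers."""

    def __init__(self, channels=(16, 32), kernel_size=3, classes=10):
        super().__init__()
        c1, c2 = channels
        self.features = nn.Sequential(
            # First pair of convolutions, then average pooling.
            nn.Conv2d(1, c1, kernel_size), MaxAbsNorm(),
            nn.Conv2d(c1, c1, kernel_size), MaxAbsNorm(),
            nn.AvgPool2d(2),
            # Second pair of convolutions, then average pooling.
            nn.Conv2d(c1, c2, kernel_size), MaxAbsNorm(),
            nn.Conv2d(c2, c2, kernel_size), MaxAbsNorm(),
            nn.AvgPool2d(2),
        )
        self.classifier = nn.LazyLinear(classes)  # standard linear output layer

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.classifier(self.features(x).flatten(1))


print(SketchNet()(torch.randn(2, 1, 28, 28)).shape)  # torch.Size([2, 10])
```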

Reduced the learning rate on sophia by a factor of 10 and ran up to a 9th order polynomial (n=10):
```
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d n=2,3,4,5,6,7,8,9,10 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3 normalization=max_abs optimizer=sophia optimizer.lr=1e-4
```
| n | test (polynomial) |
|--------------|----------------------|
|2 | 0.910 |
|3 | 0.985 |
|4 | 0.988 |
|5 | 0.989 |
|6 | 0.988 |
|7 | 0.990 |
|8 | 0.991 |
|9 | 0.991 |
|10| 0.989 |
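
A note on the `n` column: "run up to a 9th order polynomial" with `n` going to 10 suggests `n` counts the polynomial coefficients (basis points) per weight, so the order is `n - 1`. That reading is an assumption; the snippet below just illustrates the arithmetic.

```python
# Assumed relationship between the sweep parameter n and the polynomial order:
# n coefficients define a polynomial of degree n - 1, so n=10 -> 9th order.
import numpy as np

n = 10
coefficients = np.random.randn(n)  # n coefficients for one polynomial
x = np.linspace(-1.0, 1.0, 5)
y = np.polyval(coefficients, x)    # evaluates a degree n - 1 polynomial
print(len(coefficients) - 1)       # 9
```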


## CIFAR100 (convolutional)

```
2 changes: 2 additions & 0 deletions config/mnist_config.yaml
@@ -20,3 +20,5 @@ output_layer_type: "linear"
# Double the number of convolutions if True
double: False
normalization : max_abs # max_center
defaults:
- optimizer: sophia
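
The `defaults` list above tells Hydra to compose an `optimizer` config group into the run config, selected and overridden from the command line (e.g. `optimizer=sophia optimizer.lr=1e-4` in the README commands). The group files themselves are not shown in this commit, so the sketch below is an assumption about what the composed `cfg.optimizer` ends up containing, limited to the fields that `examples/mnist.py` actually reads (`name`, `lr`, `rho`; `patience`/`factor` for the adam/lion branches) and the values used in `tests/test_examples.py`.

```python
# Hedged sketch of the composed config after `defaults: - optimizer: sophia`.
# Field names/values mirror configure_optimizers and the test config, not a repo file.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "optimizer": {
            "name": "sophia",  # selects the branch in configure_optimizers
            "lr": 1e-4,
            "rho": 0.035,      # SophiaG-specific; adam/lion would also carry patience/factor
        }
    }
)
print(cfg.optimizer.name, cfg.optimizer.lr)
```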
32 changes: 29 additions & 3 deletions examples/mnist.py
@@ -10,7 +10,7 @@
import torchvision.transforms as transforms
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from torchmetrics.functional import accuracy
from Sophia import SophiaG

@@ -224,7 +224,31 @@ def test_step(self, batch, batch_idx):
        return self.eval_step(batch, batch_idx, "test")

    def configure_optimizers(self):
        return SophiaG(self.parameters(), lr=0.001, rho=0.035)
        if self._cfg.optimizer.name == "adam":
            optimizer = optim.Adam(self.parameters(), lr=self._cfg.optimizer.lr)
            lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                patience=self._cfg.optimizer.patience,
                factor=self._cfg.optimizer.factor,
                verbose=True,
            )
            return [optimizer], [lr_scheduler]
        elif self._cfg.optimizer.name == "lion":
            optimizer = Lion(self.parameters(), lr=self._cfg.optimizer.lr)
            lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                patience=self._cfg.optimizer.patience,
                factor=self._cfg.optimizer.factor,
                verbose=True,
            )
            return [optimizer], [lr_scheduler]
        elif self._cfg.optimizer.name == "sophia":
            optimizer = SophiaG(
                self.parameters(),
                lr=self._cfg.optimizer.lr,
                rho=self._cfg.optimizer.rho,
            )
            return optimizer


def mnist(cfg: DictConfig):
@@ -236,14 +260,16 @@ def mnist(cfg: DictConfig):
    except:
        pass

    lr_monitor = LearningRateMonitor(logging_interval="epoch")

    early_stop_callback = EarlyStopping(
        monitor="val_loss", min_delta=0.00, patience=20, verbose=False, mode="min"
    )

    trainer = Trainer(
        max_epochs=cfg.max_epochs,
        accelerator=cfg.accelerator,
        callbacks=[early_stop_callback],
        callbacks=[early_stop_callback, lr_monitor],
    )
    model = Net(cfg)
    trainer.fit(model)
6 changes: 6 additions & 0 deletions tests/test_examples.py
@@ -72,6 +72,7 @@ def test_cifar100():
"periodicity": 2.0,
"lr": 0.001,
"nonlinearity": False,
}
)
result = cifar100(cfg=cfg)
@@ -96,6 +97,11 @@ def test_mnist():
"double": False,
"kernel_size": 5,
"normalization": "max_abs",
"optimizer": {
"name": "sophia",
"lr": 1e-4,
"rho": 0.035,
},
}
)
result = mnist(cfg=cfg)
