Adding new examples
jloveric committed Jun 11, 2024
1 parent 92d82f3 commit 1faf5a3
Showing 4 changed files with 57 additions and 5 deletions.
22 changes: 20 additions & 2 deletions README.md
@@ -209,7 +209,7 @@ With polynomial using similar number of parameters.
## MNIST (convolutional)

```python
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d,discontinuous2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=40 batch_size=1024
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d,discontinuous2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=40 batch_size=1024 channels=[12,32] kernel_size=5 optimizer=sophia optimizer.lr=1e-4
```
Below, using max_abs normalization and the sophia optimizer with kernel_size=5 and channels=[12,32]. The output layer was a standard linear
layer, so only the two convolutions were high order. Nothing particularly interesting here, except
@@ -236,7 +236,7 @@ reduce the size of the network, same as above but channels=[3,8]
Creating a deeper network with 4 convolutions, max_abs normalization between layers, 2 segments, and average pooling after each pair
of convolutions (a rough sketch of this architecture follows the results table below).
```
python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d,polynomial2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3
python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d,polynomial2d,continuous2d n=2,3,4,5,6 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3 optimizer=sophia optimizer.lr=1e-3
```
| n | test (polynomial) | test (continuous) | test (discontinuous) |
|--------------|----------------------|------------------|------------------|
@@ -249,6 +249,24 @@ python examples/mnist.py -m train_fraction=1 layer_type=discontinuous2d polynomi
|8 | 0.989 | | |
|9 | 0.988 | | |
|10| 0.989 | | |
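
For orientation, here is a rough PyTorch-style sketch of the deeper network described above: four convolutions with max_abs normalization between them, average pooling after each pair, and a standard linear output layer, using channels=[16,32] and kernel_size=3 as in the command. This is only a sketch under assumptions: plain `nn.Conv2d` layers stand in for this repository's high-order convolutions (so `n` and `segments` have no counterpart here), and the layer names and exact shapes are illustrative rather than the model built by `examples/mnist.py`.

```python
import torch
from torch import nn


class MaxAbsNorm(nn.Module):
    """Scale each sample so its largest absolute activation equals 1."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        scale = x.flatten(1).abs().amax(dim=1).clamp_min(1e-12)
        return x / scale.view(-1, *([1] * (x.dim() - 1)))


class SketchNet(nn.Module):
    """Stand-in for the 4-convolution MNIST model; not the repo's high-order layers."""

    def __init__(self, channels=(16, 32), kernel_size=3, classes=10):
        super().__init__()
        c1, c2 = channels
        self.features = nn.Sequential(
            # First pair of convolutions, then average pooling.
            nn.Conv2d(1, c1, kernel_size), MaxAbsNorm(),
            nn.Conv2d(c1, c1, kernel_size), MaxAbsNorm(),
            nn.AvgPool2d(2),
            # Second pair of convolutions, then average pooling.
            nn.Conv2d(c1, c2, kernel_size), MaxAbsNorm(),
            nn.Conv2d(c2, c2, kernel_size), MaxAbsNorm(),
            nn.AvgPool2d(2),
        )
        self.classifier = nn.LazyLinear(classes)  # standard linear output layer

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.classifier(self.features(x).flatten(1))


print(SketchNet()(torch.randn(2, 1, 28, 28)).shape)  # torch.Size([2, 10])
```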

Reduced the learning rate on sophia by a factor of 10 and ran up to a 9th order polynomial (n=10):
```
python examples/mnist.py -m train_fraction=1 layer_type=polynomial2d n=2,3,4,5,6,7,8,9,10 segments=2 max_epochs=100 batch_size=1024 channels=[16,32] output_layer_type=auto double=True kernel_size=3 normalization=max_abs optimizer=sophia optimizer.lr=1e-4
```
| n | test (polynomial) |
|--------------|----------------------|
|2 | 0.910 |
|3 | 0.985 |
|4 | 0.988 |
|5 | 0.989 |
|6 | 0.988 |
|7 | 0.990 |
|8 | 0.991 |
|9 | 0.991 |
|10| 0.989 |
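
A note on the `n` column: "run up to a 9th order polynomial" with `n` going to 10 suggests `n` counts the polynomial coefficients (basis points) per weight, so the order is `n - 1`. That reading is an assumption; the snippet below just illustrates the arithmetic.

```python
# Assumed relationship between the sweep parameter n and the polynomial order:
# n coefficients define a polynomial of degree n - 1, so n=10 -> 9th order.
import numpy as np

n = 10
coefficients = np.random.randn(n)  # n coefficients for one polynomial
x = np.linspace(-1.0, 1.0, 5)
y = np.polyval(coefficients, x)    # evaluates a degree n - 1 polynomial
print(len(coefficients) - 1)       # 9
```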


## CIFAR100 (convolutional)

```
2 changes: 2 additions & 0 deletions config/mnist_config.yaml
@@ -20,3 +20,5 @@ output_layer_type: "linear"
# Double the number of convolutions if True
double: False
normalization : max_abs # max_center
defaults:
- optimizer: sophia
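
The `defaults` list above tells Hydra to compose an `optimizer` config group into the run config, selected and overridden from the command line (e.g. `optimizer=sophia optimizer.lr=1e-4` in the README commands). The group files themselves are not shown in this commit, so the sketch below is an assumption about what the composed `cfg.optimizer` ends up containing, limited to the fields that `examples/mnist.py` actually reads (`name`, `lr`, `rho`; `patience`/`factor` for the adam/lion branches) and the values used in `tests/test_examples.py`.

```python
# Hedged sketch of the composed config after `defaults: - optimizer: sophia`.
# Field names/values mirror configure_optimizers and the test config, not a repo file.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "optimizer": {
            "name": "sophia",  # selects the branch in configure_optimizers
            "lr": 1e-4,
            "rho": 0.035,      # SophiaG-specific; adam/lion would also carry patience/factor
        }
    }
)
print(cfg.optimizer.name, cfg.optimizer.lr)
```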
32 changes: 29 additions & 3 deletions examples/mnist.py
@@ -10,7 +10,7 @@
import torchvision.transforms as transforms
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from torchmetrics.functional import accuracy
from Sophia import SophiaG

@@ -224,7 +224,31 @@ def test_step(self, batch, batch_idx):
        return self.eval_step(batch, batch_idx, "test")

    def configure_optimizers(self):
        return SophiaG(self.parameters(), lr=0.001, rho=0.035)
        if self._cfg.optimizer.name == "adam":
            optimizer = optim.Adam(self.parameters(), lr=self._cfg.optimizer.lr)
            lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                patience=self._cfg.optimizer.patience,
                factor=self._cfg.optimizer.factor,
                verbose=True,
            )
            return [optimizer], [lr_scheduler]
        elif self._cfg.optimizer.name == "lion":
            optimizer = Lion(self.parameters(), lr=self._cfg.optimizer.lr)
            lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                patience=self._cfg.optimizer.patience,
                factor=self._cfg.optimizer.factor,
                verbose=True,
            )
            return [optimizer], [lr_scheduler]
        elif self._cfg.optimizer.name == "sophia":
            optimizer = SophiaG(
                self.parameters(),
                lr=self._cfg.optimizer.lr,
                rho=self._cfg.optimizer.rho,
            )
            return optimizer


def mnist(cfg: DictConfig):
@@ -236,14 +260,16 @@ def mnist(cfg: DictConfig):
    except:
        pass

    lr_monitor = LearningRateMonitor(logging_interval="epoch")

    early_stop_callback = EarlyStopping(
        monitor="val_loss", min_delta=0.00, patience=20, verbose=False, mode="min"
    )

    trainer = Trainer(
        max_epochs=cfg.max_epochs,
        accelerator=cfg.accelerator,
        callbacks=[early_stop_callback],
        callbacks=[early_stop_callback, lr_monitor],
    )
    model = Net(cfg)
    trainer.fit(model)
6 changes: 6 additions & 0 deletions tests/test_examples.py
@@ -72,6 +72,7 @@ def test_cifar100():
"periodicity": 2.0,
"lr": 0.001,
"nonlinearity": False,
}
)
result = cifar100(cfg=cfg)
@@ -96,6 +97,11 @@ def test_mnist():
"double": False,
"kernel_size": 5,
"normalization": "max_abs",
"optimizer": {
"name": "sophia",
"lr": 1e-4,
"rho": 0.035,
},
}
)
result = mnist(cfg=cfg)
