diff --git a/makefile b/makefile index 6e25865..7da0b5d 100644 --- a/makefile +++ b/makefile @@ -4,7 +4,7 @@ lint: flake8 --ignore E501,W503 . test: - python main.py --alpha 4 --lr 2e-4 --max_epochs 200 + python main.py --alpha 2 --lr 2e-4 --max_epochs 200 search: python search.py diff --git a/model.py b/model.py index 9d87202..cc358a8 100644 --- a/model.py +++ b/model.py @@ -13,15 +13,15 @@ class ColorTransformerModel(pl.LightningModule): # Model layers self.layers = nn.Sequential( - nn.Linear(5, 128), - nn.Linear(128, 3), + nn.Linear(5, 128, bias=False), + nn.Linear(128, 3, bias=False), nn.ReLU(), - nn.Linear(3, 64), - nn.Linear(64, 128), - nn.Linear(128, 256), - nn.Linear(256, 128), + nn.Linear(3, 64, bias=False), + nn.Linear(64, 128, bias=False), + nn.Linear(128, 256, bias=False), + nn.Linear(256, 128, bias=False), nn.ReLU(), - nn.Linear(128, 1), + nn.Linear(128, 1, bias=False), ) def forward(self, x): @@ -85,7 +85,7 @@ class ColorTransformerModel(pl.LightningModule): def configure_optimizers(self): optimizer = torch.optim.AdamW( - self.parameters(), lr=self.hparams.learning_rate, weight_decay=1e-2 + self.parameters(), lr=self.hparams.learning_rate, ) lr_scheduler = ReduceLROnPlateau( optimizer, mode="min", factor=0.1, patience=10, verbose=True