diff --git a/model.py b/model.py
index 339d89b..94c5543 100644
--- a/model.py
+++ b/model.py
@@ -16,6 +16,8 @@ class ColorTransformerModel(L.LightningModule):
         bias: bool = False,
         alpha: float = 0,
         lr: float = 0.01,
+        loop: bool = False,
+        dropout: float = 0.5,
     ):
         super().__init__()
         self.save_hyperparameters()
@@ -28,9 +30,16 @@ class ColorTransformerModel(L.LightningModule):
         w = self.hparams.width
         d = self.hparams.depth
         bias = self.hparams.bias
-        midlayers = []
-        for _ in range(d):
-            midlayers += [nn.Linear(w, w, bias=bias), t()]
+        if self.hparams.loop:
+            midlayers = [nn.Linear(w, w, bias=bias), t()] * d
+        else:
+            midlayers = sum(
+                [
+                    [nn.Linear(w, w, bias=bias), nn.Dropout(self.hparams.dropout), t()]
+                    for _ in range(d)
+                ],
+                [],
+            )
 
         self.network = nn.Sequential(
             nn.Linear(3, w, bias=bias),
diff --git a/newsearch.py b/newsearch.py
index 6b30c69..99d5d9d 100644
--- a/newsearch.py
+++ b/newsearch.py
@@ -27,10 +27,10 @@ learning_rate_values = [1e-3]
 # learning_rate_values = [5e-4]
 # alpha_values = [0, .25, 0.5, 0.75, 1]
 # alpha = 0 is unsupervised. alpha = 1 is supervised.
-alpha_values = [0.99]
+alpha_values = [0.9]
 # widths = [2**k for k in range(4, 13)]
 # depths = [1, 2, 4, 8, 16]
-widths, depths = [512], [1, 2, 4]
+widths, depths = [512], [8]
 batch_size_values = [256]
 max_epochs_values = [100]
 
@@ -79,6 +79,7 @@ python newmain.py fit \
 --model.width {w} \
 --model.depth {d} \
 --model.bias true \
+--model.loop true \
 --model.transform relu \
 --trainer.min_epochs 10 \
 --trainer.max_epochs {me} \
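
Note on the `loop` branch (not part of the diff): multiplying a Python list of modules repeats the *same* `nn.Linear` instance `d` times, so the looped network shares a single weight matrix across all hidden layers, whereas the comprehension in the `else` branch constructs `d` independent layers, each followed by dropout. A minimal sketch of the difference, with arbitrary `d, w` values and `nn.ReLU` standing in for the configured transform `t()`:

```python
import torch.nn as nn

d, w = 4, 8

# List multiplication repeats the same module objects: one shared Linear.
tied = nn.Sequential(*([nn.Linear(w, w), nn.ReLU()] * d))
# A comprehension constructs a fresh Linear per repetition.
fresh = nn.Sequential(*[m for _ in range(d) for m in (nn.Linear(w, w), nn.ReLU())])

# nn.Module.parameters() deduplicates shared tensors, so the tied stack
# reports d times fewer parameters than the fresh one.
print(sum(p.numel() for p in tied.parameters()))   # 72  = w*w + w
print(sum(p.numel() for p in fresh.parameters()))  # 288 = d * (w*w + w)
```

Also worth noting: the `loop` branch omits `nn.Dropout`, so runs launched with `--model.loop true` trade dropout regularization for weight tying.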