diff --git a/model/custom_cross_entropy.py b/model/custom_cross_entropy.py
index 41172d5..7d23c56 100644
--- a/model/custom_cross_entropy.py
+++ b/model/custom_cross_entropy.py
@@ -9,4 +9,4 @@ def __init__(self, gamma=0):
 
     def forward(self, pred, target, a1_freq_list):
         assert pred.shape[0] == target.shape[0] and target.shape[0] == a1_freq_list.shape[0]
-        return (((2*a1_freq_list)**self.gamma) * self.cross_entropy_loss(pred, target)).sum()
+        return (((2*a1_freq_list)**self.gamma) * self.cross_entropy_loss(pred, target)).mean()
diff --git a/model/gru_model.py b/model/gru_model.py
index 066108e..1cd666a 100644
--- a/model/gru_model.py
+++ b/model/gru_model.py
@@ -18,7 +18,6 @@ def __init__(self, model_config, device, type_model):
         self.device = device
 
         self._features = torch.tensor(np.load(f'model/features/region_{self.region}_model_features.npy')).to(self.device)
-        self.tanh = nn.Tanh()
 
         self.gru = nn.ModuleList(self._create_gru_cell(
             self.feature_size,
@@ -34,7 +33,8 @@ def __init__(self, model_config, device, type_model):
 
     @staticmethod
     def _create_gru_cell(input_size, hidden_units, num_layers):
-        gru = [nn.GRU(input_size, hidden_units, bidirectional=True)] + [nn.GRU(hidden_units*2, hidden_units, bidirectional=True) for _ in range(num_layers-1)]
+        gru = [nn.GRU(input_size, hidden_units, bidirectional=True)] # First layer
+        gru += [nn.GRU(hidden_units*2, hidden_units, bidirectional=True) for _ in range(num_layers-1)] # 2 -> num_layers
         return gru
 
     @staticmethod
@@ -65,7 +65,6 @@ def forward(self, x):
             gru_output.append(outputs[t_bw, :, self.hidden_units:])
             gru_output = torch.cat(gru_output, dim=1).to(self.device)
             logit = self.list_linear[index](gru_output)
-            logit = self.tanh(logit)
             logit_list.append(logit)
         return logit_list
 
@@ -84,4 +83,4 @@ def _compute_gru(self, GRUs, _input, batch_size):
             _input = output
             hidden = state
         logits, state = _input, hidden
-        return logits, state
+        return logits, state
\ No newline at end of file
diff --git a/model/single_model.py b/model/single_model.py
index 348acb9..0274ed1 100644
--- a/model/single_model.py
+++ b/model/single_model.py
@@ -17,10 +17,7 @@ def __init__(self,model_config, device, type_model=None):
 
     def forward(self, input_):
         logit_list = self.gruModel(input_)
         logit = torch.cat(logit_list, dim=0)
 
-        prediction = torch.reshape(
-            F.softmax(logit, dim=-1),
-            shape = [self.num_outputs, -1, self.num_classes]
-        )
+        prediction = F.softmax(torch.stack(logit_list), dim=-1)
         return logit, prediction
\ No newline at end of file
diff --git a/train.py b/train.py
index 2652ed8..3e91053 100644
--- a/train.py
+++ b/train.py
@@ -27,12 +27,11 @@ def run(dataloader, model_config, args, region):
     def count_parameters(model):
         return sum(p.numel() for p in model.parameters() if p.requires_grad)
     print("Number of learnable parameters:",count_parameters(model))
-
    loss_fn = CustomCrossEntropyLoss(gamma)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)
-    early_stopping = EarlyStopping(patience=10)
-
+    # early_stopping = EarlyStopping(patience=10)
+    #Start train
     _r2_score_list, loss_values = [], [] #train
     r2_val_list, val_loss_list = [], [] #validation
 
@@ -46,10 +45,10 @@ def count_parameters(model):
         r2_val_list.append(r2_val)
         val_loss_list.append(val_loss)
         print(f"[REGION {region} - EPOCHS {t+1}]\
-            train_loss: {train_loss:>7f}, train_r2: {r2_train:>7f},\
-            val_loss: {val_loss:>7f}, val_r2: {r2_val:>7f},\
-            test_loss: {test_loss:>7f}, test_r2: {r2_test:>7f}")
-
+            lr: {optimizer.param_groups[0]['lr']}\
+            train_loss: {train_loss:>7f}, train_r2: {r2_train:>7f},\
+            val_loss: {val_loss:>7f}, val_r2: {r2_val:>7f},\
+            test_loss: {test_loss:>7f}, test_r2: {r2_test:>7f}")
         # Save best model
         if val_loss < best_val_loss:
             best_val_loss = val_loss
@@ -57,10 +56,11 @@ def count_parameters(model):
             save_model(model, region, type_model, output_model_dir, best=True)
 
         #Early stopping
-        if args.early_stopping:
-            early_stopping(val_loss)
-            if early_stopping.early_stop:
-                break
+        # if args.early_stopping:
+        #     early_stopping(val_loss)
+        #     if early_stopping.early_stop:
+        #         break
+
     print(f"Best model at epochs {best_epochs} with loss: {best_val_loss}")
     draw_chart(loss_values, _r2_score_list, val_loss_list, r2_val_list, region, type_model)
     save_model(model, region, type_model, output_model_dir)
diff --git a/utils/imputation.py b/utils/imputation.py
index ea87108..0d09093 100644
--- a/utils/imputation.py
+++ b/utils/imputation.py
@@ -55,10 +55,9 @@ def train(dataloader, model, device, loss_fn, optimizer, scheduler):
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-        scheduler.step()
-
         train_loss = loss.item()
+    scheduler.step()
 
     predictions = torch.cat(predictions, dim=0)
     labels = torch.cat(labels, dim=0)
     n_samples = len(labels)
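
For reference, a minimal sketch of CustomCrossEntropyLoss after the sum-to-mean change above. The diff does not show how self.cross_entropy_loss is constructed; this sketch assumes nn.CrossEntropyLoss(reduction='none'), so per-sample losses can be scaled by the allele-frequency weight (2*a1_freq)**gamma before reduction:

import torch
import torch.nn as nn

class CustomCrossEntropyLoss(nn.Module):
    """Cross-entropy with per-sample weights (2 * a1_freq) ** gamma."""
    def __init__(self, gamma=0):
        super().__init__()
        self.gamma = gamma
        # Assumed definition (not shown in the diff): reduction='none'
        # keeps one loss value per sample so it can be weighted below.
        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction='none')

    def forward(self, pred, target, a1_freq_list):
        assert pred.shape[0] == target.shape[0] == a1_freq_list.shape[0]
        weights = (2 * a1_freq_list) ** self.gamma
        # .mean() keeps the loss scale independent of batch size,
        # unlike the previous .sum().
        return (weights * self.cross_entropy_loss(pred, target)).mean()

# Usage sketch with hypothetical shapes: 8 samples, 3 classes.
loss_fn = CustomCrossEntropyLoss(gamma=2)
pred = torch.randn(8, 3)
target = torch.randint(0, 3, (8,))
a1_freq = torch.rand(8)
print(loss_fn(pred, target, a1_freq))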