diff --git a/Cargo.toml b/Cargo.toml
index a8ecfdf4..9780748c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,31 +11,22 @@ edition = "2018"
 csv = "1.1.3"
 # we need new version to enable static builds
 fasthash = "0.4"
-# fasthash = { git = "https://github.com/flier/rust-fasthash", rev="6ca68b93f7" }
-# fasthash = { git = "https://github.com/flier/rust-fasthash", rev="ef0c52b4157af9a1a7d19b2a37658b6c26a6bea6" }
 serde = {version = "1.0.114" , features = ["derive"]}
 serde_json = "1.0.55"
-#fastapprox = "0.3.0"
 clap = "2.33.1"
 byteorder = "1.3.4"
-#backtrace = "0.3.46"
-#triomphe = "0.1.1"
 merand48 = "0.1.0"
 daemonize = "0.4.1"
 lz4 = "1.23.2"
 nom = "7"
 dyn-clone = "1.0"
-#funty="=1.1.0"	# no need for pinning any more
 rand = "0.8.5"
 rand_distr = "0.4.3"
-#rand_core = "0.4.2"
 rand_xoshiro = "0.6.0"
-# We'll use cloudflare's zlib as it is the fastest game in town
-#flate2 = "1.0" #minz library
-#flate2 = { version = "1.0", features = ["zlib"], default-features = false }
 flate2 = { version = "1.0", features = ["cloudflare_zlib"], default-features = false }
 shellwords = "1.1.0"
 blas = "0.22"
+#jemallocator = "0.5.0"
 intel-mkl-src = {version= "0.7.0", default-features = false, features=["download", "mkl-static-lp64-seq"]}
 libm = "0.2.6"
 [build-dependencies]
@@ -44,34 +35,16 @@ cbindgen = "0.23.0"
 [lib]
 crate_type = ["cdylib"]
 doctest = false
-#blas = "0.22"
-#intel-mkl-src = {version= "0.7.0", default-features = false, features=["download", "mkl-static-lp64-seq"]}
-#blas = "0.22"
-#intel-mkl-src = {version= "0.7.0", default-features = false, features=["download", "mkl-static-lp64-seq"]}
-#blas-src = { version = "0.8", features = ["intel-mkl"] }
-#openblas-src = {version = "0.10.4", features = ["static"]}
-#cblas = "0.4.0"
-#intel-mkl-src = {version= "0.7.0", default-features = false, features=["download", "mkl-static-lp64-seq"]}
-#blas-src = "0.8.0"
-#blas = "0.22"
-#openblas-src = {version = "0.10.4", features=["static"]}
-
-#blas = "0.22"
-#openblas-src = {version = "0.10.4" }
-
-#blas-src = { version = "0.8", features = ["intel-mkl"] }
-
-#rust-blas="0.2.0"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 mockstream = "0.0.3"
 
 [profile.release]
-debug = true
-#lto = 'fat'
-#panic = 'abort'
-#codegen-units=1
+debug = false
+lto = false
+panic = 'abort'
+codegen-units=1
 
 [profile.dev]
 opt-level = 2
diff --git a/build.sh b/build.sh
new file mode 100644
index 00000000..2a31387b
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+################################################################################################
+# Entry point for building the release binary, optionally with custom rustc flags.             #
+# To try out specific flags, set them in the RUSTFLAGS environment variable (examples below).  #
+# By default, no flags are used (generic release build).                                       #
+################################################################################################
+
+cargo build --release;
+
+# Examples of building with specific flags:
+#RUSTFLAGS="-Ctarget-cpu=skylake" cargo build --release;
+#RUSTFLAGS="-Ctarget-cpu=cascadelake" cargo build --release;
diff --git a/src/main.rs b/src/main.rs
index cad73b3c..4d58a22a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,8 @@
 #![allow(unused_mut)]
 #![allow(non_snake_case)]
 #![allow(redundant_semicolons)]
+//#[global_allocator]
+//static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
 use flate2::read::MultiGzDecoder;
 use std::collections::VecDeque;
diff --git a/src/model_instance.rs b/src/model_instance.rs
index 7427ed00..ec575d8d 100644
--- a/src/model_instance.rs
+++ b/src/model_instance.rs
@@ -318,12 +318,12 @@ impl ModelInstance {
         
         if let Some(val) = cl.value_of("ffm_bit_precision") {
             mi.ffm_bit_precision = val.parse()?;
-            println!("FFM num weight bits = {}", mi.ffm_bit_precision); // vwcompat
+//            println!("FFM num weight bits = {}", mi.ffm_bit_precision); // vwcompat
         }
 
         if let Some(val) = cl.value_of("bit_precision") {
             mi.bit_precision = val.parse()?;
-            println!("Num weight bits = {}", mi.bit_precision); // vwcompat
+//            println!("Num weight bits = {}", mi.bit_precision); // vwcompat
         }
 
         mi.learning_rate 	 = parse_float("learning_rate", 	mi.learning_rate, &cl);
@@ -407,7 +407,7 @@ impl ModelInstance {
 		/*! A method that enables updating hyperparameters of an existing (pre-loaded) model.
 		Currently limited to the most commonly used hyperparameters: ffm_learning_rate, ffm_power_t, power_t, learning_rate. */
 		
-		println!("Replacing initial regressor's hyperparameters from the command line ..");
+//		println!("Replacing initial regressor's hyperparameters from the command line ..");
 		let mut replacement_hyperparam_ids: Vec<(String, String)> = vec![];
 		
 		// Handle learning rates
diff --git a/src/multithread_helpers.rs b/src/multithread_helpers.rs
index 62ff7674..3a983ff6 100644
--- a/src/multithread_helpers.rs
+++ b/src/multithread_helpers.rs
@@ -47,10 +47,11 @@ impl <T:Sized>Drop for UnsafelySharableTrait<T> {
             if count == 0 {
                 let box_to_be_dropped = ManuallyDrop::take(&mut self.content);
                 // Now this means that the content will be dropped
-                println!("Dropping BoxedRegressorTrait!");
-            } else {
-                println!("Not dropping BoxedRegressorTrait as there are still {} references!", count);
+                // println!("Dropping BoxedRegressorTrait!");
             }
+			//else {
+            //    println!("Not dropping BoxedRegressorTrait as there are still {} references!", count);
+            //}
         }
         
     }
@@ -81,7 +82,7 @@ impl BoxedRegressorTrait {
                 content: ManuallyDrop::new(r2),
                 reference_count: self.reference_count.clone()        
             };
-            println!("References to object: {}", Arc::<Mutex<PhantomData<u32>>>::strong_count(&ret.reference_count));
+            // println!("References to object: {}", Arc::<Mutex<PhantomData<u32>>>::strong_count(&ret.reference_count));
             ret
         }
     }
diff --git a/src/regressor.rs b/src/regressor.rs
index a1e2f403..3935d279 100644
--- a/src/regressor.rs
+++ b/src/regressor.rs
@@ -190,8 +190,8 @@ impl Regressor  {
                     _ => Err(format!("unknown nn initialization type: \"{}\"", init_type_str)).unwrap()
                 };
                 let neuron_type = block_neural::NeuronType::WeightedSum;
-                println!("Neuron layer: width: {}, neuron type: {:?}, dropout: {}, maxnorm: {}, init_type: {:?}",
-                                        width, neuron_type, dropout, maxnorm, init_type);
+                // println!("Neuron layer: width: {}, neuron type: {:?}, dropout: {}, maxnorm: {}, init_type: {:?}",
+                //                        width, neuron_type, dropout, maxnorm, init_type);
                 output =  block_neural::new_neuronlayer_block(&mut bg, 
                                             &mi, 
                                             output,
@@ -206,7 +206,7 @@ impl Regressor  {
                 
                 if layernorm == NNLayerNorm::BeforeActivation {
                     output = block_normalize::new_normalize_layer_block(&mut bg, &mi, output).unwrap();
-                    println!("Normalize layer before relu");
+//                    println!("Normalize layer before relu");
                 }
 
                 match activation {
@@ -231,7 +231,7 @@ impl Regressor  {
 
                 if layernorm == NNLayerNorm::AfterActivation {
                     output = block_normalize::new_normalize_layer_block(&mut bg, &mi, output).unwrap();
-                    println!("Normalize layer after relu");
+    //                println!("Normalize layer after relu");
                 }