Commit c1e27f3

final commit
1 parent 8ecbf02 commit c1e27f3

198 files changed (+123, -545 lines)

.virtual_documents/Dimensionality_Reduction.ipynb

-192 lines (this file was deleted)

.virtual_documents/testing.ipynb

-26 lines (this file was deleted)

DimRed/evaluation.py

+22-17
@@ -80,18 +80,18 @@ def sklearn(
         metrics = classification_report(y_test, y_preds, output_dict=True)
         results[model.__class__.__name__] = metrics
         wandb.log(metrics)
-        # wandb.sklearn.plot_classifier(
-        #     model,
-        #     X_train,
-        #     X_test,
-        #     y_train,
-        #     y_test,
-        #     y_preds,
-        #     y_probas,
-        #     range(min(y_probas.shape)),
-        #     model_name=name,
-        #     feature_names=None,
-        # )
+        wandb.sklearn.plot_classifier(
+            model,
+            X_train,
+            X_test,
+            y_train,
+            y_test,
+            y_preds,
+            y_probas,
+            range(min(y_probas.shape)),
+            model_name=name,
+            feature_names=None,
+        )
         if metrics[self.metric] > best_model[0]:
             best_model[0] = metrics[self.metric]
             best_model[1] = metrics
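
The re-enabled `wandb.sklearn.plot_classifier` call logs classifier diagnostics (confusion matrix, ROC and precision-recall curves, and similar plots) to the active run. A minimal, self-contained sketch of the context this call assumes; the dataset, classifier, and project name below are illustrative, not taken from the commit:

```python
# Hypothetical context for the re-enabled plot_classifier call.
import wandb
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

wandb.init(project="DimRed")  # project name is an assumption
model = RandomForestClassifier().fit(X_train, y_train)
y_preds = model.predict(X_test)
y_probas = model.predict_proba(X_test)

# Logs confusion matrix, ROC/PR curves, class proportions, etc. to the run
wandb.sklearn.plot_classifier(
    model,
    X_train,
    X_test,
    y_train,
    y_test,
    y_preds,
    y_probas,
    labels=list(range(y_probas.shape[1])),  # one label per class column
    model_name=model.__class__.__name__,
    feature_names=None,
)
wandb.finish()
```
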
@@ -139,7 +139,7 @@ def xgb(
             cp.asarray(X_train),
             cp.asarray(y_train),
             eval_set=[(cp.asarray(X_test), cp.asarray(y_test))],
-            # callbacks=[WandbCallback(log_model=True)],
+            callbacks=[WandbCallback(log_model=True)],
         )
         y_preds = model.predict(X_test)
         metrics = classification_report(y_test, y_preds, output_dict=True)
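
Re-enabling `WandbCallback` presumably relies on wandb's XGBoost integration; a minimal sketch, assuming the usual `wandb.integration.xgboost` import path (the dataset, model parameters, and project name are placeholders):

```python
# Hypothetical sketch of the wandb XGBoost callback the commit re-enables.
import wandb
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from wandb.integration.xgboost import WandbCallback

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

wandb.init(project="DimRed")  # project name is an assumption
# Recent xgboost versions accept callbacks in the constructor; the project code
# passes them to fit(), which older versions also allow.
model = xgb.XGBClassifier(
    n_estimators=50,
    callbacks=[WandbCallback(log_model=True)],  # logs eval metrics and the trained booster
)
model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
wandb.finish()
```
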
@@ -189,14 +189,14 @@ def lgb(
             self.lgb_config,
             train_data,
             valid_sets=[test_data],
-            # callbacks=[wandb_callback()],
+            callbacks=[wandb_callback()],
         )
         y_preds = model.predict(X_test)
         metrics = classification_report(
             y_test, np.argmax(y_preds, axis=1), output_dict=True
         )
         results[name] = metrics
-        # log_summary(model, save_model_checkpoint=True)
+        log_summary(model, save_model_checkpoint=True)
         wandb.log(metrics)
         wandb.finish()
         dirs = director_exist(os.path.join(os.getenv("MODEL_PATH"), run))
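
Likewise, `wandb_callback` and `log_summary` match wandb's LightGBM helpers; a minimal sketch of how they are typically wired up (the dataset and `params` below are placeholders, not the project's `lgb_config`):

```python
# Hypothetical sketch of the LightGBM/wandb helpers the commit re-enables.
import lightgbm as lgb
import wandb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from wandb.integration.lightgbm import log_summary, wandb_callback

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

wandb.init(project="DimRed")  # project name is an assumption
params = {"objective": "multiclass", "num_class": 3, "metric": "multi_logloss"}
model = lgb.train(
    params,
    train_data,
    valid_sets=[test_data],
    callbacks=[wandb_callback()],  # streams per-iteration eval metrics to wandb
)
log_summary(model, save_model_checkpoint=True)  # logs feature importance and a checkpoint
wandb.finish()
```
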
@@ -225,7 +225,12 @@ def evaluate(self) -> Dict[str, Dict[str, Dict[str, Union[str, int]]]]:
         for pipeline_variation in inner_iterator:
             name_of_pipeline = pipeline_variation.steps[-1][-1].__class__.__name__
             pipeline_performance = {}
-            X_train = pipeline_variation.fit_transform(self._data["X_train"])
+            try:
+                X_train = pipeline_variation.fit_transform(self._data["X_train"])
+            except:
+                X_train = pipeline_variation.fit_transform(
+                    self._data["X_train"], self._data["y_train"]
+                )
             X_test = pipeline_variation.transform(self._data["X_test"])
             inner_iterator.set_description("Sklearn Model...")
             pipeline_performance, sklearn_metrics = self.sklearn(
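
The new try/except appears to accommodate pipelines whose reducer is supervised: unsupervised transformers fit on `X_train` alone, while supervised ones also need `y_train`, so the fallback retries `fit_transform` with the labels. A minimal illustration of that difference, using scikit-learn classes that are not taken from the commit:

```python
# PCA is unsupervised and fits on X alone; LDA is supervised and requires y.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = load_iris(return_X_y=True)

X_pca = PCA(n_components=2).fit_transform(X)                            # works without y
X_lda = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)  # raises without y
```

Catching `TypeError` rather than using a bare `except` would keep unrelated fitting errors from being silently retried; the bare form is what the commit itself uses.
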
@@ -264,7 +269,7 @@ def evaluate(self) -> Dict[str, Dict[str, Dict[str, Union[str, int]]]]:
             )
             if float(avg_var) > float(best_performing_pipeline[0]):
                 best_performing_pipeline[0] = str(avg_var)
-                best_performing_pipeline[1] = name_of_pipeline
+                best_performing_pipeline[1] = str(pipeline_variation)
             inner_iterator.set_description(f"{name_of_pipeline} Done :)")
             best_performances = add_to_dictionary(
                 best_performances, best_performing_pipeline

README.md

+28-2
@@ -1,2 +1,28 @@
-# DimRed-Unveiling-Data-Insights-with-Dimensionality-Reduction
-Uncover hidden patterns, simplify datasets & accelerate analysis. From PCA to t-SNE, explore advanced techniques with DimRed.
+# DimRed: Unveiling Data Insights with Dimensionality Reduction
+
+DimRed is a Python library designed to uncover hidden patterns, simplify datasets, and accelerate analysis using various dimensionality reduction techniques. From Principal Component Analysis (PCA) to t-Distributed Stochastic Neighbor Embedding (t-SNE), DimRed offers a range of advanced methods for exploring high-dimensional data in a more manageable and interpretable form.
+
+## Features
+
+- **Multiple Techniques**: Explore a variety of dimensionality reduction techniques, including PCA, Incremental PCA, Kernel PCA, t-SNE, and more.
+- **Simplify Analysis**: Reduce the complexity of high-dimensional datasets while preserving important information and patterns.
+- **Visualize Data**: Visualize the transformed data in lower dimensions to gain insights and identify clusters or trends.
+- **Accelerate Processing**: Speed up the analysis process by reducing the number of features while maintaining data integrity.
+
+## Getting Started
+
+### Installation
+
+Install the required dependencies using pip:
+
+```
+pip install -r requirements.txt
+```
+
+## Contributing
+
+Contributions are welcome! Please feel free to open issues for bug fixes, feature requests, or any suggestions for improvement.
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](https://github.com/Programmer-RD-AI/Dimensionality-Reduction/blob/main/LICENSE) file for details.
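
For a concrete picture of the techniques the new README lists, here is a minimal sketch using their scikit-learn implementations; it does not use DimRed's own API (which this commit does not document), and the dataset is only an example:

```python
# Illustrative only: scikit-learn versions of the techniques named in the README.
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE

X, y = load_digits(return_X_y=True)

# Reduce the 64-dimensional digit images to 2-D embeddings
X_pca = PCA(n_components=2).fit_transform(X)
X_kpca = KernelPCA(n_components=2, kernel="rbf").fit_transform(X)
X_tsne = TSNE(n_components=2, random_state=0).fit_transform(X)

# Visualize the embeddings to look for clusters or trends
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, (name, emb) in zip(
    axes, [("PCA", X_pca), ("Kernel PCA", X_kpca), ("t-SNE", X_tsne)]
):
    ax.scatter(emb[:, 0], emb[:, 1], c=y, s=5, cmap="tab10")
    ax.set_title(name)
plt.show()
```
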
