Xmaster6y
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 4 additions & 4 deletions b/‎.pre-commit-config.yaml
Lines changed: 4 additions & 4 deletions
diff --git a/‎poetry.lock
Lines changed: 312 additions & 19 deletions b/‎poetry.lock
Lines changed: 312 additions & 19 deletions
diff --git a/‎pyproject.toml
Lines changed: 4 additions & 1 deletion b/‎pyproject.toml
Lines changed: 4 additions & 1 deletion
diff --git a/‎scripts/.gitignore
Lines changed: 1 addition & 0 deletions b/‎scripts/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎scripts/create_figure.py
Lines changed: 0 additions & 1 deletion b/‎scripts/create_figure.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎scripts/make_datasets.py
Lines changed: 1 addition & 1 deletion b/‎scripts/make_datasets.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/register_wandb_dataset.py
Lines changed: 85 additions & 0 deletions b/‎scripts/register_wandb_dataset.py
Lines changed: 85 additions & 0 deletions
@@ -134,5 +134,6 @@ debug
 *.zip
 lc0
 !bin/lc0
+wandb
 
 *secret*
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: 23.11.0
+    rev: 24.2.0
     hooks:
     -   id: black
         args: ["--config", "pyproject.toml"]
@@ -20,17 +20,17 @@ repos:
     hooks:
     -   id: poetry-check
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.7.1
+    rev: v1.8.0
     hooks:
     -   id: mypy
         additional_dependencies: ['types-requests', 'types-toml']
 -   repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.0.0
     hooks:
     -   id: flake8
         args: ['--ignore=E203,W503', '--per-file-ignores=__init__.py:F401']
 -   repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
     -   id: isort
         args: ["--settings-path", "pyproject.toml"]
 
@@ -4,6 +4,7 @@ line-length = 79
 [tool.isort]
 profile = "black"
 line_length = 79
+src_paths = ["src", "tests", "scripts", "docs", "demo"]
 
 [tool.poetry]
 name = "lczerolens"
@@ -28,12 +29,13 @@ python = "^3.9"
 python-chess = "^1.999"
 torch = ">=2"
 onnx2torch = "^1.5.13"
-tensordict = "^0.2.1"
+tensordict = "^0.3.0"
 gradio = {version = "^4.14.0", optional = true}
 zennit = "<=0.4.6"
 jsonlines = "^4.0.0"
 scikit-learn = "^1.4.0"
 zennit-crp = "^0.6.0"
+einops = "^0.7.0"
 
 [tool.poetry.extras]
 demo = ["gradio"]
@@ -77,6 +79,7 @@ optional = true
 safetensors = "^0.4.2"
 pylatex = "^1.4.2"
 matplotlib = "^3.8.2"
+wandb = "^0.16.3"
 
 [build]
 target-dir = "build/dist"
@@ -1,3 +1,4 @@
 im_viz
 results
 clusters
+saes
@@ -1,7 +1,6 @@
 """Nice plotting of chessboard and heatmap with arrows.
 """
 
-
 import chess
 from pylatex import Figure, NoEscape, SubFigure
 
 
@@ -21,7 +21,7 @@
 #######################################
 # HYPERPARAMETERS
 #######################################
-parser = argparse.ArgumentParser("leela")
+parser = argparse.ArgumentParser("make-datasets")
 parser.add_argument("--output-root", type=str, default=".")
 make_test_10 = False
 make_test_5000 = False
 
@@ -0,0 +1,85 @@
+"""Register a dataset in Weights & Biases.
+
+Run with:
+```bash
+poetry run python -m scripts.register_wandb_dataset
+```
+"""
+
+import argparse
+import os
+import random
+
+import wandb
+
+from lczerolens import BoardDataset
+
+from .secret import WANDB_API_KEY
+
+#######################################
+# HYPERPARAMETERS
+#######################################
+parser = argparse.ArgumentParser("register-wandb-dataset")
+parser.add_argument("--output-root", type=str, default=".")
+make_dataset = False  # write the train/val/test split files
+seed = 42  # seeds the shuffle that decides split membership
+train_samples = 10_000  # split sizes, cut in this order
+val_samples = 1_000
+test_samples = 1_000
+log_dataset = False  # upload the split files as W&B artifacts
+#######################################
+
+ARGS = parser.parse_args()
+os.makedirs(f"{ARGS.output_root}/assets", exist_ok=True)
+
+if make_dataset:
+    # Shuffle every board index once with a fixed seed, then cut the
+    # permutation into consecutive, non-overlapping train/val/test runs.
+    # NOTE(review): the source boards are read from ./assets relative to
+    # the working directory, not from --output-root; confirm intended.
+    dataset = BoardDataset("./assets/TCEC_game_collection_random_boards.jsonl")
+    shuffled = list(range(len(dataset)))
+    random.seed(seed)
+    random.shuffle(shuffled)
+
+    # (output file name, number of boards), in the order they are cut.
+    splits = (
+        ("TCEC_game_collection_random_boards_train.jsonl", train_samples),
+        ("TCEC_game_collection_random_boards_val.jsonl", val_samples),
+        ("TCEC_game_collection_random_boards_test.jsonl", test_samples),
+    )
+    start = 0
+    for file_name, size in splits:
+        stop = start + size
+        # Slicing past the end of the list just returns fewer indices, so
+        # a short dataset gives truncated (possibly empty) later splits.
+        dataset.save(
+            f"{ARGS.output_root}/assets/{file_name}",
+            indices=shuffled[start:stop],
+        )
+        start = stop
+
+# Upload step: publish each split file from <output-root>/assets as its
+# own W&B dataset artifact, all logged from a single run.
+if log_dataset:
+    assets_dir = f"{ARGS.output_root}/assets"
+    # (artifact name, file name inside assets_dir), one entry per split.
+    artifacts = (
+        ("tcec_train", "TCEC_game_collection_random_boards_train.jsonl"),
+        ("tcec_val", "TCEC_game_collection_random_boards_val.jsonl"),
+        ("tcec_test", "TCEC_game_collection_random_boards_test.jsonl"),
+    )
+
+    # The API key is imported from the ``secret`` module next to this
+    # script rather than being hard-coded here.
+    wandb.login(key=WANDB_API_KEY)  # type: ignore
+    # NOTE(review): job_type mirrors the make-datasets script's name;
+    # confirm that grouping is intended for this upload run.
+    with wandb.init(  # type: ignore
+        project="lczerolens-saes", job_type="make-datasets"
+    ) as run:
+        for name, file_name in artifacts:
+            # One artifact per split, holding only that split's file.
+            artifact = wandb.Artifact(name, type="dataset")  # type: ignore
+            artifact.add_file(f"{assets_dir}/{file_name}")
+            run.log_artifact(artifact)
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
 im_viz
 results
 clusters
+saes