Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set ENV
run: echo "CMAKE_POLICY_VERSION_MINIMUM=3.5" >> $GITHUB_ENV
- name: Set up Python
uses: actions/setup-python@v5
with:
Expand All @@ -29,7 +27,6 @@ jobs:
run: |
sudo apt-get update
sudo apt-get -y install libportaudio2
sudo apt-get -y install cmake
pip cache purge
rm -rf venv
python -m venv .venv
Expand Down
4 changes: 2 additions & 2 deletions _version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

__version__ = version = '0.1.dev232+g1cf2cd2.d20250402'
__version_tuple__ = version_tuple = (0, 1, 'dev232', 'g1cf2cd2.d20250402')
__version__ = version = '0.1.dev234+gb39295b.d20250402'
__version_tuple__ = version_tuple = (0, 1, 'dev234', 'gb39295b.d20250402')
2 changes: 1 addition & 1 deletion nnll.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: nnll
Version: 0.1.dev232+g1cf2cd2.d20250402
Version: 0.1.dev234+gb39295b.d20250402
Summary: Neural Network Link Library : A comprehensive modular toolkit for Diffusion and Large Language Model inference processes.
Author-email: darkshapes <91800957+exdysa@users.noreply.github.com>
License: #// SPDX-License-Identifier: blessing
Expand Down
5 changes: 5 additions & 0 deletions nnll_01/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ def debug_message(*args, **kwargs):


os.makedirs("log", exist_ok=True)
log_folder = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"log",
)
os.makedirs(log_folder, exist_ok=True)


def main():
Expand Down
92 changes: 49 additions & 43 deletions nnll_04/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@

# pylint: disable=import-outside-toplevel
from nnll_01 import debug_monitor, debug_message as dbug, info_message as nfo
from pathlib import Path


class ModelTool:
"""Output state dict from a model file at [path] to the ui"""

def __init__(self):
    """Build the extension-to-reader dispatch table; no reader is active until a file is inspected."""
    self.read_method = None
    # Group the recognized extensions by the method able to parse them:
    # header-style formats go through attempt_file_open, pickle-based
    # checkpoints through metadata_from_pickletensor.
    header_readers = dict.fromkeys((".safetensors", ".sft", ".gguf"), self.attempt_file_open)
    pickle_readers = dict.fromkeys((".pt", ".pth", ".ckpt"), self.metadata_from_pickletensor)
    self.import_map = header_readers | pickle_readers

@debug_monitor
def read_metadata_from(self, file_path_named: str) -> dict:
Expand All @@ -21,22 +30,14 @@ def read_metadata_from(self, file_path_named: str) -> dict:
:return: `dict` a dictionary including the metadata header and external file attributes\n
(model_header, disk_size, file_name, file_extension)
"""
from pathlib import Path

metadata = None
extension = Path(file_path_named).suffix
import_map = {
".safetensors": self.metadata_from_safetensors,
".sft": self.metadata_from_safetensors,
".gguf": self.metadata_from_gguf,
".pt": self.metadata_from_pickletensor,
".pth": self.metadata_from_pickletensor,
".ckpt": self.metadata_from_pickletensor,
}
if extension not in import_map:

if extension not in self.import_map:
dbug("Unsupported file extension: %s", f"{extension}. Silently ignoring")
else:
self.read_method = import_map.get(extension)
self.read_method = self.import_map.get(extension)
metadata = self.read_method(file_path_named)
if metadata is None:
nfo("Couldn't load model metadata for %s", file_path_named)
Expand Down Expand Up @@ -154,17 +155,20 @@ def create_llama_parser(self, file_path_named: str) -> dict:
"general.name",
"general.architecture",
]
for key in name_keys:
value = parser.metadata.get(key)
if value is not None:
llama_data.setdefault("name", value)
break

# Determine the dtype from parser.scores.dtype, if available
scores_dtype = getattr(parser.scores, "dtype", None)
if scores_dtype is not None:
llama_data.setdefault("dtype", scores_dtype.name) # e.g., 'float32'
# file_metadata = {UpField.METADATA: llama_data, DownField.LAYER_DATA: EmptyField.PLACEHOLDER}
try:
for key in name_keys:
value = parser.metadata.get(key)
if value is not None:
llama_data.setdefault("name", value)
break

# Determine the dtype from parser.scores.dtype, if available
scores_dtype = getattr(parser.scores, "dtype", None)
if scores_dtype is not None:
llama_data.setdefault("dtype", scores_dtype.name) # e.g., 'float32'
# file_metadata = {UpField.METADATA: llama_data, DownField.LAYER_DATA: EmptyField.PLACEHOLDER}
except ValueError as error_log:
dbug("Parsing file failed for %s", file_path_named, error_log, tb=error_log.__traceback__)

return llama_data

Expand All @@ -177,31 +181,17 @@ def attempt_file_open(self, file_path_named: str) -> dict:
:return: A `dict` with the header data prepared to read
"""
metadata = None
metadata = self.create_gguf_reader(file_path_named)
if metadata:
pass
if Path(file_path_named).suffix in [".safetensors", ".sft"]:
metadata = self.metadata_from_safetensors(file_path_named)
if not metadata or len(metadata) == 1:
metadata = self.metadata_from_safe_open(file_path_named)
else:
try:
if self.gguf_check(file_path_named):
metadata = self.create_gguf_reader(file_path_named)
if not metadata or len(metadata) == 1:
metadata = self.create_llama_parser(file_path_named)
except ValueError as error_log:
dbug("Parsing .gguf file failed for %s", file_path_named, error_log, tb=error_log.__traceback__)
return metadata

@debug_monitor
def metadata_from_gguf(self, file_path_named: str) -> dict:
    """Collect metadata from a gguf file header.

    :param file_path_named: `str` the full path to the file being opened
    :return: `dict` the key/value structure found in the file, or None when
        the file fails the gguf signature check or yields no header data
    """
    if not self.gguf_check(file_path_named):
        return None
    # attempt_file_open already yields None on failure, so its result can be
    # returned directly — equivalent to the explicit is-not-None branching.
    return self.attempt_file_open(file_path_named)

@debug_monitor
def metadata_from_safetensors(self, file_path_named: str) -> dict:
"""
Expand All @@ -220,6 +210,7 @@ def metadata_from_safetensors(self, file_path_named: str) -> dict:
header_data = json.loads(header_data.decode("utf-8", errors="strict"))
except json.JSONDecodeError as error_log:
dbug("Failed to read json from file : %s", file_path_named, error_log, tb=error_log.__traceback__)

else:
assembled_data = header_data.copy()
if assembled_data.get("__metadata__"):
Expand All @@ -231,3 +222,18 @@ def metadata_from_safetensors(self, file_path_named: str) -> dict:
# metadata_field = json.loads(str(metadata_field).replace("'", '"'))

return assembled_data

@debug_monitor
def metadata_from_safe_open(self, file_path_named: str) -> dict:
    """Collect the layer-name -> tensor-shape layout of a safetensors file.\n
    Fallback path; less performant than the `struct`-based header read\n
    :param file_path_named: `str` the full path to the file being opened
    :return: `dict` mapping each layer name to the shape of its tensor
    """
    from safetensors import safe_open

    shape_index = {}
    with safe_open(file_path_named, framework="pt", device="cpu") as layer_content:
        # Materializes each tensor just to read its shape — NOTE(review):
        # layer_content.metadata() (commented out upstream) may be cheaper; confirm.
        for layer_name in layer_content:
            shape_index[layer_name] = layer_content.get_tensor(layer_name).shape
    return shape_index
150 changes: 71 additions & 79 deletions nnll_17/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,77 @@
### <!-- // /* SPDX-License-Identifier: blessing) */ -->
### <!-- // /* d a r k s h a p e s */ -->


from nnll_01 import debug_monitor, info_message as nfo
from nnll_60 import JSONCache, HASH_PATH_NAMED

cache_manager = JSONCache(HASH_PATH_NAMED)


def hash_layers_or_files(folder_path: str, mode: str = "layer"):
    """Hash every recognized model file in a folder and report the results.

    Fix: removes the stale, interleaved remnants of the superseded
    `hash_layers(path)` implementation that shadowed this definition.

    :param folder_path: `str` directory containing the model files to analyze
    :param mode: "layer" hashes the state dict of large files; any other value
        (e.g. "file") hashes the raw file bytes instead
    :return: `dict` mapping each hex digest to the file path it came from
    """
    import os
    from pathlib import Path

    from nnll_04 import ModelTool
    from nnll_44 import compute_hash_for

    model_tool = ModelTool()
    nfo(folder_path)
    hash_values = {}

    folder_contents = os.listdir(os.path.normpath(Path(folder_path)))
    for file_name in folder_contents:
        if Path(file_name).suffix.lower() in [".safetensors", ".sft", ".gguf"]:
            file_path_named = os.path.join(folder_path, file_name)
            file_size = os.path.getsize(file_path_named)
            # Files under ~1GB are cheap enough to hash whole even in layer mode.
            if mode != "layer" or file_size < 1e9:
                hex_value = compute_hash_for(file_path_named=file_path_named)
                hash_values.setdefault(hex_value, file_path_named)
                nfo(f"'{file_name}' : '{hex_value}'")
            else:
                # Large files: hash the serialized state dict rather than raw bytes.
                state_dict = model_tool.read_metadata_from(file_path_named)
                hex_value = compute_hash_for(text_stream=str(state_dict))
                hash_values.setdefault(hex_value, file_path_named)
                nfo(f"'{hex_value}' : '{file_name}'")
    return hash_values


def check_model_identity(known_hash: dict, hex_value: str, attributes: dict | None = None) -> str | None:
    """
    Search the known-hash registry for an entry matching a computed digest.

    :param known_hash: `dict` mapping a model identifier (mir name) to a list of hash values known to identify it
    :param hex_value: `str` the digest computed from the unknown file
    :param attributes: `dict` Optional additional metadata — currently unused in the comparison; TODO confirm intent
    :return: the matching model identifier, or an implicit None when nothing matches
        (return annotation corrected from `-> bool`, which did not match the returned values)
    """
    for mir_name, data in known_hash.items():
        nfo(f"islist, {type(data)}")  # diagnostic: shows the registry value type being inspected
        if isinstance(data, list):  # only list-valued registry entries participate in the match
            for known in data:
                nfo(f"{hex_value} == {known} ??")
                if known == hex_value:
                    return mir_name
    # NOTE(review): falls through to an implicit None when no digest matches.


@cache_manager.decorator
def compare_hash_values(hash_values: dict, data: dict):
    """
    Match computed digests against the cached registry of known model hashes.

    :param hash_values: `dict` mapping hex digest -> file path (as produced by the folder hasher)
    :param data: `dict` cache contents injected by the decorator — presumably holding
        "layer_256" and "file_256" registries; verify against the HASH_PATH_NAMED schema
    :return: `dict` mapping each recognized digest to its model identifier
    """
    import os
    from tqdm.auto import tqdm

    model_id = {}
    for hex_value, file_path_named in tqdm(hash_values.items(), total=len(hash_values), position=0, leave=True):
        known_hash = ""
        # Files over 1GB were hashed by state-dict layers, so consult the layer registry first.
        if os.path.getsize(file_path_named) > 1e9:  # 1GB
            known_hash = data.get("layer_256")
        if not known_hash:
            known_hash = data.get("file_256")
        # NOTE(review): if both registry keys are absent, known_hash is None and
        # check_model_identity will raise on .items() — confirm the cache always provides them.
        trail = check_model_identity(known_hash, hex_value)
        nfo(trail)
        if trail:
            model_id.setdefault(hex_value, trail)
    return model_id


# put matching keys into index folder
# tqdm it


@debug_monitor
def main():
    """Command-line entry point: hash all model files beneath a given path and print the results."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Output the hash of a state dict or file from all model files at [path] to the console",
        epilog="Example: nnll-hash '~/Downloads/models/images'",
    )
    parser.add_argument("-m", "--mode", help="Change mode to calculate hash for the whole file", action="store_true")
    parser.add_argument("path", help="Path to directory where files should be analyzed. (default .)", default=".")

    args = parser.parse_args()
    # BUG FIX: hash_layers_or_files() takes `folder_path`, not `path`;
    # unpacking {"path": ...} with ** raised TypeError on every invocation.
    if args.mode:
        expression = {"folder_path": args.path, "mode": "file"}
    else:
        expression = {"folder_path": args.path, "mode": "layer"}

    hash_values = hash_layers_or_files(**expression)
    nfo(hash_values)


if __name__ == "__main__":
main()

# 0f742c03f5ec009baa8a1548834f24bb9e859b9261856cad6848b6f4ee1a3d7b, artium_v20.safetensors
# 0f742c03f5ec009baa8a1548834f24bb9e859b9261856cad6848b6f4ee1a3d7b, brixlAMustInYour_v5EndOfTheLine.safetensors
# 20d47474da0714979e543b6f21bd12be5b5f721119c4277f364a29e329e931b9, Fluximus20primeV10.kvyR.safetensors
# 20d47474da0714979e543b6f21bd12be5b5f721119c4277f364a29e329e931b9, ichWillMeinSteilFLUX.IMvF.safetensors
# 31164c11db41b007f15c94651a8b1fa4d24097c18782d20fabe13c59ee07aa3a, animagineXL40_v40.safetensors
# 31164c11db41b007f15c94651a8b1fa4d24097c18782d20fabe13c59ee07aa3a, luma20A320PDXL20VAE.5COZ.safetensors
# c4a8d365e7fe07c6dbdd52be922aa6dc23215142342e3e7f8f967f1a123a6982, cozylustrij.OBVE.safetensors
# c4a8d365e7fe07c6dbdd52be922aa6dc23215142342e3e7f8f967f1a123a6982, illustreijlv2.m3sq.safetensors
# d4fc7682a4ea9f2dfa0133fafb068f03fdb479158a58260dcaa24dcf33608c16, 2dnPony_v2.safetensors
# d4fc7682a4ea9f2dfa0133fafb068f03fdb479158a58260dcaa24dcf33608c16, cashmoneyAnime_niji.safetensors
# d4fc7682a4ea9f2dfa0133fafb068f03fdb479158a58260dcaa24dcf33608c16, ponyFaetality_v11.safetensors


# 176f01c1e240fd1510752aaba85c914f2a71e4f557ef75be42fe7518a4cbf890, RealHybridPonyXL.safetensors
# 20143e5445fcbc34c4f3e9608c0ed5b89f2ccc607f1e1bc95cb3d34f3bca99ff, artiusSd35LargeTurbo.grVP.safetensors
# 20d79b91c9190ead70f110f23aa7aaa23eefb4b0dec5bc8e3cc55f1d310c0483, midgardponyv32bf16.IZ78.safetensors
# 24fa0e9bb4994e7f9b262a152fc2665492a097b15576c32fbf6ccf87ebc3f513, moonmixAnimeEdition_v10Pruned.safetensors
# 291238d76c575e06aad6fcaf7d905887fa5e79c723cc5506786519d118e28058, shuttle-3-diffusion.safetensors
# 34dff8d98898baa0f10e71943e56b588cc114253b0d2f1051f3ce7a8a45fee0b, playground-v2.5.diffusers.unet.safetensors
# 36bb43a1e4904994a226d9ff64a561e42d5ed90bb2856d8a26313c6f11000c60, sd35FusionV1Fp8AIO.26bs.safetensors
# 385695bb5b49c52f45818901ee0c095cd6035ed44b0904ea80e789658c932f46, d35Fusion8StepsMergeFull_v1UNET.safetensors
# 45c56e663cf535d2ac2fdb4a12561be365646324f3c4674e770c38ecf5e40050, serenityFP8SD35LargeInclClips_v10.safetensors
# 4a1f2b8234fa4336e263842e042d42e8d64d8a4d3941d9c0c78366b50303950c, hunyuan-1.2.diffusers.safetensors
# 55c56c46ee8817a322aa20fd6dd0c90c69c1ddc6415b64da9ce1f6c32a1f6f5e, ichWilllMEINSTEIL_v10.safetensors
# 56b1ccd89b0d6ab658048aa34d659788b6ed663f13ef566f4b11bccef590b9da, playground-v2.5.diffusers.unet.fp16.safetensors
# 585555ceb76ce58efb7650fd613f8d0e648c15ed193fc23d012aec82a3ba540c, poltergeistIllustrious.lI8i.safetensors
# 7813ebe7cbaf33ea5222cd1d826902b5b4c726d4c24e66c314de29240f75008f, openflux1-v0.1.0-fp8.safetensors
# 79d2bfe93a2ac037cdc59ccb5576e32d00d75d4741fba49fc7e82b9724928216, flux.1.vae.diffusers.safetensors
# 9c2722241a368683554a22782bb9b74900da9cf31b9d2b439f390fba8a395af2, hellaineMixPDXL_v45.safetensors
# a1673b090421fecc6bdfdd485e9fa643cb4963902b2f1eab5b0cf4c95863f441, cosxl.safetensors
# ad8763121f98e28bc4a3d5a8b494c1e8f385f14abe92fc0ca5e4ab3191f3a881, flux1-dev.safetensors
# bff32fdf327b28dddc32e113aa4f2ce65f7a6a3c1c25dc6c3a8b326e92e66e4d, lumina_2.safetensors
# c0ca51fdea051fcd042bf4b56d32e1e8bb9525a921f2e197f370f101e90527f0, lumina-next-sft-diffusers.safetensors
# cb99fe4d9c2bc89062066e799494d0d8f2bbd20861ec3ebeacc5b3c7e177a707, aZovyaPhotoreal_v1Ultra.safetensors
# ced0e5c6ce95c4bbe38fe074c630cf1fa237ee98d00c1cdc4895bba45e8bd959, ponyRealism_v22MainVAE.safetensors
# d3990941477cde17033e454eefdc7282aea2efe99d51173d23bddcccb5f793fe, CounterfeitV30_v30.safetensors
# d4813e9f984aa76cb4ac9bf0972d55442923292d276e97e95cb2f49a57227843, playground-v2.5-1024px-aesthetic.fp16.safetensors
# e4d1f327a83c372276d99861a37511af313d9e7335f710f58342eb122bb04f4b, mystic-fp8.safetensors
# ef5c9cd1ebe6e3be5e8b1347eca0a6f0b138986c71220a7f1c2c14f29d01beed, flux1-schnell.safetensors
# f15aa739d3e4ee000e83d21cab019ccc52d2953ca97f022dc350f5f245480a12, hybrid-sdxl-700m.safetensors
# fe2e9edf7e3923a80e64c2552139d8bae926cc3b028ca4773573a6ba60e67c20, playground-v2.5-1024px-aesthetic.safetensors


# 62a5ab1b5fdfa4fedb32323841298c6effe1af25be94a8583350b0a7641503ef xl
# 31164c11db41b007f15c94651a8b1fa4d24097c18782d20fabe13c59ee07aa3a animage
# d4fc7682a4ea9f2dfa0133fafb068f03fdb479158a58260dcaa24dcf33608c16 pony
# c4a8d365e7fe07c6dbdd52be922aa6dc23215142342e3e7f8f967f1a123a6982 il

# ad8763121f98e28bc4a3d5a8b494c1e8f385f14abe92fc0ca5e4ab3191f3a881 flux dev
# 20d47474da0714979e543b6f21bd12be5b5f721119c4277f364a29e329e931b9

# 8c2e5bc99bc89290254142469411db66cb2ca2b89b129cd2f6982b30e26bd465, sd3.5 large
# 8c2e5bc99bc89290254142469411db66cb2ca2b89b129cd2f6982b30e26bd465


# 14d0e1b573023deb5a4feaddf85ebca10ab2abf3452c433e2e3ae93acb216443 flux hybrid
# 14d0e1b573023deb5a4feaddf85ebca10ab2abf3452c433e2e3ae93acb216443

# 117225c0e91423746114b23d3e409708ad55c90ff52b21fa7a1c5105d2e935a5, PixartXL-2-1024-ms.diffusers.safetensors
# 987f3c2ff5d399191e5fd7dd7b1f1f285c197dc8124ad77f05cde7f2fb677a3c, Pixart-Sigma-XL-2-2k-ms.diffusers.safetensors


# 2240ae134a3b983abf45200c198f07e3d8068012fbbd2f658bbaa1fd6a0629c0, lumina-next-sft-diffusers.vae.safetensors
# 2240ae134a3b983abf45200c198f07e3d8068012fbbd2f658bbaa1fd6a0629c0, playground-v2.5.diffusers.vae.safetensors
# 35641f65ad7ea600cb931dcab556f7503279f1d8d99eda170fe7976d48502a2a, auraflow.vae.diffusers.fp16.safetensors
# 35641f65ad7ea600cb931dcab556f7503279f1d8d99eda170fe7976d48502a2a, playground-v2.5.diffusers.vae.fp16.safetensors
2 changes: 1 addition & 1 deletion nnll_24/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


from nnll_01 import debug_monitor
from nnll_33 import ValueComparison


class KeyTrail:
Expand All @@ -22,7 +23,6 @@ def pull_key_names(cls, pattern_reference: dict, unpacked_metadata: dict, attrib
:param attributes: `dict` Optional additional metadata, such as tensor count and file_size (None will bypass necessity of these matches)
:return: `list` The path of keys through the target `dict` leading to a matching subtree, or None if no match is found.
"""
from nnll_33 import ValueComparison

compare = ValueComparison()

Expand Down
Loading