Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to visualize binsparse custom level formats #59

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions binder/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
numpy
pandas
sphinxcontrib-svgbob
toolz
2 changes: 1 addition & 1 deletion binder/runtime.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
python-3.8
python-3.10
8,053 changes: 8,053 additions & 0 deletions sparsetensorviz/notebooks/Example_Rank4-binsparse.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions sparsetensorviz/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
numpy
pandas
toolz
3 changes: 3 additions & 0 deletions sparsetensorviz/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3 :: Only",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
Expand Down
227 changes: 227 additions & 0 deletions sparsetensorviz/sparsetensorviz/_bundle_binsparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import re

import toolz

from ._core import SparseTensor, unabbreviate


def num(match, name, tokenlength=2):
    """Return how many tokens the named group ``name`` captured in ``match``.

    The captured text is treated as a run of fixed-width tokens, each
    ``tokenlength`` characters long (for example ``"S-"``), so the count is
    the captured length floor-divided by ``tokenlength``. A group that did
    not participate in the match counts as zero tokens.
    """
    captured = match.group(name)
    return 0 if captured is None else len(captured) // tokenlength


def trim(ma, s):
    """Return the part of ``s`` that follows the text consumed by match ``ma``.

    The match is expected to be anchored at the beginning of ``s``; this is
    checked with an assertion.
    """
    assert ma.start(0) == 0
    return s[ma.end(0) :]


class MatcherBase:
    """Shared base class for the structure matchers.

    Subclasses are expected to provide a class attribute ``pattern`` holding
    a compiled regular expression; this base class does not define one.
    """

    @classmethod
    def match(cls, s):
        """Apply ``cls.pattern`` at the beginning of ``s``.

        Returns whatever ``cls.pattern.match`` returns: a match object, or
        ``None`` when the pattern does not match.
        """
        compiled = cls.pattern
        return compiled.match(s)


class AllSparse(MatcherBase):
    """[S-]S

    Terminal matcher: the whole remaining string is one or more ``S`` levels
    separated by ``-``. Calling the class does not create an instance;
    ``__new__`` returns a one-element list describing a single sparse group.
    """

    pattern = re.compile("^S(?P<S>(-S)*)$")

    def __new__(cls, ma, s, *, abbreviate=False):
        # The leading "S" sits outside the named group, so add it to the count.
        sparse_count = 1 + num(ma, "S")
        if not abbreviate:
            return [f"Sparse({sparse_count})"]
        return [("S", sparse_count)]


class SparseFull(MatcherBase):
    """[S-][F-]F

    Terminal matcher: zero or more ``S-`` levels, then zero or more ``F-``
    levels, then a final ``F`` that ends the string. Calling the class does
    not create an instance; ``__new__`` returns a two-element list holding a
    sparse group followed by a dense group.
    """

    pattern = re.compile("^(?P<S>(S-)*)(?P<F>(F-)*)F$")

    def __new__(cls, ma, s, *, abbreviate=False):
        sparse_count = num(ma, "S")
        # The closing "F" sits outside the named group, so add it to the count.
        full_count = 1 + num(ma, "F")
        if not abbreviate:
            return [f"Sparse({sparse_count})", f"Dense({full_count})"]
        return [("S", sparse_count), ("D", full_count)]


class Sparse(MatcherBase):
    """[S-]DC-

    Prefix matcher: zero or more ``S-`` levels followed by a ``DC-`` level.
    Calling the class does not create an instance; ``__new__`` returns a
    one-element list describing a single sparse group.
    """

    pattern = re.compile("^(?P<S>(S-)*)DC-")

    def __new__(cls, ma, s, *, abbreviate=False):
        # "DC-" sits outside the named group and contributes one more level.
        level_count = 1 + num(ma, "S")
        if not abbreviate:
            return [f"Sparse({level_count})"]
        return [("S", level_count)]


class Dense(MatcherBase):
    """[C-]C-

    Prefix matcher: one or more ``C-`` levels. Calling the class does not
    create an instance; ``__new__`` returns a one-element list describing a
    single dense group.
    """

    pattern = re.compile("^(?P<C>(C-)+)")

    def __new__(cls, ma, s, *, abbreviate=False):
        dense_count = num(ma, "C")
        if not abbreviate:
            return [f"Dense({dense_count})"]
        return [("D", dense_count)]


class SparseCompressed(MatcherBase):
    """[S-]S-[C-]C-

    Prefix matcher: one or more ``S-`` levels followed by one or more ``C-``
    levels. Calling the class does not create an instance; ``__new__``
    returns a two-element list holding an expanded sparse group followed by
    a dense group.
    """

    pattern = re.compile("^(?P<S>(S-)+)(?P<C>(C-)+)")

    def __new__(cls, ma, s, *, abbreviate=False):
        sparse_count = num(ma, "S")
        dense_count = num(ma, "C")
        if not abbreviate:
            return [f"Sparse({sparse_count}, expanded=1)", f"Dense({dense_count})"]
        return [("SE", sparse_count), ("D", dense_count)]


def to_binsparse_groups(s, abbreviate=False):
    """Group the levels of a structure string into binsparse-style levels.

    Parameters
    ----------
    s : str or SparseTensor
        A "-"-separated structure abbreviation, or a ``SparseTensor`` whose
        ``abbreviation`` attribute is used. A string containing no "-" is
        treated as a sequence of single-character levels and joined with "-".
    abbreviate : bool, optional
        Passed through to each matcher. When true the matchers return
        ``(code, count)`` tuples; otherwise they return strings such as
        ``"Sparse(2)"``.

    Returns
    -------
    list
        The concatenated results of every matcher that consumed a prefix of
        the structure, in order.

    Raises
    ------
    TypeError
        If ``s`` is neither a ``str`` nor a ``SparseTensor``.
    ValueError
        If no matcher can consume the remaining part of the structure.
    """
    if isinstance(s, SparseTensor):
        s = s.abbreviation
    elif not isinstance(s, str):
        # Name this function in the message (it previously named a different one).
        raise TypeError(
            f"s argument to to_binsparse_groups should be str or SparseTensor; got {type(s)}"
        )
    if "-" not in s:
        s = "-".join(s)
    orig_s = s
    rv = []
    # Matchers are tried in this order; the first one that matches wins.
    matchers = [
        Sparse,  # [S-]DC- -> Sparse(N)
        Dense,  # [C-]C- -> Dense(N)
        SparseCompressed,  # [S-]S-[C-]C- -> Sparse(M, expanded=1), Dense(N)
        # Terminal patterns
        AllSparse,  # [S-]S$ -> Sparse(N)
        SparseFull,  # [S-][F-]F$ -> Sparse(M), Dense(N)
    ]
    # Repeatedly strip the prefix consumed by the first matching matcher.
    while s:
        for matcher in matchers:
            if ma := matcher.match(s):
                rv.extend(matcher(ma, s, abbreviate=abbreviate))
                s = trim(ma, s)
                break
        else:  # pragma: no cover
            raise ValueError(f"Invalid structure {orig_s!r}; unable to handle {s!r}")
    return rv


def structure_from_binsparse(binsparse_structure):
    """Convert user-input binsparse structure to an internal structure.

    Parameters
    ----------
    binsparse_structure : str or iterable
        Either a string such as ``"Sparse(2)-Dense(1)-Sparse(1)"``, or an
        iterable whose items are strings or iterables; non-string items have
        their elements stringified and concatenated, and all items are then
        joined with "-". Parsing is case-insensitive and the characters
        ``"- []()_=,"`` are ignored.

    Returns
    -------
    The result of ``unabbreviate`` applied to the "-"-joined internal levels.

    Raises
    ------
    ValueError
        If the input cannot be tokenized, contains no levels, has an
        expanded sparse level that is not followed by a dense level, or ends
        in a dense level.
    """
    # This is super quick and sloppy! It allows some very sloppy input
    if not isinstance(binsparse_structure, str):
        text = "-".join(
            val if isinstance(val, str) else "".join(map(str, val)) for val in binsparse_structure
        )
    else:
        text = binsparse_structure
    # Step 1: tokenize input string
    # Order matters: longer keywords must be tried before their prefixes.
    token_map = {
        "sparseexpanded": "SE",
        "expandedsparse": "SE",
        "sparse": "S",
        "se": "SE",
        "es": "SE",
        "s": "S",
        "dense": "D",
        "d": "D",
        "expanded": "E",  # to handle `Sparse(3, expanded=1)`
    }
    ignore = "- []()_=,"
    tokens = []
    t = text.lower()
    while t:
        cont = False
        if t[0] in ignore:
            t = t[1:]
            continue
        for k, v in token_map.items():
            if t.startswith(k):
                tokens.append(v)
                t = t[len(k) :]
                cont = True
                break
        if cont:
            continue
        # Not a keyword: consume a leading run of decimal digits as an int.
        for i, c in enumerate(t):
            if not c.isdecimal():
                if i == 0:
                    raise ValueError(f"Bad input: {binsparse_structure}")
                tokens.append(int(t[:i]))
                t = t[i:]
                cont = True
                break
        if cont:
            continue
        if t.isdecimal():
            # The rest of the text is one number; tokenizing is finished.
            tokens.append(int(t))
            break
        raise ValueError(f"Bad input: {binsparse_structure}")

    # Step 2: process tokens to form canonical binsparse format (abbreviated)
    levels = []
    # Each window is (current token, next three tokens); the padding lets the
    # last tokens still see three look-ahead slots. Every `next(it)` below
    # skips the window of a look-ahead token that has just been consumed.
    it = toolz.sliding_window(4, tokens + [None] * 3)
    for cur, n1, n2, n3 in it:
        if cur == "D":
            if isinstance(n1, int):
                next(it)
            else:
                n1 = 1
            levels.append(("D", n1))
        elif cur == "S":
            if isinstance(n1, int):
                next(it)
            else:
                # No explicit count: default to 1 and shift the look-ahead.
                n1, n2, n3 = 1, n1, n2
            if n2 != "E":
                levels.append(("S", n1))
            else:
                next(it)
                if isinstance(n3, int):
                    if n3 != 1:
                        raise ValueError(f"Bad input: {binsparse_structure}")
                    next(it)
                # NOTE(review): the value recorded is n3 (the `expanded`
                # value), not the level count n1 -- confirm this is intended.
                # A non-int n3 is rejected by the validation loop below.
                levels.append(("SE", n3))
        elif cur == "SE":
            if isinstance(n1, int):
                next(it)
                if n1 != 1:
                    raise ValueError(f"Bad input: {binsparse_structure}")
            else:
                n1 = 1
            levels.append(("SE", n1))
        else:
            raise ValueError(f"Bad input: {binsparse_structure}")

    if not levels:
        # Empty or separator-only input; without this check `levels[-1]`
        # below would fail with an unhelpful IndexError.
        raise ValueError(f"Bad input: {binsparse_structure}")

    for i, (cur, n) in enumerate(levels):
        if cur == "SE":
            if len(levels) == i + 1 or levels[i + 1][0] != "D":
                raise ValueError(f"Sparse({n}, expanded=1) level must be followed by a Dense level")
            if n != 1:
                raise ValueError(f"Bad input: {binsparse_structure}")
    if levels[-1][0] == "D":
        raise ValueError("Sparse structure must end in Sparse level; got Dense")

    # Step 3: convert to internal levels
    converted_levels = []
    for level, n in levels:
        if level == "D":
            converted_levels.extend(["C"] * n)
        elif level == "S":
            converted_levels.extend(["S"] * (n - 1))
            converted_levels.append("DC")
        else:  # level == "SE"
            converted_levels.extend(["S"] * n)
    # The last level is a plain Sparse level (validated above), so the final
    # internal level is "DC"; it is rewritten to "S".
    assert converted_levels[-1] == "DC"
    converted_levels[-1] = "S"
    return unabbreviate("-".join(converted_levels))
49 changes: 42 additions & 7 deletions sparsetensorviz/sparsetensorviz/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,32 @@ def issorted(array):

class SparseTensor:
@classmethod
def from_taco(cls, arrays, shape=None, structure=None, *, group_indices=False):
def from_taco(
cls, arrays, shape=None, structure=None, *, group_indices=False, as_binsparse=False
):
if structure is not None:
structure = _from_taco(structure)
return cls(arrays, shape=shape, structure=structure, group_indices=group_indices)
return cls(
arrays,
shape=shape,
structure=structure,
group_indices=group_indices,
as_binsparse=as_binsparse,
)

def __init__(self, arrays, shape=None, structure=None, *, group_indices=False):
@classmethod
def from_binsparse(cls, arrays, shape=None, structure=None):
from ._bundle_binsparse import structure_from_binsparse

if structure is not None:
structure = structure_from_binsparse(structure)
return cls(arrays, shape=shape, structure=structure, as_binsparse=True)

def __init__(
self, arrays, shape=None, structure=None, *, group_indices=False, as_binsparse=False
):
self.group_indices = group_indices
self.as_binsparse = as_binsparse
if not isinstance(arrays, (list, tuple)):
raise TypeError("arrays argument must be a list or tuple of numpy arrays")
if not arrays:
Expand Down Expand Up @@ -327,21 +346,37 @@ def bundled_groups(self):

return to_bundled_groups(self)

def _repr_svg_(self, *, as_taco=False, as_groups=None):
@property
def binsparse_groups(self):
from ._bundle_binsparse import to_binsparse_groups

return to_binsparse_groups(self)

@property
def binsparse_structure(self):
from ._bundle_binsparse import to_binsparse_groups

return to_binsparse_groups(self, abbreviate=True)

def _repr_svg_(self, *, as_taco=False, as_groups=None, as_binsparse=None):
try:
from ._formatting import to_svg
except ImportError:
return
if as_groups is None:
as_groups = self.group_indices
return to_svg(self, as_taco=as_taco, as_groups=as_groups)
if as_binsparse is None:
as_binsparse = self.as_binsparse
return to_svg(self, as_taco=as_taco, as_groups=as_groups, as_binsparse=as_binsparse)

def __repr__(self, *, as_taco=False, as_groups=None):
def __repr__(self, *, as_taco=False, as_groups=None, as_binsparse=None):
from ._formatting import to_text

if as_groups is None:
as_groups = self.group_indices
return to_text(self, as_taco=as_taco, as_groups=as_groups)
if as_binsparse is None:
as_binsparse = self.as_binsparse
return to_text(self, as_taco=as_taco, as_groups=as_groups, as_binsparse=as_binsparse)


class TacoView:
Expand Down
Loading