From 5a4c34d3ee396c84c2b963e26bb0dcb3ab1d44fa Mon Sep 17 00:00:00 2001 From: Daan Damhuis Date: Fri, 17 Mar 2023 22:10:00 +0100 Subject: [PATCH 1/2] [Docs] Bugix and README.md update (#89) * Update README.md * Update README.md * Update README.md * Update README.md * Update document.py * Update README.md * Update document.py * Update README.md * Update README.md * Update README.md * Refactored the documenting part, need to clean some more up. * Tables and Colums, Need to do Measures * Refactored Document.py * Bugfix: CalculationGroupSource in Partitions - Doesn't have Query as Property. * Bugfix: CalculationGroup Source. * Added Docstrings (docs / cult) Bugfix in Translations Refactoring docs * Updated the docstring requirements. * Adjusted some more errors * Adjusted last flake8 errors * Adjusted last flake8 errors * Removed whitespace * Typing Error for anything before python 3.9 * Adjustments to Colums and Tables * Update README.md * Feature/docs sturcture > Merge issues (#3) * Update README.md * Update README.md * Update README.md * Update README.md * Update document.py * Update README.md * Update document.py * Update README.md * Update README.md * Update README.md * Refactored the documenting part, need to clean some more up. * Tables and Colums, Need to do Measures * Refactored Document.py * Bugfix: CalculationGroupSource in Partitions - Doesn't have Query as Property. * Bugfix: CalculationGroup Source. * Added Docstrings (docs / cult) Bugfix in Translations Refactoring docs * Updated the docstring requirements. 
* Adjusted some more errors * Adjusted last flake8 errors * Adjusted last flake8 errors * Removed whitespace * Typing Error for anything before python 3.9 * Adjustments to Colums and Tables * Update README.md * Added Docstring for general page * New Line at end of file * Resolved Flake8 isses --------- Co-authored-by: Daan Damhuis Co-authored-by: Curtis Stallings * Added the Get function to `object.py` * Bugfix: Object name without translations becomes NONE * Updated the formatting to pass flake8. * Updated the README.md for spelling error and new functionality. --------- Co-authored-by: Daan Damhuis Co-authored-by: Curtis Stallings --- docs/README.md | 127 ++++++++++++++++++++++++++++-------------- pytabular/culture.py | 2 +- pytabular/document.py | 6 +- pytabular/object.py | 20 +++++++ 4 files changed, 109 insertions(+), 46 deletions(-) diff --git a/docs/README.md b/docs/README.md index 1a23119..913bf46 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,4 +1,5 @@ # PyTabular + [![PyPI version](https://badge.fury.io/py/python-tabular.svg)](https://badge.fury.io/py/python-tabular) [![Downloads](https://pepy.tech/badge/python-tabular)](https://pepy.tech/project/python-tabular) [![readthedocs](https://github.com/Curts0/PyTabular/actions/workflows/readthedocs.yml/badge.svg)](https://github.com/Curts0/PyTabular/actions/workflows/readthedocs.yml) @@ -6,16 +7,19 @@ [![flake8](https://github.com/Curts0/PyTabular/actions/workflows/flake8.yml/badge.svg?branch=master)](https://github.com/Curts0/PyTabular/actions/workflows/flake8.yml) [![docstr-coverage](https://github.com/Curts0/PyTabular/actions/workflows/docstr-coverage.yml/badge.svg)](https://github.com/Curts0/PyTabular/actions/workflows/docstr-coverage.yml) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) + ### What is it? 
-[PyTabular](https://github.com/Curts0/PyTabular) (**python-tabular** in [pypi](https://pypi.org/project/python-tabular/)) is a python package that allows for programmatic execution on your tabular models! This is possible thanks to [Pythonnet](https://pythonnet.github.io/) and Microsoft's [.Net APIs on Azure Analysis Services](https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices?view=analysisservices-dotnet). Currently this build is tested and working on **Windows Operating System only**. Help is needed to expand this for other os. See the [Documentation Here](https://curts0.github.io/PyTabular/). PyTabular is still considered alpha. Please send bugs my way! Preferably in the issues section in Github. +[PyTabular](https://github.com/Curts0/PyTabular) (**python-tabular** in [pypi](https://pypi.org/project/python-tabular/)) is a python package that allows for programmatic execution on your tabular models! This is possible thanks to [Pythonnet](https://pythonnet.github.io/) and Microsoft's [.Net APIs on Azure Analysis Services](https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices?view=analysisservices-dotnet). Currently, this build is tested and working on **Windows Operating System only**. Help is needed to expand this for other operating systems. See the [Documentation Here](https://curts0.github.io/PyTabular/). PyTabular is still considered alpha. Please send bugs my way! Preferably in the issues section in GitHub. ### Getting Started + See the [Pypi project](https://pypi.org/project/python-tabular/) for available versions. !!! DANGER "Before 0.3.5" **To become PEP8 compliant with naming conventions, serious name changes were made in 0.3.5.** Install v. 0.3.4 or lower to get the older naming conventions. 
+ ```powershell title="Install Example" python3 -m pip install python-tabular @@ -23,7 +27,8 @@ python3 -m pip install python-tabular python3 -m pip install python-tabular==0.3.4 ``` -In your python environment, import pytabular and call the main Tabular Class. Only parameter needed is a solid connection string. +In your python environment, import pytabular and call the main Tabular Class. The only parameter needed is a solid connection string. + ```python title="Connecting to Model" import pytabular model = pytabular.Tabular(CONNECTION_STR) # (1) @@ -32,12 +37,14 @@ model = pytabular.Tabular(CONNECTION_STR) # (1) 1. That's it. A solid connection string. You may have noticed some logging into your console. I'm a big fan of logging, if you don't want any just get the logger and disable it. + ```python title="Logging Example" import pytabular pytabular.logger.disabled = True ``` -You can query your models with the `query()` method from your tabular class. For Dax Queries, it will need the full Dax syntax. See [EVALUATE example](https://dax.guide/st/evaluate/). This will return a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html). If you are looking to return a single value, see below. Simply wrap your query in the the curly brackets. The method will take that single cell table and just return the individual value. You can also query your DMV. See below for example. +You can query your models with the `query()` method from your tabular class. For Dax Queries, it will need the full Dax syntax. See [EVALUATE example](https://dax.guide/st/evaluate/). This will return a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html). If you are looking to return a single value, see below. Simply wrap your query in the curly brackets. The method will take that single-cell table and just return the individual value. You can also query your DMV. See below for example. 
+ ```python title="Query Examples" #Run basic queries DAX_QUERY = "EVALUATE TOPN(100, 'Table1')" @@ -59,10 +66,10 @@ model.query(FILE_PATH) # (4) 1. Returns a `pd.DataFrame()`. 2. Returns a `pd.DataFrame()`. 3. This will return a single value. Example, `1` or `'string'`. -4. This will return same logic as above, single values if possible else will return `pd.DataFrame()`. Supply any file type. - +4. This will return the same logic as above, single values if possible else will return `pd.DataFrame()`. Supply any file type. You can also explore your tables, partitions, columns, etc. via the attributes of your `Tabular` class. + ```python title="Usage Examples" model.Tables['Table Name'].refresh() # (1) @@ -82,10 +89,11 @@ model.Tables['Table Name'].Columns['Column Name'].distinct_count() # (5) 1. Refresh a specific table. The `.Tables` is your attribute to gain access to your `PyTables` class. From that, you can iterate into specific `PyTable` classes. 2. Refresh a specific partition. 3. Get the last refresh time of a specific partition. Notice this time that instead of the partition name, an `int` was used to index into the specific `PyPartition`. -4. Get row count of a table. -5. Get distinct count of a column. +4. Get the row count of a table. +5. Get a distinct count of a column. + +Use the `refresh()` method to handle refreshes on your model. This is synchronous. Should be flexible enough to handle a variety of inputs. See [PyTabular Docs for Refreshing Tables and Partitions](https://curts0.github.io/PyTabular/refresh). The most basic way to refresh is to input the table name string. The method will search for the table and output exception if unable to find it. For partitions, you will need a key, and value combination. Example, `{'Table1':'Partition1'}`. You can also take the key-value pair and iterate through a group of partitions. Example, `{'Table1':['Partition1','Partition2']}`. Rather than providing a string, you can also input the actual class. 
See below for those examples. You can access them from the built-in attributes `self.Tables`, `self.Partitions`. -Use the `refresh()` method to handle refreshes on your model. This is synchronous. Should be flexible enough to handle a variety of inputs. See [PyTabular Docs for Refreshing Tables and Partitions](https://curts0.github.io/PyTabular/refresh). Most basic way to refresh is input the table name string. The method will search for table and output exception if unable to find it. For partitions you will need a key, value combination. Example, `{'Table1':'Partition1'}`. You can also take the key value pair and iterate through a group of partitions. Example, `{'Table1':['Partition1','Partition2']}`. Rather than providing a string, you can also input the actual class. See below for those examples. You can acess them from the built in attributes `self.Tables`, `self.Partitions`. ```python title="Refresh Examples" model.refresh('Table Name') # (1) @@ -123,19 +131,21 @@ model.refresh(['Table1','Table2'], trace = None) # (9) ``` 1. Basic refresh of a specific table by table name string. -2. Basic refresh of a group of tables by table name strings. Example is with list, but as long as it's iterable you should be fine. -3. Refresh of a table by passing the `PyTable` class. -4. Refresh of a partition by passing the `PyPartition` class. -5. Refresh a specific partition by passing a dictionary with table name as key and partition name as value. +2. Basic refresh of a group of tables by table name strings. An example is with a list, but as long as it's iterable you should be fine. +3. Refresh a table by passing the `PyTable` class. +4. Refresh a partition by passing the `PyPartition` class. +5. Refresh a specific partition by passing a dictionary with the table name as the key and the partition name as the value. 6. Get crazy. Pass all kinds of weird combinations. 7. Basic refresh from a `PyTable` class. 8. Basic refresh from a `PyPartition` class. -9. 
By default a `RefreshTrace` is started during refresh. It can be disabled by setting `trace = None`. +9. By default, a `RefreshTrace` is started during refresh. It can be disabled by setting `trace = None`. ### Use Cases -#### If blank table, then refresh table. +#### If a blank table, then refresh the table. + This will use the function [find_zero_rows](https://curts0.github.io/PyTabular/PyTables/#pytabular.table.PyTables.find_zero_rows) and the method [refresh](https://curts0.github.io/PyTabular/PyTables/#pytabular.table.PyTables.refresh) from the Tabular class. + ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -145,6 +155,7 @@ if len(tables) > 0: ``` Maybe you only want to check a subset of tables? Like `find()` tables with 'fact' in the name, then check if any facts are blank. + ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -154,7 +165,9 @@ if len(tables) > 0: ``` #### Sneak in a refresh. -This will use the method [is_process](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.is_process) and the method [refresh](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.refresh) from the Tabular class. It will check the DMV to see if any jobs are currently running classified as processing. + +This will use the method [is_process](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.is_process) and the method [refresh](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.refresh) from the Tabular class. It will check the DMV to see if any jobs are currently running and classified as processing. + ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -164,8 +177,10 @@ else: model.refresh(TABLES_OR_PARTITIONS_TO_REFRESH) ``` -#### Show refresh times in model. 
-This will use the function [last_refresh](https://curts0.github.io/PyTabular/PyTables/#pytabular.table.PyTables.last_refresh) and the method [create_table](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.create_table) from the Tabular class. It will search through the model for all tables and partitions and pull the 'RefreshedTime' property from it. It will return results into a pandas dataframe, which will then be converted into an M expression used for a new table. +#### Show refresh times in the model. + +This will use the function [last_refresh](https://curts0.github.io/PyTabular/PyTables/#pytabular.table.PyTables.last_refresh) and the method [create_table](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.create_table) from the Tabular class. It will search through the model for all tables and partitions and pull the 'RefreshedTime' property from it. It will return results into a pandas data frame, which will then be converted into an M expression used for a new table. + ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -173,9 +188,10 @@ df = model.Tables.last_refresh() model.create_table(df, 'Refresh Times') ``` +#### If BPA Violation, then revert deployment. + +This uses a few things. First the [BPA Class](https://curts0.github.io/PyTabular/best_practice_analyzer/), then the [TE2 Class](https://curts0.github.io/PyTabular/tabular_editor/), and will finish with the [analyze_bpa](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.analyze_bpa) method. Did not want to reinvent the wheel with the amazing work done with Tabular Editor and its BPA capabilities. -#### If BPA Violation, then revert deployment. -This uses a few things. 
First the [BPA Class](https://curts0.github.io/PyTabular/best_practice_analyzer/), then the [TE2 Class](https://curts0.github.io/PyTabular/tabular_editor/), and will finish with the [analyze_bpa](https://curts0.github.io/PyTabular/Tabular/#pytabular.pytabular.Tabular.analyze_bpa) method. Did not want to re-invent the wheel with the amazing work done with Tabular Editor and it's BPA capabilities. ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -190,8 +206,10 @@ if len(results) > 0: #Revert deployment here! ``` -#### Loop through and query Dax files -Let's say you have multiple dax queries you would like to store and run through as checks. The [query](https://curts0.github.io/PyTabular/query/#pytabular.query.Connection.query) method on the Tabular class can also take file paths. It can really be any file type as it's just checking os.path.isfile(). But would suggest `.dax` or `.txt`. It will read the file and use that as the new `query_str` argument. +#### Loop through and query DAX files + +Let's say you have multiple DAX queries you would like to store and run through as checks. The [query](https://curts0.github.io/PyTabular/query/#pytabular.query.Connection.query) method on the Tabular class can also take file paths. It can be any file type as it's just checking `os.path.isfile()`. But would suggest `.dax` or `.txt`. It will read the file and use that as the new `query_str` argument. + ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) @@ -206,7 +224,9 @@ for file_path in LIST_OF_FILE_PATHS: ``` #### Advanced Refreshing with Pre and Post Checks -Maybe you are introducing new logic to a fact table, and you need to ensure that a measure checking last month values never changes. To do that you can take advantage of the `RefreshCheck` and `RefreshCheckCollection` classes. 
But using those you can build out something that would first check the results of the measure, then refresh, then check the results of the measure after refresh, and lastly perform your desired check. In this case the `pre` value matches the `post` value. When refreshing, if your pre does not equal post, it would fail and give an assertion error in your logging. + +Maybe you are introducing new logic to a fact table, and you need to ensure that a measure checking last month's values never changes. To do that you can take advantage of the `RefreshCheck` and `RefreshCheckCollection` classes. But using those you can build out something that would first check the results of the measure, then refresh, then check the results of the measure after the refresh, and lastly perform your desired check. In this case, the `pre` value matches the `post` value. When refreshing, if your pre does not equal post, it would fail and give an assertion error in your logging. + ```python from pytabular import Tabular from pytabular.refresh import RefreshCheck, RefreshCheckCollection @@ -231,12 +251,14 @@ all_refresh_check = RefreshCheckCollection([sum_of_last_month_sales]) model.Refresh( 'Fact Table Name', refresh_checks = RefreshCheckCollection([sum_of_last_month_sales]) - + ) ``` #### Query as Another User -There are plenty of tools that allow you to query as an 'Effective User' inheriting their security when querying. This is an extremely valuable concept built natively into the .Net apis. My only gripe is they were all UI based. This allows you to programmatically connect as an effective user and query in Python. You could easily loop through all your users to run tests on their security. + +There are plenty of tools that allow you to query as an 'Effective User' inheriting their security when querying. This is an extremely valuable concept built natively into the .Net APIs. My only gripe is they were all UI based. 
This allows you to programmatically connect as an effective user and query in Python. You could easily loop through all your users to run tests on their security. + ```python import pytabular as p @@ -270,7 +292,9 @@ model.query(query_str, effective_user = user_email) ``` #### Refresh Related Tables + Ever need to refresh related tables of a Fact? Now should be a lot easier. + ```python import pytabular as p @@ -285,29 +309,43 @@ tables.refresh() ``` ## Documenting a Model -The Tabular model contains a lot of information that can be used to generation documentation if filled in. Currently the markdown files are generated with the Docusaurs heading in place, but this will be changed in future to support multiple documentation platforms. -**Tip**: With Tabular Editor 2 (Free) or 3 (Paid) you can easily add Descriptioms, Translations (Cultures) and other additonal information that can later be used for generating the documentation. +The Tabular model contains a lot of information (meta-data) that can be used to generate documentation if filled in. Currently, the markdown files are generated with the Docusaurus heading in place, but this will be changed in the future to support multiple documentation platforms. + +**Tip**: With Tabular Editor 2 (Free) or 3 (Paid) you can easily add Descriptions, Translations (Cultures) and other additional information that can later be used for generating the documentation. Args: - **model**: Tabular -- **friendly_name**: Default > No Value +- **friendly_name**: Default > No Value -To specify the location of the docs, just supply the save location with a new folder name argument. +To specify the location of the docs, just supply the location of where you want to store the files (=`save_location`). - **save_location**: Default > docs -Each page in the generation process has it's own specific name, with these arguments you can rename them to your liking. 
+Each page in the generation process has its own specific name, with these arguments you can rename them to your liking. -- **general_page_url**: Default > 1-general-information.md -- **measure_page_url**: Default > 2-measures.md -- **table_page_url**: Default > 3-tables.md +- **general_page_url**: Default > index.md +- **measure_page_url**: Default > measures.md +- **table_folder**: Default > tables - **column_page_url**: Default > 4-columns.md -- **roles_page_url**: Default > 5-roles.md + +**Folder structure** + +``` +adventure-works > Model Name +└─── index.md > General Information +└─── measures.md > Page with all measures in the model. +└─── tables +│ └─── index.md > Overview page with all tables in the model and a summary per table. +| └─── technical_table_name.md > Details of a specific table with all columns and attributes. +| └─── ......md +| └─── ......md +``` ### Documenting a Model -The simpelst way to document a tabular model is to connect to the model, and initialize the documentation and execute `save_documentation()`. + +The simplest way to document a tabular model is to connect to the model, initialize the documentation and execute `save_documentation()`. ```python import pytabular @@ -315,10 +353,10 @@ import pytabular # Connect to a Tabular Model Model model = pytabular.Tabular(CONNECTION_STR) -# Initiate the Docs +# Initiate the Docs docs = pytabular.ModelDocumenter(model) -# Generate the pages. +# Generate the pages. docs.generate_documentation_pages() # Save docs to the default location @@ -326,7 +364,8 @@ docs.save_documentation() ``` ### Documenting a Model with Cultures -Some model creators choose to add cultures to a tabular model for different kinds of reasons. We can leverage those cultures to use the translation names instead of the original object names. In order to this you can set translations to `True` and specify the culture you want to use (e.g. `'en-US'`). 
+Some model creators choose to add cultures to a tabular model for different kinds of reasons. We can leverage those cultures to use the translation names instead of the original object names. To enable this, you can set translations to `True` and specify the culture you want to use (e.g. `'en-US'`). ```python import pytabular @@ -334,25 +373,28 @@ import pytabular # Connect to a Tabular Model Model model = pytabular.Tabular(CONNECTION_STR) -# Initiate the Docs +# Initiate the Docs docs = pytabular.ModelDocumenter(model) # Set the translation for documentation to an available culture. -# By setting the Tranlsations to `True` it will check if it exists and if it does, +# By setting the Translations to `True` it will check if it exists and if it does, # it will start using the translations for the docs docs.set_translations( - enable_translations = True, + enable_translations = True, culture = 'en-US' ) -# Generate the pages. +# Generate the pages. docs.generate_documentation_pages() # Save docs to the default location docs.save_documentation() ``` + ### Documenting a Power BI > Local Model. + The Local model doesn't have a "name", only an Id. So we need to Supply a "Friendly Name", which will be used to store the markdown files. + ```python import pytabular @@ -365,7 +407,7 @@ docs = pytabular.ModelDocumenter( friendly_name = "Adventure Works" ) -# Generate the pages. +# Generate the pages. 
docs.generate_documentation_pages() # Save docs to the default location @@ -373,4 +415,5 @@ docs.save_documentation() ``` ### Contributing + See [contributing.md](CONTRIBUTING.md) diff --git a/pytabular/culture.py b/pytabular/culture.py index d2cf3d2..d53f0a9 100644 --- a/pytabular/culture.py +++ b/pytabular/culture.py @@ -64,7 +64,7 @@ def get_translation( ] return translations[0] except Exception: - return {"object_not_found": "Not Available"} + return {"object_translation": object_name} class PyCultures(PyObjects): diff --git a/pytabular/document.py b/pytabular/document.py index 683035c..617e9d9 100644 --- a/pytabular/document.py +++ b/pytabular/document.py @@ -294,7 +294,7 @@ def create_markdown_for_measure(self, object: PyMeasure) -> str: "", "" f"{self.generate_object_properties(object_properties)}" "", f'```dax title="Technical: {object.Name}"', - f" {object.Expression}", + f"{object.Expression}", "```", "---", ] @@ -435,7 +435,7 @@ def create_markdown_for_table(self, object: PyTable) -> str: f"{self.generate_object_properties(object_properties)}", "", f'```{partition_type} title="Table Source: {object.Name}"', - f" {partition_source}", + f"{partition_source}", "```", "---", ] @@ -515,7 +515,7 @@ def create_markdown_for_column(self, object: PyColumn) -> str: obj_text.extend( ( f'```dax title="Technical: {object.Name}"', - f" {object.Expression}", + f"{object.Expression}", "```", ) ) diff --git a/pytabular/object.py b/pytabular/object.py index 3202370..05262a6 100644 --- a/pytabular/object.py +++ b/pytabular/object.py @@ -139,3 +139,23 @@ def find(self, object_str: str): if object_str.lower() in object.Name.lower() ] return self.__class__.mro()[0](items) + + def get(self, object_str: str, alt_result: str = "") -> str: + """Gets the object based on str. + + If the object isnt found, then an alternate result + can be supplied as an argument. + + Args: + object_str (str): str to lookup object + alt_result (str): str to return when value isn't found. 
+ + Returns: + str: Result of the lookup, or the alternate result. + """ + try: + return self.__getitem__(object_str) + except Exception as e: + Console().print(e) + + return alt_result From 33ad15d293c0df42d9198505e2e1ac60d45a0840 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Fri, 17 Mar 2023 16:16:39 -0500 Subject: [PATCH 2/2] update 0.4.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7befe14..06bb48e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python_tabular" -version = "0.3.9" +version = "0.4.0" authors = [ { name="Curtis Stallings", email="curtisrstallings@gmail.com" }, ]