@@ -383,3 +383,279 @@ def _check_invalid_fields(fields: list[str], valid_fields: list[str]):
383
383
for field in fields :
384
384
if field not in valid_fields :
385
385
raise ValueError (f"Invalid field: { field } " )
386
+
387
+
388
def get_api_details(module, exported_list: list[str]) -> str:
    """
    Retrieve the signatures and docstrings of the functions/classes in the exported list.

    Parameters
    ----------
    module : module
        The module (or any other object) from which the names are resolved with `getattr()`.
    exported_list : list
        A list of function/class names as strings. A dotted name such as `"Validate.interrogate"`
        is resolved one attribute at a time, starting at `module`.

    Returns
    -------
    str
        A string containing, for every name in the order given, the object's name immediately
        followed by its signature, a newline, its docstring, and two newlines. An empty list
        yields an empty string.

    Raises
    ------
    AttributeError
        If any part of a (dotted) name cannot be found.
    """
    entries = []

    for fn in exported_list:
        # Walk the dotted path one attribute at a time to reach nested members
        parts = fn.split(".")
        obj = module
        for part in parts:
            obj = getattr(obj, part)

        # Some objects (e.g., properties) may not carry a `__name__`; fall back to the last
        # component of the requested path so the entry is still labelled
        obj_name = getattr(obj, "__name__", parts[-1])

        # `inspect.signature()` raises TypeError for non-callables and ValueError when no
        # signature can be provided; such objects are listed without a signature
        try:
            sig = str(inspect.signature(obj))
        except (TypeError, ValueError):
            sig = ""

        # An undocumented object has `__doc__` set to None; avoid emitting the text "None"
        doc = obj.__doc__ or ""

        # Combine the name, signature, and docstring
        entries.append(f"{obj_name}{sig}\n{doc}\n\n")

    return "".join(entries)
427
+
428
+
429
def _get_api_text() -> str:
    """
    Get the API documentation for the Pointblank library.

    The text starts with a banner and is followed by one section per family of the API. Each
    section consists of a heading, a short description of the family, and the details returned
    by `get_api_details()` for the members of that family.

    Returns
    -------
    str
        The API documentation for the Pointblank library.
    """

    # NOTE(review): imported inside the function rather than at module level, presumably to
    # avoid a circular import -- confirm before moving it
    import pointblank

    sep_line = "-" * 70

    api_text = (
        f"{sep_line}\nThis is the API documentation for the Pointblank library.\n{sep_line}\n\n"
    )

    #
    # Lists of exported functions and methods in different families
    #

    validate_exported = [
        "Validate",
        "Thresholds",
        "Schema",
    ]

    val_steps_exported = [
        "Validate.col_vals_gt",
        "Validate.col_vals_lt",
        "Validate.col_vals_ge",
        "Validate.col_vals_le",
        "Validate.col_vals_eq",
        "Validate.col_vals_ne",
        "Validate.col_vals_between",
        "Validate.col_vals_outside",
        "Validate.col_vals_in_set",
        "Validate.col_vals_not_in_set",
        "Validate.col_vals_null",
        "Validate.col_vals_not_null",
        "Validate.col_vals_regex",
        "Validate.col_vals_expr",
        "Validate.col_exists",
        "Validate.rows_distinct",
        "Validate.col_schema_match",
        "Validate.row_count_match",
        "Validate.col_count_match",
    ]

    column_selection_exported = [
        "col",
        "starts_with",
        "ends_with",
        "contains",
        "matches",
        "everything",
        "first_n",
        "last_n",
    ]

    interrogation_exported = [
        "Validate.interrogate",
        "Validate.get_tabular_report",
        "Validate.get_step_report",
        "Validate.get_json_report",
        "Validate.get_sundered_data",
        "Validate.get_data_extracts",
        "Validate.all_passed",
        "Validate.n",
        "Validate.n_passed",
        "Validate.n_failed",
        "Validate.f_passed",
        "Validate.f_failed",
        "Validate.warn",
        "Validate.stop",
        "Validate.notify",
    ]

    utilities_exported = [
        "load_dataset",
        "preview",
        "get_column_count",
        "get_row_count",
        "config",
    ]

    #
    # Descriptions of each family; the line breaks inside these strings are part of the output
    #

    validate_desc = """When performing data validation, you'll need the `Validate` class to get the
process started. It's given the target table and you can optionally provide some metadata and/or
failure thresholds (using the `Thresholds` class or through shorthands for this task). The
`Validate` class has numerous methods for defining validation steps and for obtaining
post-interrogation metrics and data."""

    val_steps_desc = """Validation steps can be thought of as sequential validations on the target
data. We call `Validate`'s validation methods to build up a validation plan: a collection of steps
that, in the aggregate, provides good validation coverage."""

    column_selection_desc = """A flexible way to select columns for validation is to use the `col()`
function along with column selection helper functions. A combination of `col()` + `starts_with()`,
`matches()`, etc., allows for the selection of multiple target columns (mapping a validation across
many steps). Furthermore, the `col()` function can be used to declare a comparison column (e.g.,
for the `value=` argument in many `col_vals_*()` methods) when you can't use a fixed value
for comparison."""

    interrogation_desc = """The validation plan is put into action when `interrogate()` is called.
The workflow for performing a comprehensive validation is then: (1) `Validate()`, (2) adding
validation steps, (3) `interrogate()`. After interrogation of the data, we can view a validation
report table (by printing the object or using `get_tabular_report()`), extract key metrics, or we
can split the data based on the validation results (with `get_sundered_data()`)."""

    utilities_desc = """The utilities group contains functions that are helpful for the validation
process. We can load datasets with `load_dataset()`, preview a table with `preview()`, and set
global configuration parameters with `config()`."""

    #
    # Add headings (`*_desc` text) and API details for each family of functions/methods
    #

    # (family name used in the heading, description, exported names), in output order
    families = [
        ("Validate", validate_desc, validate_exported),
        ("Validation Steps", val_steps_desc, val_steps_exported),
        ("Column Selection", column_selection_desc, column_selection_exported),
        ("Interrogation and Reporting", interrogation_desc, interrogation_exported),
        ("Utilities", utilities_desc, utilities_exported),
    ]

    for family_name, family_desc, family_exported in families:
        api_text += f"\n## The {family_name} family\n\n{family_desc}\n\n"
        api_text += get_api_details(module=pointblank, exported_list=family_exported)

    # Modify language syntax in all code cells (every literal `{python}` becomes `python`)
    api_text = api_text.replace("{python}", "python")

    # Remove code cells that contain `#| echo: false` (i.e., don't display the code)
    # NOTE(review): the pattern matches on the line layout of a fenced cell and does not itself
    # look for `#| echo: false`; confirm it cannot remove cells that are meant to be shown
    api_text = re.sub(r"```python\n\s*.*\n\s*.*\n.*\n.*\n.*```\n\s*", "", api_text)

    return api_text
569
+
570
+
571
+ def _get_examples_text () -> str :
572
+ """
573
+ Get the examples for the Pointblank library. These examples are extracted from the Quarto
574
+ documents in the `docs/demos` directory.
575
+
576
+ Returns
577
+ -------
578
+ str
579
+ The examples for the Pointblank library.
580
+ """
581
+
582
+ sep_line = "-" * 70
583
+
584
+ examples_text = (
585
+ f"{ sep_line } \n This is a set of examples for the Pointblank library.\n { sep_line } \n \n "
586
+ )
587
+
588
+ # A large set of examples is available in the docs/demos directory, and each of the
589
+ # subdirectories contains a different example (in the form of a Quarto document)
590
+
591
+ example_dirs = [
592
+ "01-starter" ,
593
+ "02-advanced" ,
594
+ "03-data-extracts" ,
595
+ "04-sundered-data" ,
596
+ "05-step-report-column-check" ,
597
+ "06-step-report-schema-check" ,
598
+ "apply-checks-to-several-columns" ,
599
+ "check-row-column-counts" ,
600
+ "checks-for-missing" ,
601
+ "col-vals-custom-expr" ,
602
+ "column-selector-functions" ,
603
+ "comparisons-across-columns" ,
604
+ "expect-no-duplicate-rows" ,
605
+ "expect-no-duplicate-values" ,
606
+ "expect-text-pattern" ,
607
+ "failure-thresholds" ,
608
+ "mutate-table-in-step" ,
609
+ "numeric-comparisons" ,
610
+ "schema-check" ,
611
+ "set-membership" ,
612
+ "using-parquet-data" ,
613
+ ]
614
+
615
+ for example_dir in example_dirs :
616
+
617
+ link = f"https://posit-dev.github.io/pointblank/demos/{ example_dir } /"
618
+
619
+ # Read in the index.qmd file for each example
620
+ with open (f"docs/demos/{ example_dir } /index.qmd" , "r" ) as f :
621
+ example_text = f .read ()
622
+
623
+ # Remove the first eight lines of the example text (contains the YAML front matter)
624
+ example_text = "\n " .join (example_text .split ("\n " )[8 :])
625
+
626
+ # Extract the title of the example (the line beginning with `###`)
627
+ title = re .search (r"### (.*)" , example_text ).group (1 )
628
+
629
+ # The next line with text is the short description of the example
630
+ desc = re .search (r"(.*)\." , example_text ).group (1 )
631
+
632
+ # Get all of the Python code blocks in the example
633
+ # these can be identified as starting with ```python and ending with ```
634
+ code_blocks = re .findall (r"```python\n(.*?)```" , example_text , re .DOTALL )
635
+
636
+ # Wrap each code block with a leading ```python and trailing ```
637
+ code_blocks = [f"```python\n { code } ```" for code in code_blocks ]
638
+
639
+ # Collapse all code blocks into a single string
640
+ code_text = "\n \n " .join (code_blocks )
641
+
642
+ # Add the example title, description, and code to the examples text
643
+ examples_text += f"### { title } ({ link } )\n \n { desc } \n \n { code_text } \n \n "
644
+
645
+ return examples_text
646
+
647
+
648
def _get_api_and_examples_text() -> str:
    """
    Build the full reference text: the API documentation followed by the examples.

    Returns
    -------
    str
        The text from `_get_api_text()` and the text from `_get_examples_text()`, in that
        order, separated by two newline characters.
    """

    # The tuple is evaluated left to right, so the API text is generated before the examples
    sections = (_get_api_text(), _get_examples_text())

    return "\n\n".join(sections)
0 commit comments