Skip to content

Commit 30867f4

Browse files
committed
add package
1 parent 9ad525f commit 30867f4

File tree

118 files changed

+25021
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+25021
-0
lines changed

cleanlab/.ci/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# .ci
2+
3+
This directory contains support scripts for CI.

cleanlab/.ci/nblint.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Lint Jupyter notebooks being checked in to this repo.
5+
6+
Currently, this "linter" checks that each notebook's output cells are empty,
7+
to avoid bloating the repository size, and that no code cell is empty or ends with a trailing newline.
8+
"""
9+
10+
11+
import argparse
12+
import json
13+
import os
14+
import sys
15+
16+
17+
def main():
    """
    Entry point: lint every notebook found under the directories given on the
    command line.

    Notebooks are visited in sorted path order so that, when more than one
    notebook is invalid, the same one is reported on every run.
    """
    opts = get_opts()
    notebooks = find_notebooks(opts.dir)
    # The notebooks come back as a set, whose iteration order is arbitrary, and
    # the first failing check terminates the process; sorting makes the
    # reported failure deterministic.
    for notebook in sorted(notebooks):
        check(notebook)
22+
23+
24+
def get_opts(argv=None):
    """
    Parse the command-line options.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse falls back to the process's command-line
            arguments, which is the original behavior.

    Returns:
        An argparse.Namespace whose ``dir`` attribute is a list of one or more
        paths (``nargs="+"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dir", help="Directories to search for notebooks", type=str, nargs="+")
    return parser.parse_args(argv)
28+
29+
30+
def find_notebooks(dirs):
    """
    Recursively collect the paths of all ``.ipynb`` files under the given
    directories.

    Args:
        dirs: Iterable of directory paths to walk.

    Returns:
        A set of notebook paths, each formed by joining the walked directory
        name with the file name. Duplicates (e.g. from overlapping input
        directories) collapse into one entry.
    """
    notebooks = set()
    for d in dirs:
        for dirname, subdirs, filenames in os.walk(d):
            # Prune in place so os.walk never descends into Jupyter's autosave
            # directories: the copies in there are not checked-in notebooks and
            # would only produce spurious failures.
            subdirs[:] = [s for s in subdirs if s != ".ipynb_checkpoints"]
            notebooks.update(
                os.path.join(dirname, filename)
                for filename in filenames
                if filename.endswith(".ipynb")
            )
    return notebooks
40+
41+
42+
def check(notebook):
    """
    Run every lint check against a single notebook file.

    Args:
        notebook: Path to an ``.ipynb`` file; its contents are parsed as JSON.
    """
    # Read explicitly as UTF-8 rather than with the platform's default
    # encoding, which can fail to decode non-ASCII notebook text on systems
    # whose default is not UTF-8.
    with open(notebook, encoding="utf-8") as f:
        contents = json.load(f)
    check_outputs_empty(notebook, contents)
    check_no_trailing_newline(notebook, contents)
47+
48+
49+
def check_outputs_empty(path, contents):
    """
    Checks that no cell of the notebook carries saved outputs.

    Cells without an "outputs" key are skipped. A cell whose "outputs" is
    anything other than an empty list is reported through fail(), together
    with the cell's zero-based index.
    """
    for index, nb_cell in enumerate(contents["cells"]):
        if "outputs" not in nb_cell:
            continue
        if nb_cell["outputs"] == []:
            continue
        fail(path, "output is not empty", index)
53+
54+
55+
def check_no_trailing_newline(path, contents):
    """
    Checks that every code cell is non-empty and that its last line doesn't end
    with a newline, which produces an unnecessary newline in the doc rendering.

    Non-code cells are ignored. Violations are reported through fail(),
    together with the cell's zero-based index.
    """
    for i, cell in enumerate(contents["cells"]):
        if cell["cell_type"] != "code":
            continue
        # A missing, null, or zero-length source all count as an empty cell.
        source = cell.get("source")
        if not source:
            fail(path, "code cell is empty", i)
        # elif: never index into an empty source, without depending on fail()
        # terminating the process. source[-1] is a str whether source is a
        # list of lines or a single string.
        elif source[-1].endswith("\n"):
            fail(path, "unnecessary trailing newline", i)
67+
68+
69+
def fail(path, message, cell=None):
    """
    Report a lint failure and terminate the process with exit status 1.

    Prints ``<path>: <message>``, or ``<path> [cell <cell>]: <message>`` when a
    cell index is given, to standard output before exiting.
    """
    if cell is None:
        location = path
    else:
        location = f"{path} [cell {cell}]"
    print(f"{location}: {message}")
    sys.exit(1)
73+
74+
75+
# Run the linter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

cleanlab/.coveragerc

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# .coveragerc to control coverage.py
2+
# https://coverage.readthedocs.io/en/latest/config.html
3+
[run]
4+
branch = True
5+
omit =
6+
cleanlab/experimental/*
7+
8+
[report]
9+
# Regexes for lines to exclude from consideration
10+
exclude_lines =
11+
# Have to re-enable the standard pragma
12+
pragma: no cover
13+
14+
# Don't complain about missing debug-only code:
15+
def __repr__
16+
if self\.debug
17+
18+
# Don't complain if tests don't hit defensive assertion / error-reporting code:
19+
raise
20+
except
21+
assert
22+
warnings.warn
23+
24+
# Use print(f"...") for printing out non-pure strings:
25+
print\(\"
26+
27+
# Don't complain if non-runnable code isn't run:
28+
if 0:
29+
if __name__ == .__main__.:
30+
31+
# Can't unit test big datasets:
32+
if big_dataset
33+
34+
ignore_errors = True
35+
36+
[html]
37+
directory = coverage_html_report

cleanlab/.editorconfig

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
root = true
2+
3+
[*]
4+
charset = utf-8
5+
end_of_line = lf
6+
insert_final_newline = true
7+
indent_style = space
8+
trim_trailing_whitespace = true
9+
indent_size = 4
10+
11+
[*.ipynb]
12+
insert_final_newline = false
13+
14+
[*.{yml,yaml}]
15+
indent_size = 2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
name: Bug Report
3+
about: Use this template to report a bug.
4+
title: "[Short summary of the bug]"
5+
labels: "needs triage"
6+
---
7+
8+
<!-- Briefly summarize the issue. -->
9+
10+
# Stack trace
11+
12+
<!-- If applicable, please include a full stack trace here. If you need to omit
13+
the bottom of the stack trace (e.g. it includes stack frames from your private
14+
code), that is okay. Try to include all cleanlab stack frames. -->
15+
16+
# Steps to reproduce
17+
18+
<!-- Be as detailed as possible here. If possible, include a self-contained
19+
runnable example that demonstrates the issue. Remember to supply any data
20+
necessary to run your example, or construct your example with synthetic data.
21+
This is not strictly required, but the more detailed your bug report, the more
22+
quickly we can help you and fix the bug. -->
23+
24+
# Additional information
25+
26+
- **Cleanlab version**: <!-- `cleanlab.__version__`, or the git commit hash if you're using an unreleased version -->
27+
- **Operating system**: <!-- e.g. macOS 12.1, Ubuntu 20.04, Windows 10 -->
28+
- **Python version**: <!-- you can find this with `python --version` -->
29+
30+
<!-- Please include any other information that could be helpful for debugging. -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
---
2+
name: Feature Request
3+
about: Use this template to ask for new functionality in cleanlab.
4+
title: "[Short summary of the feature]"
5+
labels: "needs triage"
6+
---
7+
8+
<!-- Briefly summarize the proposed feature. -->
9+
10+
# Details
11+
12+
<!--
13+
Describe your proposed feature in more detail. Answer any of the following
14+
questions that you can:
15+
* What is the problem you're trying to solve?
16+
* What tasks or workflows would be enabled by having support for your
17+
proposed feature in cleanlab?
18+
* Can you share code snippets or pseudocode describing uses of your feature?
19+
* Can you share any datasets that can help us assess the usefulness of the
20+
proposed feature?
21+
* Have you considered any alternatives to your proposed feature/design?
22+
* How are you working around the lack of native support for your proposed
23+
feature?
24+
-->
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
name: Help
3+
about: Use this template to ask for help.
4+
title: "[Short summary of the question]"
5+
labels: "question"
6+
---
7+
8+
<!--
9+
Please be as detailed as possible in your question.
10+
11+
We will answer questions posted here, but you will likely get an answer faster
12+
by posting in our Slack Community:
13+
14+
https://cleanlab.ai/slack
15+
-->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
name: Other
3+
about: Use this template when no other template applies.
4+
title: "[Short summary of the issue]"
5+
labels: "needs triage"
6+
---
7+
8+
<!-- Please be as detailed as possible in your issue. -->

cleanlab/.github/workflows/ci.yml

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
name: CI
2+
on:
3+
push:
4+
pull_request:
5+
schedule:
6+
- cron: '0 8 * * 6'
7+
jobs:
8+
test:
9+
name: "Test: Python ${{ matrix.python }} on ${{ matrix.os }}"
10+
runs-on: ${{ matrix.os }}
11+
strategy:
12+
matrix:
13+
os:
14+
- ubuntu-latest
15+
- macos-latest
16+
- windows-latest
17+
python:
18+
- "3.6"
19+
- "3.7"
20+
- "3.8"
21+
- "3.9"
22+
- "3.10"
23+
steps:
24+
- uses: actions/checkout@v2
25+
- uses: actions/setup-python@v2
26+
with:
27+
python-version: ${{ matrix.python }}
28+
- name: Install cleanlab
29+
run: pip install -e .
30+
- name: Check cleanlab runs without optional dependencies
31+
run: python3 -c "import cleanlab"
32+
- name: Install development dependencies
33+
run: pip install -r requirements-dev.txt
34+
- name: Overwrite tensorflow version on Windows
35+
if: matrix.os == 'windows-latest'
36+
run: |
37+
pip uninstall -y tensorflow
38+
pip install tensorflow-cpu
39+
- name: Test with coverage
40+
run: pytest --verbose --cov=cleanlab/ --cov-config .coveragerc --cov-report=xml
41+
- uses: codecov/codecov-action@v2
42+
typecheck:
43+
name: Type check
44+
runs-on: ubuntu-latest
45+
steps:
46+
- uses: actions/checkout@v2
47+
- uses: actions/setup-python@v2
48+
- name: Install dependencies
49+
run: |
50+
python -m pip install --upgrade pip
51+
pip install . # install dependencies
52+
pip install -r requirements-dev.txt # install development dependencies and type stubs
53+
- name: Type check
54+
run: mypy --install-types --non-interactive --allow-redefinition cleanlab
55+
fmt:
56+
name: Format
57+
runs-on: ubuntu-latest
58+
steps:
59+
- uses: actions/checkout@v2
60+
- uses: psf/black@stable
61+
pylint:
62+
name: Check for unused/wildcard imports
63+
runs-on: ubuntu-latest
64+
steps:
65+
- uses: actions/checkout@v3
66+
- uses: actions/setup-python@v4
67+
with:
68+
python-version: '3.10'
69+
- name: Install pylint
70+
run: pip install pylint
71+
- name: Error on unused imports
72+
run: pylint --disable=all --enable=unused-import cleanlab tests
73+
- name: Error on wildcard imports
74+
run: pylint --disable=all --enable=wildcard-import cleanlab tests
75+
nblint:
76+
name: Lint Notebooks
77+
runs-on: ubuntu-latest
78+
steps:
79+
- uses: actions/checkout@v2
80+
- uses: actions/setup-python@v2
81+
with:
82+
python-version: '3.10'
83+
- name: nblint
84+
run: ./.ci/nblint.py docs

0 commit comments

Comments
 (0)