datamade · fgregg · Oct 25, 2024 · Oct 25, 2024 · Oct 25, 2024 · Oct 25, 2024
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+extend-ignore = E203
+max-line-length = 160
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -0,0 +1,78 @@
+name: Test and Publish Python Package
+
+on: [push, pull_request]  # trigger on every push and on every pull request
+
+permissions:
+  contents: read  # workflow-level token scope: read-only access to repository contents
+
+jobs:
+  lint:  # static checks: flake8, isort, black and mypy on Python 3.12
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install .[dev]
+      - name: flake8
+        run: flake8 probablepeople tests
+      - name: isort
+        if: always()  # still run when an earlier step failed, so every tool reports
+        run: isort --check-only .
+      - name: black
+        if: always()
+        run: black . --check
+      - name: mypy
+        if: always()
+        run: mypy
+  test:  # run pytest for every OS / Python combination in the matrix
+    timeout-minutes: 40
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # let the remaining matrix entries finish when one fails
+      matrix:
+        os: [windows-latest, macos-latest, ubuntu-latest]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13-dev"]  # quoted so none parse as floats
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e .[dev]
+      - name: pytest
+        run: pytest
+
+  deploy:  # build the distributions and publish them to PyPI
+    if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
+    needs: [test, lint]  # publish only after both the test and lint jobs succeed
+
+    runs-on: ubuntu-latest
+
+    name: Upload release to PyPI
+    environment:
+      name: pypi
+      url: https://pypi.org/p/probablepeople
+    permissions:
+      id-token: write  # lets the job request an OIDC token; the publish step is given no password
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+      - name: Build package
+        run: python -m build
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:  # hooks run by pre-commit, each pinned to a fixed release
+  - repo: https://github.com/psf/black
+    rev: "24.8.0"
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/isort
+    rev: "5.13.2"
+    hooks:
+      - id: isort
+        name: isort (python)
+  - repo: https://github.com/pycqa/flake8
+    rev: "7.1.1"
+    hooks:
+      - id: flake8
+        args: ["--config=.flake8"]  # point flake8 at the repository's .flake8 file
diff --git a/.travis.yml b/.travis.yml
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1,2 @@
 include LICENSE
+include name_data/labeled/*
diff --git a/Makefile b/Makefile
diff --git a/README.md b/README.md
@@ -52,14 +52,13 @@ probablepeople learns how to parse names/companies through a body of training da
 Probablepeople uses [parserator](https://github.com/datamade/parserator), a library for making and improving probabilistic parsers - specifically, parsers that use [python-crfsuite](https://github.com/tpeng/python-crfsuite)'s implementation of conditional random fields. Parserator allows you to train probablepeople's model (a .crfsuite settings file) on labeled training data, and provides tools for easily adding new labeled training data.
 #### Building & testing development code
 
-  ```
-  git clone https://github.com/datamade/probablepeople.git  
-  cd probablepeople  
-  pip install -r requirements.txt  
-  python setup.py develop
-  make all
-  nosetests .  
-  ```  
+```console
+git clone https://github.com/datamade/probablepeople.git
+cd probablepeople
+pip install -e ".[dev]"
+pytest
+```
+
 #### Creating/adding labeled training data (.xml outfile) from unlabeled raw data (.csv infile)  
 
 If there are name/company formats that the parser isn't performing well on, you can add them to training data. As probablepeople continually learns about new cases, it will continually become smarter and more robust.
@@ -93,7 +92,7 @@ The parserator `label` command will start a console labeling task, where you wil
   parserator train name_data/labeled/person_labeled.xml,name_data/labeled/company_labeled.xml probablepeople --modelfile=generic
   parserator train name_data/labeled/person_labeled.xml probablepeople --modelfile=person
   parserator train name_data/labeled/company_labeled.xml probablepeople --modelfile=company
-  ```  
+  ```
 
 ## Errors and Bugs