cmu-delphi
diff --git a/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion b/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/CONTRIBUTING.md
Lines changed: 8 additions & 5 deletions b/‎.github/CONTRIBUTING.md
Lines changed: 8 additions & 5 deletions
diff --git a/‎.github/pull_request_template.md
Lines changed: 2 additions & 2 deletions b/‎.github/pull_request_template.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/publish-release.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/publish-release.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/python-ci.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/python-ci.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎Jenkinsfile
Lines changed: 38 additions & 15 deletions b/‎Jenkinsfile
Lines changed: 38 additions & 15 deletions
diff --git a/‎_delphi_utils_python/.bumpversion.cfg
Lines changed: 2 additions & 2 deletions b/‎_delphi_utils_python/.bumpversion.cfg
Lines changed: 2 additions & 2 deletions
diff --git a/‎_delphi_utils_python/DEVELOP.md
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/DEVELOP.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/Makefile
Lines changed: 5 additions & 4 deletions b/‎_delphi_utils_python/Makefile
Lines changed: 5 additions & 4 deletions
diff --git a/‎_delphi_utils_python/README.md
Lines changed: 29 additions & 2 deletions b/‎_delphi_utils_python/README.md
Lines changed: 29 additions & 2 deletions
diff --git a/‎_delphi_utils_python/delphi_utils/__init__.py
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/delphi_utils/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/delphi_utils/geomap.py
Lines changed: 4 additions & 2 deletions b/‎_delphi_utils_python/delphi_utils/geomap.py
Lines changed: 4 additions & 2 deletions
diff --git a/‎_delphi_utils_python/delphi_utils/logger.py
Lines changed: 16 additions & 18 deletions b/‎_delphi_utils_python/delphi_utils/logger.py
Lines changed: 16 additions & 18 deletions
diff --git a/‎_delphi_utils_python/pyproject.toml
Lines changed: 57 additions & 0 deletions b/‎_delphi_utils_python/pyproject.toml
Lines changed: 57 additions & 0 deletions
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.55
+current_version = 0.3.56
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
@@ -12,7 +12,9 @@ The production branch is configured to automatically deploy to our production en
 
 * everything else
 
-All other branches are development branches. We don't enforce a naming policy.
+All other branches are development branches. We don't enforce a naming policy, but it is recommended to prefix all branches you create with your name, username, or initials (e.g. `username/branch-name`).
+
+We don't forbid force-pushing, but please keep it to a minimum and be careful when force-pushing to a branch that others are modifying at the same time.
 
 ## Issues
 
@@ -29,7 +31,7 @@ So, how does one go about developing a pipeline for a new data source?
 ### tl;dr
 
 1. Create your new indicator branch from `main`.
-2. Build it using the appropriate template, following the guidelines in the included README.md and REVIEW.md files.
+2. Build it using the [indicator template](https://github.com/cmu-delphi/covidcast-indicators/tree/main/_template_python), following the guidelines in the included README.md, REVIEW.md, and INDICATOR_DEV_GUIDE.md files.
 3. Make some stuff!
 4. When your stuff works, push your development branch to remote, and open a PR against `main` for review.
 5. Once your PR has been merged, consult with a platform engineer for the remaining production setup needs. They will create a deployment workflow for your indicator including any necessary production parameters. Production secrets are encrypted in the Ansible vault. This workflow will be tested in staging by admins, who will consult you about any problems they encounter.
@@ -50,7 +52,7 @@ git checkout -b dev-my-feature-branch
 
 ### Creating your indicator
 
-Create a directory for your new indicator by making a copy of `_template_r` or `_template_python` depending on the programming language you intend to use. If using Python, add the name of the directory to the list found in `jobs > build > strategy > matrix > packages` in `.github/workflows/python-ci.yml`, which will enable automated checks for your indicator when you make PRs. The template copies of `README.md` and `REVIEW.md` include the minimum requirements for code structure, documentation, linting, testing, and method of configuration. Beyond that, we don't have any established restrictions on implementation; you can look at other existing indicators see some examples of code layout, organization, and general approach.
+Create a directory for your new indicator by making a copy of `_template_python`. (We also make a `_template_r` available, but R should only be used as a last resort, due to complications using it in production.) Add the name of the directory to the list found in `jobs > build > strategy > matrix > packages` in `.github/workflows/python-ci.yml`, which will enable automated checks for your indicator when you make PRs. The template copies of `README.md` and `REVIEW.md` include the minimum requirements for code structure, documentation, linting, testing, and method of configuration. Beyond that, we don't have any established restrictions on implementation; you can look at other existing indicators to see some examples of code layout, organization, and general approach.
 
 * Consult your peers with questions! :handshake:
 
@@ -62,7 +64,7 @@ Once you have something that runs locally and passes tests you set up your remot
 git push -u origin dev-my-feature-branch
 ```
 
-You can then draft public API documentation for people who would fetch this
+You can then draft [public API documentation](https://cmu-delphi.github.io/delphi-epidata/) for people who would fetch this
 data from the API. Public API documentation is kept in the delphi-epidata
 repository, and there is a [template Markdown
 file](https://github.com/cmu-delphi/delphi-epidata/blob/main/docs/api/covidcast-signals/_source-template.md)
@@ -104,7 +106,8 @@ We use a branch-based git workflow coupled with [Jenkins](https://www.jenkins.io
   * Package - Tar and gzip the built environment.
   * Deploy - Trigger an Ansible playbook to place the built package onto the runtime host, place any necessary production configuration, and adjust the runtime envirnemnt (if necessary).
 
-There are several additional Jenkins-specific files that will need to be created for each indicator, as well as some configuration additions to the runtime host. It will be important to pair with a platform engineer to prepare the necessary production environment needs, test the workflow, validate on production, and ultimately sign off on a production release.
+There are several additional Jenkins-specific files that will need to be created for each indicator, as well as some configuration additions to the runtime host.
+It will be important to pair with a platform engineer to prepare the necessary production environment needs, test the workflow, validate on production, and ultimately sign off on a production release.
 
 ### Preparing container images of indicators
 
 
@@ -6,5 +6,5 @@ Itemize code/test/documentation changes and files added/removed.
 - change1
 - change2
 
-### Fixes 
-- Fixes #(issue)
+### Associated Issue(s) 
+- Addresses #(issue)
@@ -86,7 +86,7 @@ jobs:
       - name: Release
         run: |
           make release
-      - uses: actions/upload-artifact@v2
+      - uses: actions/upload-artifact@v4
         with:
           name: delphi_utils
           path: _delphi_utils_python/dist/*.tar.gz
 
@@ -51,7 +51,7 @@ jobs:
         with:
           python-version: 3.8
           cache: "pip"
-          cache-dependency-path: "setup.py"
+          cache-dependency-path: "pyproject.toml"
       - name: Install testing dependencies
         run: |
           python -m pip install --upgrade pip
 
@@ -10,7 +10,7 @@
    - TODO: #527 Get this list automatically from python-ci.yml at runtime.
  */
 
-def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits", "nwss_wastewater", "nssp"]
+def indicator_list = ['backfill_corrections', 'changehc', 'claims_hosp', 'google_symptoms', 'hhs_hosp', 'nchs_mortality', 'quidel_covidtest', 'sir_complainsalot', 'doctor_visits', 'nwss_wastewater', 'nssp']
 def build_package_main = [:]
 def build_package_prod = [:]
 def deploy_staging = [:]
@@ -19,39 +19,62 @@ def deploy_production = [:]
 pipeline {
     agent any
     stages {
-        stage('Build and Package main') {
+        stage('Build dev/feature branch') {
+            when  {
+                not {
+                    anyOf {
+                        branch 'main'
+                        branch 'prod'
+                    }
+                }
+            }
+            steps {
+                script {
+                    indicator_list.each { indicator ->
+                        stage("Build ${indicator}") {
+                            sh "jenkins/build-indicator.sh ${indicator}"
+                        }
+                    }
+                }
+            }
+        }
+        stage('Build and Package main branch') {
             when {
-                branch "main";
+                branch 'main'
             }
             steps {
                 script {
                     indicator_list.each { indicator ->
-                        build_package_main[indicator] = {
-                            sh "jenkins/build-and-package.sh ${indicator} main"
+                        stage("Build ${indicator}") {
+                            sh "jenkins/build-indicator.sh ${indicator}"
+                        }
+                        stage("Package ${indicator}") {
+                            sh "jenkins/package-indicator.sh ${indicator} main"
                         }
                     }
-                    parallel build_package_main
                 }
             }
         }
-        stage('Build and Package prod') {
+        stage('Build and Package prod branch') {
             when {
-                branch "prod";
+                branch 'prod'
             }
             steps {
                 script {
                     indicator_list.each { indicator ->
-                        build_package_prod[indicator] = {
-                            sh "jenkins/build-and-package.sh ${indicator} prod"
+                        stage("Build ${indicator}") {
+                            sh "jenkins/build-indicator.sh ${indicator}"
+                        }
+                        stage("Package ${indicator}") {
+                            sh "jenkins/package-indicator.sh ${indicator} prod"
                         }
                     }
-                    parallel build_package_prod
                 }
             }
         }
-        stage('Deploy staging') {
+        stage('Deploy main branch to staging env') {
             when {
-                branch "main";
+                branch 'main'
             }
             steps {
                 script {
@@ -64,9 +87,9 @@ pipeline {
                 }
             }
         }
-        stage('Deploy production') {
+        stage('Deploy prod branch to production env') {
             when {
-                branch "prod";
+                branch 'prod'
             }
             steps {
                 script {
 
@@ -1,9 +1,9 @@
 [bumpversion]
-current_version = 0.3.24
+current_version = 0.3.25
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False
 
-[bumpversion:file:setup.py]
+[bumpversion:file:pyproject.toml]
 
 [bumpversion:file:delphi_utils/__init__.py]
@@ -9,7 +9,7 @@ To install the module in your default version of Python, run the
 following from this directory:
 
 ```
-pip install .
+pip install -e '.[dev]'
 ```
 
 As described in each of the indicator code directories, you will want to install
 
@@ -6,12 +6,12 @@ venv:
 install: venv
 	. env/bin/activate; \
 	pip install wheel ; \
-	pip install -e .
+	pip install -e '.[dev]'
 
 install-ci: venv
 	. env/bin/activate; \
-	pip install wheel ; \
-	pip install .
+	pip install 'build[virtualenv]' pylint pytest pydocstyle wheel twine ; \
+	pip install '.[dev]'
 
 lint:
 	. env/bin/activate; pylint delphi_utils --rcfile=../pyproject.toml
@@ -30,4 +30,5 @@ clean:
 
 release: lint test
 	. env/bin/activate ; \
-	python setup.py sdist bdist_wheel
+	pip install 'build[virtualenv]' ; \
+	python -m build --sdist --wheel
@@ -17,5 +17,32 @@ Submodules:
 - `validator`: Data sanity checks and anomaly detection.
 
 
-Source code can be found here: 
-[https://github.com/cmu-delphi/covidcast-indicators/](https://github.com/cmu-delphi/covidcast-indicators/)
+Source code can be found here:
+[https://github.com/cmu-delphi/covidcast-indicators/](https://github.com/cmu-delphi/covidcast-indicators/)
+
+## Logger Usage
+
+Single-thread usage.
+
+```py
+from delphi_utils.logger import get_structured_logger
+
+logger = get_structured_logger('my_logger')
+logger.info('Hello, world!')
+```
+
+Multi-thread usage.
+
+```py
+from delphi_utils.logger import get_structured_logger, pool_and_threadedlogger
+
+def f(x, threaded_logger):
+    threaded_logger.info(f'x={x}')
+    return x*x
+
+logger = get_structured_logger('my_logger')
+logger.info('Hello, world!')
+with pool_and_threadedlogger(logger, n_cpu) as (pool, threaded_logger):
+    for i in range(10):
+        pool.apply_async(f, args=(i, threaded_logger))
+```
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday
 
-__version__ = "0.3.24"
+__version__ = "0.3.25"
@@ -443,7 +443,7 @@ def add_population_column(
         ---------
         data: pd.DataFrame
             The dataframe with a FIPS code column.
-        geocode_type: {"fips", "zip"}
+        geocode_type:
             The type of the geocode contained in geocode_col.
         geocode_col: str, default None
             The name of the column containing the geocodes. If None, uses the geocode_type
@@ -671,8 +671,10 @@ def aggregate_by_weighted_sum(
             to a from_geo, e.g. "wastewater collection site").
         to_geo: str
             The column name of the geocode to aggregate to.
-        sensor: str
+        sensor_col: str
             The column name of the sensor to aggregate.
+        time_col: str
+            The column name of the timestamp to aggregate over.
         population_column: str
             The column name of the population to weight the sensor by.
 
 
@@ -1,5 +1,7 @@
 """Structured logger utility for creating JSON logs.
 
+See the delphi_utils README.md for usage examples.
+
 The Delphi group uses two ~identical versions of this file.
 Try to keep them in sync with edits, for sanity.
   https://github.com/cmu-delphi/covidcast-indicators/blob/main/_delphi_utils_python/delphi_utils/logger.py
@@ -133,19 +135,17 @@ class LoggerThread():
     """
     A construct to use a logger from multiprocessing workers/jobs.
 
-    the bare structlog loggers are thread-safe but not multiprocessing-safe.
-    a `LoggerThread` will spawn a thread that listens to a mp.Queue
-    and logs messages from it with the provided logger,
-    so other processes can send logging messages to it
-    via the logger-like `SubLogger` interface.
-    the SubLogger even logs the pid of the caller.
+    The bare structlog loggers are thread-safe but not multiprocessing-safe. A
+    `LoggerThread` will spawn a thread that listens to a mp.Queue and logs
+    messages from it with the provided logger, so other processes can send
+    logging messages to it via the logger-like `SubLogger` interface. The
+    SubLogger even logs the pid of the caller.
 
-    this is good to use with a set of jobs that are part of a mp.Pool,
-    but isnt recommended for general use
-    because of overhead from threading and multiprocessing,
-    and because it might introduce lag to log messages.
+    This is good to use with a set of jobs that are part of a mp.Pool, but isn't
+    recommended for general use because of overhead from threading and
+    multiprocessing, and because it might introduce lag to log messages.
 
-    somewhat inspired by:
+    Somewhat inspired by:
     docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
     """
 
@@ -236,13 +236,11 @@ def pool_and_threadedlogger(logger, *poolargs):
     """
     Provide (to a context) a multiprocessing Pool and a proxy to the supplied logger.
 
-    Emulates the multiprocessing.Pool() context manager,
-    but also provides (via a LoggerThread) a SubLogger proxy to logger
-    that can be safely used by pool workers.
-    The SubLogger proxy interface supports these methods: debug, info, warning, error,
-    and critical.
-    Also "cleans up" the pool by waiting for workers to complete
-    as it exits the context.
+    Emulates the multiprocessing.Pool() context manager, but also provides (via
+    a LoggerThread) a SubLogger proxy to logger that can be safely used by pool
+    workers. The SubLogger proxy interface supports these methods: debug, info,
+    warning, error, and critical. Also "cleans up" the pool by waiting for
+    workers to complete as it exits the context.
     """
     with multiprocessing.Manager() as manager:
         logger_thread = LoggerThread(logger, manager.Queue())
 
@@ -0,0 +1,57 @@
+[build-system]
+requires = ["setuptools", "setuptools-scm>=8.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "delphi-utils"
+version = "0.3.25"
+description = "Shared Utility Functions for Indicators"
+readme = "README.md"
+requires-python = "== 3.8.*"
+license = { text = "MIT License" }
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3.8",
+    "License :: OSI Approved :: MIT License",  # must be a registered trove classifier, or the PyPI upload is rejected
+]
+dependencies = [
+    "boto3",
+    "covidcast",
+    "cvxpy",
+    "epiweeks",
+    "gitpython",
+    "importlib_resources>=1.3",
+    "numpy",
+    "pandas>=1.1.0",
+    "requests",
+    "slackclient",
+    "scs<3.2.6",                # TODO: remove this ; it is a cvxpy dependency, and the excluded version appears to break our jenkins build. see: https://github.com/cvxgrp/scs/issues/283
+    "structlog",
+    "xlrd",                     # needed by Pandas to read Excel files
+]
+
+[project.urls]
+Homepage = "https://github.com/cmu-delphi/covidcast-indicators"
+
+[project.optional-dependencies]
+dev = [
+    "darker[isort]~=2.1.1",
+    "pylint==2.8.3",
+    "pytest",
+    "pydocstyle",
+    "pytest-cov",
+    "mock",
+    "moto~=4.2.14",
+    "requests-mock",
+    "freezegun",
+]
+flash = ["scipy"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["delphi_utils*"]  # trailing * also matches subpackages (e.g. delphi_utils.data); a bare name matches only the top-level package
+namespaces = true
+
+[tool.setuptools.package-data]
+"delphi_utils.data" = ["20*/*.csv"]