diff --git a/CHANGES.rst b/CHANGES.rst index 091ada93..4579144a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,10 @@ Enhancements and Fixes - Change AsyncTAPJob.result to return None if no result is found explicitly [#644] +- Add a UAT constraint to the registry interface for constraining + subjects [#649] + + Deprecations and Removals ------------------------- diff --git a/docs/dal/index.rst b/docs/dal/index.rst index 832b39e3..12280174 100644 --- a/docs/dal/index.rst +++ b/docs/dal/index.rst @@ -15,7 +15,7 @@ metadata. .. doctest-remote-data:: >>> import pyvo as vo - >>> service = vo.dal.SIAService("http://dc.zah.uni-heidelberg.de/lswscans/res/positions/siap/siap.xml") + >>> service = vo.dal.SIAService("http://dc.g-vo.org/lswscans/res/positions/siap/siap.xml") >>> print(service.description) Scans of plates kept at Landessternwarte Heidelberg-Königstuhl. They were obtained at location, at the German-Spanish Astronomical Center @@ -473,7 +473,7 @@ Basic queries are done with the ``pos`` and ``size`` parameters described in >>> pos = SkyCoord.from_name('Eta Carina') >>> size = Quantity(0.5, unit="deg") - >>> sia_service = vo.dal.SIAService("http://dc.zah.uni-heidelberg.de/hppunion/q/im/siap.xml") + >>> sia_service = vo.dal.SIAService("http://dc.g-vo.org/hppunion/q/im/siap.xml") >>> sia_results = sia_service.search(pos=pos, size=size) The dataset format, 'all' by default, can be specified: @@ -565,7 +565,7 @@ within a circular region on the sky defined by the parameters ``pos`` .. doctest-remote-data:: - >>> scs_srv = vo.dal.SCSService('http://dc.zah.uni-heidelberg.de/arihip/q/cone/scs.xml') + >>> scs_srv = vo.dal.SCSService('http://dc.g-vo.org/arihip/q/cone/scs.xml') >>> scs_results = scs_srv.search(pos=pos, radius=size) This service exposes the :ref:`verbosity ` parameter. @@ -754,7 +754,7 @@ If the row contains datasets, they are exposed by several retrieval methods: .. 
doctest-skip:: >>> row.getdataurl() - 'http://dc.zah.uni-heidelberg.de/getproduct/califa/datadr3/V500/NGC0551.V500.rscube.fits' + 'http://dc.g-vo.org/getproduct/califa/datadr3/V500/NGC0551.V500.rscube.fits' >>> type(row.getdataset()) diff --git a/docs/discover/index.rst b/docs/discover/index.rst index 327e9280..63a41018 100644 --- a/docs/discover/index.rst +++ b/docs/discover/index.rst @@ -168,9 +168,11 @@ are two ways to do that: to call something like:: discoverer.set_services( - registry.search(registry.Author("Hubble, %"))) + registry.search(registry.UAT("galaxies", expand_down=3))) - to query services that give a particular author. More realistically, + to query services that claim to deal with galaxies or perhaps more + specific concepts (although this *will* pull a lot of extra services + that the discoverer will just discard). More realistically, :: diff --git a/docs/registry/index.rst b/docs/registry/index.rst index 65c16fe7..6a6300c9 100644 --- a/docs/registry/index.rst +++ b/docs/registry/index.rst @@ -54,6 +54,8 @@ keyword arguments. The following constraints are available: * :py:class:`~pyvo.registry.UCD` (``ucd``): constrain by one or more UCD patterns; resources match when they serve columns having a matching UCD (e.g., ``phot.mag;em.ir.%`` for “any infrared magnitude”). +* :py:class:`~pyvo.registry.UAT` (``uat``): constrain by concepts + from the IVOA Unified Astronomy Thesaurus http://www.ivoa.net/rdf/uat. * :py:class:`~pyvo.registry.Waveband` (``waveband``): one or more terms from the vocabulary at http://www.ivoa.net/rdf/messenger giving the rough spectral location of the resource. @@ -97,9 +99,22 @@ or: ... registry.Waveband("UV")) or a mixture between the two. Constructing using explicit -constraints is generally preferable with more complex queries. Where -the constraints accept multiple arguments, you can pass in sequences to -the keyword arguments; for instance: +constraints is generally preferable with more complex queries. 
+An advantage of using explicit constraints is that you can pass +additional parameters to the constraints. For instance, the UAT +constraint can optionally expand your keyword to narrower or wider +concepts. When looking for resources talking about Cepheids of all +kinds, you can thus say: + +.. doctest-remote-data:: + + >>> resources = registry.search( + ... registry.UAT("cepheid-variable-stars", expand_down=3)) + +There is no way to express this using keyword arguments. + +However, where the constraints accept multiple equivalent arguments, you +can pass in sequences to the keyword arguments; for instance: .. doctest-remote-data:: @@ -113,6 +128,7 @@ is equivalent to: >>> resources = registry.search(waveband=["Radio", "Millimeter"], ... author='%Miller%') + There is also :py:meth:`~pyvo.registry.get_RegTAP_query`, accepting the same arguments as :py:meth:`pyvo.registry.search`. This function simply returns the ADQL query that search would execute. This may be useful diff --git a/pyvo/registry/__init__.py b/pyvo/registry/__init__.py index 291c6eb6..fa8b4075 100644 --- a/pyvo/registry/__init__.py +++ b/pyvo/registry/__init__.py @@ -12,11 +12,12 @@ from .rtcons import (Constraint, SubqueriedConstraint, Freetext, Author, Servicetype, Waveband, Datamodel, Ivoid, - UCD, Spatial, Spectral, Temporal, RegTAPFeatureMissing) + UCD, UAT, Spatial, Spectral, Temporal, + RegTAPFeatureMissing) __all__ = ["search", "get_RegTAP_query", "Constraint", "SubqueriedConstraint", "Freetext", "Author", "Servicetype", "Waveband", "Datamodel", "Ivoid", "UCD", - "Spatial", "Spectral", "Temporal", + "UAT", "Spatial", "Spectral", "Temporal", "choose_RegTAP_service", "RegTAPFeatureMissing", "RegistryResults", "RegistryResource",] diff --git a/pyvo/registry/rtcons.py b/pyvo/registry/rtcons.py index c55534c3..704fc44f 100644 --- a/pyvo/registry/rtcons.py +++ b/pyvo/registry/rtcons.py @@ -108,6 +108,9 @@ def make_sql_literal(value): elif isinstance(value, datetime.datetime): return
f"'{value.isoformat()}'" + elif isinstance(value, set): + return '('+", ".join(make_sql_literal(s) for s in sorted(value))+')' + else: raise ValueError("Cannot format {} as a SQL literal" .format(repr(value))) @@ -637,6 +640,93 @@ def __init__(self, *patterns): for index, pattern in enumerate(patterns)} +class UAT(SubqueriedConstraint): + """ + A constraint selecting resources having UAT keywords as subjects. + + The UAT (Unified Astronomy Thesaurus) is a hierarchical system + of concepts in astronomy. In the VO, its concept identifiers + are dashed strings, something like ``x-ray-transient-sources``. + The full list of identifiers is available from + http://www.ivoa.net/rdf/uat. + + Note that not all data providers properly use UAT keywords in their + subjects even in 2025 (they should, though), and their keyword + assignments may not always be optimal. Consider doing free + text searches if UAT-based results are disappointing, and then + telling the respective data providers about missing keywords. + """ + _keyword = "uat" + _subquery_table = "rr.res_subject" + _condition = "res_subject in {query_terms}" + _uat = None + + @classmethod + def _expand(cls, term, level, direction): + """ + Recursively expand term in the uat. + + This returns a set of concepts that are ``level`` levels wider + or narrower (depending on the value of ``direction``) than term. + + This function assumes the _uat class attribute has been filled + before; that is the case once a constraint has been constructed. + + Parameters + ---------- + + term: str + the start term + level: int + expand this many levels + direction: str + either ``wider`` to expand towards more general concepts + or ``narrower`` to expand toward more specialised concepts. 
+ """ + result = {term} + new_concepts = cls._uat[term][direction] + if level: + for concept in new_concepts: + result |= cls._expand(concept, level-1, direction) + return result + + def __init__(self, uat_keyword, *, expand_up=0, expand_down=0): + """ + + Parameters + ---------- + + uat_keyword: str + An identifier from http://www.ivoa.net/rdf/uat, i.e., a + string like type-ib-supernovae. Note that these are + always all-lowercase. + expand_up: int + In addition to the concept itself, also include expand_up + levels of parent concepts (this probably rarely makes + sense beyond 1). + expand_down: int + In addition to the concept itself, also include expand_down + levels of more specialised concepts (this is usually a good + idea; having more than 10 here for now is equivalent to + infinity). + """ + if self.__class__._uat is None: + self.__class__._uat = vocabularies.get_vocabulary("uat")["terms"] + + if uat_keyword not in self._uat: + raise dalq.DALQueryError( + f"{uat_keyword} does not identify an IVOA uat" + " concept (see http://www.ivoa.net/rdf/uat).") + + query_terms = {uat_keyword} + if expand_up: + query_terms |= self._expand(uat_keyword, expand_up, "wider") + if expand_down: + query_terms |= self._expand(uat_keyword, expand_down, "narrower") + + self._fillers = {"query_terms": query_terms} + + class Spatial(SubqueriedConstraint): """ A RegTAP constraint selecting resources covering a geometry in diff --git a/pyvo/registry/tests/test_rtcons.py b/pyvo/registry/tests/test_rtcons.py index 79e78a21..2bd01a66 100644 --- a/pyvo/registry/tests/test_rtcons.py +++ b/pyvo/registry/tests/test_rtcons.py @@ -284,6 +284,36 @@ def test_basic(self): "ucd LIKE 'phot.mag;em.opt.%' OR ucd LIKE 'phot.mag;em.ir.%'")) +@pytest.mark.remote_data +class TestUATConstraint: + def test_basic(self): + cons = rtcons.UAT("solar-flares") + assert (cons.get_search_condition(FAKE_GAVO) + == "ivoid IN (SELECT DISTINCT ivoid FROM rr.res_subject WHERE res_subject in ('solar-flares'))") + +
def test_nonterm(self): + with pytest.raises(dalq.DALQueryError, match="solarium does not identify"): + rtcons.UAT("solarium") + + def test_wider(self): + cons = rtcons.UAT("solar-flares", expand_up=2) + assert (cons.get_search_condition(FAKE_GAVO) + == "ivoid IN (SELECT DISTINCT ivoid FROM rr.res_subject WHERE res_subject in" + " ('solar-activity', 'solar-flares', 'solar-physics', 'solar-storm'))") + + def test_narrower(self): + cons = rtcons.UAT("solar-activity", expand_down=1) + assert (cons.get_search_condition(FAKE_GAVO) + == "ivoid IN (SELECT DISTINCT ivoid FROM rr.res_subject WHERE res_subject in" + " ('solar-active-regions', 'solar-activity', 'solar-filaments', 'solar-flares'," + " 'solar-magnetic-bright-points', 'solar-prominences', 'solar-storm'))") + cons = rtcons.UAT("solar-activity", expand_down=2) + assert (cons.get_search_condition(FAKE_GAVO).startswith( + "ivoid IN (SELECT DISTINCT ivoid FROM rr.res_subject WHERE res_subject in" + " ('ephemeral-active-regions', 'quiescent-solar-prominence'," + " 'solar-active-region-filaments'")) + + class TestSpatialConstraint: def test_point(self): cons = registry.Spatial([23, -40]) @@ -522,7 +552,7 @@ def test_bad_keyword(self): assert str(excinfo.value) == ("foo is not a valid registry" " constraint keyword. Use one of" " author, datamodel, ivoid, keywords, servicetype," - " spatial, spectral, temporal, ucd, waveband.") + " spatial, spectral, temporal, uat, ucd, waveband.") def test_with_legacy_keyword(self): assert self.where_clause_for( @@ -614,9 +644,10 @@ def test_all_constraints(): moc = rtcons.Spatial("0/0-11", intersect="overlaps") spectral = rtcons.Spectral((5000 * u.Angstrom, 6000 * u.Angstrom)) time = rtcons.Temporal((50000, 60000)) + uat = rtcons.UAT('galaxies', expand_down=3) result = registry.search( text, author, servicetype, waveband, datamodel, - ivoid, ucd, moc, spectral, time + ivoid, ucd, moc, spectral, time, uat ) assert result.fieldnames == ( 'ivoid', 'res_type', 'short_name',