
Commit 5f9fb17

Migrate to OpenSearch (#16180)
* use opensearch container locally, to match production index
* migrate to opensearchpy
* rename os -> opensearch, no need to save keystrokes
* Rename wrapper/page
* rename env var
* update docs
* remove refs to dead deps
1 parent bdd7f89 commit 5f9fb17

26 files changed (+220, -212 lines)

.github/dependabot.yml

Lines changed: 0 additions & 2 deletions
@@ -11,8 +11,6 @@ updates:
       - dependency-type: indirect
     rebase-strategy: "disabled"
     ignore:
-      # Always ignore elasticsearch, future versions are always incompatible with our provider
-      - dependency-name: "elasticsearch"
       # These update basically every day, and 99.9% of the time we don't care
       - dependency-name: "boto3"
       - dependency-name: "boto3-stubs"

dev/compose/opensearch/Dockerfile

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+FROM opensearchproject/opensearch:2.12.0
+
+RUN opensearch-plugin remove opensearch-skills --purge
+RUN opensearch-plugin remove opensearch-ml --purge
+RUN opensearch-plugin remove opensearch-neural-search --purge
+RUN opensearch-plugin remove opensearch-performance-analyzer --purge
+RUN opensearch-plugin remove opensearch-security-analytics --purge
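
The Dockerfile strips optional bundled plugins to keep the local container light. As a quick sanity check, here is a minimal sketch (assuming the dev container built from this Dockerfile is running on localhost:9200 with the security plugin disabled, as configured in docker-compose.yml below) that lists whatever plugins remain via opensearch-py's cat API:

from opensearchpy import OpenSearch

# Local dev container; the security plugin is disabled, so no credentials are needed.
client = OpenSearch(hosts=["http://localhost:9200"])

# GET /_cat/plugins returns one entry per installed plugin per node; the five
# plugins removed in the Dockerfile above should no longer be listed.
for plugin in client.cat.plugins(format="json"):
    print(plugin["component"])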

dev/environment

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ BROKER_URL=sqs://localstack:4566/?region=us-east-1&queue_name_prefix=warehouse-d
 
 DATABASE_URL=postgresql+psycopg://postgres@db/warehouse
 
-ELASTICSEARCH_URL=http://elasticsearch:9200/development
+OPENSEARCH_URL=http://opensearch:9200/development
 
 REDIS_URL=redis://redis:6379/0
 

docker-compose.override.yaml-sample

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ services:
   dev-docs: *disable-service
   user-docs: *disable-service
 
-  elasticsearch:
+  opensearch:
     # You can also add selective environment variables
     environment:
       logger.level: WARN # default INFO is pretty noisy

docker-compose.yml

Lines changed: 13 additions & 4 deletions
@@ -44,11 +44,20 @@ services:
     ports:
       - "4566:4566"
 
-  elasticsearch:
-    image: elasticsearch:7.10.1
+  opensearch:
+    build:
+      context: ./dev/compose/opensearch
+    init: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -u admin:gqYeDIzbEwTTYmB7 --silent --fail http://localhost:9200/_cluster/health || exit 1"]
+      interval: 1s
+      start_period: 10s
     environment:
-      - xpack.security.enabled=false
       - discovery.type=single-node
+      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=gqYeDIzbEwTTYmB7
+      - DISABLE_INSTALL_DEMO_CONFIG=true
+      - DISABLE_SECURITY_PLUGIN=true
+      - DISABLE_PERFORMANCE_ANALYZER_AGENT_CLI=true
     ulimits:
       nofile:
         soft: 65536
@@ -109,7 +118,7 @@ services:
     depends_on:
       db:
         condition: service_healthy
-      elasticsearch:
+      opensearch:
         condition: service_started
       redis:
         condition: service_started
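
The compose healthcheck simply polls _cluster/health over HTTP. For reference, a minimal Python sketch of the same probe using opensearch-py (assuming the dev stack is up and DISABLE_SECURITY_PLUGIN=true, so plain HTTP without credentials works):

from opensearchpy import OpenSearch

client = OpenSearch(hosts=["http://localhost:9200"], timeout=5)

# Equivalent to GET /_cluster/health; block for up to 10s waiting for at least
# "yellow", which is what a healthy single-node dev cluster reports.
health = client.cluster.health(wait_for_status="yellow", timeout="10s")
print(health["status"], health["number_of_nodes"])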

docs/dev/architecture.md

Lines changed: 4 additions & 4 deletions
@@ -91,12 +91,12 @@ C4Container
         Container(camo, "Camo", "image proxy")
         Container(web_app, "Web", "Python (Pyramid, SQLAlchemy)", "Delivers HTML and API content")
         SystemQueue(sqs, "AWS SQS", "task broker")
-        SystemDb(elasticsearch, "Elasticsearch", "Index of projects, packages, metadata")
+        SystemDb(opensearch, "OpenSearch", "Index of projects, packages, metadata")
         SystemDb(db, "Postgres Database", "Store project, package metadata, user details")
         SystemDb(redis, "Redis", "Store short-term cache data")
 
         Rel(web_app, sqs, "queue tasks")
-        Rel(web_app, elasticsearch, "search for projects")
+        Rel(web_app, opensearch, "search for projects")
         Rel(web_app, db, "store/retrieve most data")
         Rel(web_app, redis, "cache data")
     }
@@ -153,7 +153,7 @@ C4Container
     Container_Boundary(c1, "Supporting Systems") {
         SystemDb(redis, "Redis", "Store short-term cache data")
         SystemQueue(sqs, "AWS SQS", "task broker")
-        SystemDb(elasticsearch, "Elasticsearch", "Index of projects, packages, metadata")
+        SystemDb(opensearch, "OpenSearch", "Index of projects, packages, metadata")
         SystemDb(db, "Postgres Database", "Store project, package metadata, user details")
         System(ses, "AWS SES", "Simple Email Service")
     }
@@ -163,7 +163,7 @@ C4Container
     BiRel(worker, sqs, "get next task/ack")
     BiRel(worker, redis, "store task results")
     BiRel(worker, db, "interact with models")
-    BiRel(worker, elasticsearch, "update search index")
+    BiRel(worker, opensearch, "update search index")
     Rel(worker, fastly, "purge URLs")
     Rel(worker, ses, "send emails")
 

docs/dev/development/getting-started.rst

Lines changed: 10 additions & 10 deletions
@@ -188,8 +188,8 @@ application.
    (on Windows by editing the config file found at ``C:\Users\<USER>\AppData\Local\Docker\wsl``).
 
    If you are using Linux, you may need to configure the maximum map count to get
-   the `elasticsearch` up and running. According to the
-   `documentation <https://www.elastic.co/guide/en/elasticsearch/reference/6.8/vm-max-map-count.html>`_
+   the `opensearch` up and running. According to the
+   `documentation <https://opensearch.org/docs/2.15/install-and-configure/install-opensearch/index/#important-settings>`_
    this can be set temporarily:
 
    .. code-block:: console
@@ -200,9 +200,9 @@ application.
    :file:`/etc/sysctl.conf`.
 
    Also check that you have more than 5% disk space free, otherwise
-   elasticsearch will become read only. See ``flood_stage`` in the
-   `elasticsearch disk allocation docs
-   <https://www.elastic.co/guide/en/elasticsearch/reference/6.8/disk-allocator.html>`_.
+   opensearch will become read only. See ``flood_stage`` in the
+   `opensearch disk allocation docs
+   <https://opensearch.org/docs/latest/install-and-configure/configuring-opensearch/cluster-settings/#cluster-level-routing-and-allocation-settings>`_.
 
 
 Once ``make build`` has finished, run the command:
@@ -414,10 +414,10 @@ Errors when executing ``make initdb``
 
 * If ``make initdb`` fails with a timeout like::
 
-    urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x8beca733c3c8>, 'Connection to elasticsearch timed out. (connect timeout=30)')
+    urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x8beca733c3c8>, 'Connection to opensearch timed out. (connect timeout=30)')
 
   you might need to increase the amount of memory allocated to docker, since
-  elasticsearch wants a lot of memory (Dustin gives warehouse ~4GB locally).
+  opensearch wants a lot of memory (Dustin gives warehouse ~4GB locally).
   Refer to the tip under :ref:`running-warehouse-containers` section for more details.
 
 
@@ -478,7 +478,7 @@ Docker please raise an issue in
 Disabling services locally
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Some services, such as Elasticsearch, consume a lot of resources when running
+Some services, such as OpenSearch, consume a lot of resources when running
 locally, but might not always be necessary when doing local development.
 
 To disable these locally, you can create a ``docker-compose.override.yaml``
@@ -490,8 +490,8 @@ individually disable services, modify their entrypoint to do something else:
     version: "3"
 
     services:
-      elasticsearch:
-        entrypoint: ["echo", "Elasticsearch disabled"]
+      opensearch:
+        entrypoint: ["echo", "OpenSearch disabled"]
 
 Note that disabling services might cause things to fail in unexpected ways.
 

pyproject.toml

Lines changed: 0 additions & 1 deletion
@@ -56,7 +56,6 @@ module = [
     "b2sdk.*", # https://github.com/Backblaze/b2-sdk-python/issues/148
     "celery.app.backends.*",
     "celery.backends.redis.*",
-    "elasticsearch_dsl.*", # https://github.com/elastic/elasticsearch-dsl-py/issues/1533
     "github_reserved_names.*",
     "google.cloud.*",
     "forcediphttpsadapter.*",

requirements/main.in

Lines changed: 2 additions & 3 deletions
@@ -13,8 +13,6 @@ click
 cryptography
 datadog>=0.19.0
 disposable-email-domains
-elasticsearch>=7.0.0,<7.11.0
-elasticsearch_dsl>=7.0.0,<8.0.0
 first
 forcediphttpsadapter
 github-reserved-names>=1.0.0
@@ -31,6 +29,7 @@ linehaul
 lxml
 msgpack
 natsort
+opensearch-py
 orjson
 packaging>=23.2
 packaging_legacy
@@ -69,7 +68,7 @@ structlog
 transaction
 trove-classifiers
 ua-parser
-urllib3<2 # See https://github.com/pypi/warehouse/issues/14671
+urllib3
 webauthn>=1.0.0,<3.0.0
 whitenoise
 WTForms[email]>=2.0.0
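
The two old pins collapse into a single opensearch-py requirement because that package ships both the transport client and the query DSL that elasticsearch_dsl previously provided (the separate opensearch-dsl package was folded into opensearch-py as of 2.2.0). A hedged sketch of the equivalent imports and a trivial query, assuming the local dev index name "development" and an illustrative field/value:

from opensearchpy import OpenSearch, Q, Search

client = OpenSearch(hosts=["http://localhost:9200"])

# The DSL classes (Search, Q, Document, ...) now come from opensearchpy itself,
# so no second dependency is needed. Field name and value here are illustrative.
query = Q("match", name="requests")
results = Search(using=client, index="development").query(query).execute()
for hit in results:
    print(hit.meta.id)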

requirements/main.txt

Lines changed: 12 additions & 14 deletions
@@ -184,7 +184,7 @@ certifi==2024.6.2 \
     --hash=sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56
     # via
     #   -r requirements/main.in
-    #   elasticsearch
+    #   opensearch-py
     #   requests
     #   sentry-sdk
 cffi==1.16.0 \
@@ -485,20 +485,13 @@ docutils==0.20.1 \
     --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \
     --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b
     # via readme-renderer
-elasticsearch==7.10.1 \
-    --hash=sha256:4ebd34fd223b31c99d9f3b6b6236d3ac18b3046191a37231e8235b06ae7db955 \
-    --hash=sha256:a725dd923d349ca0652cf95d6ce23d952e2153740cf4ab6daf4a2d804feeed48
-    # via
-    #   -r requirements/main.in
-    #   elasticsearch-dsl
-elasticsearch-dsl==7.4.1 \
-    --hash=sha256:07ee9c87dc28cc3cae2daa19401e1e18a172174ad9e5ca67938f752e3902a1d5 \
-    --hash=sha256:97f79239a252be7c4cce554c29e64695d7ef6a4828372316a5e5ff815e7a7498
-    # via -r requirements/main.in
 email-validator==2.2.0 \
     --hash=sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631 \
     --hash=sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7
     # via wtforms
+events==0.5 \
+    --hash=sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd
+    # via opensearch-py
 first==2.0.2 \
     --hash=sha256:8d8e46e115ea8ac652c76123c0865e3ff18372aef6f03c22809ceefcea9dec86 \
     --hash=sha256:ff285b08c55f8c97ce4ea7012743af2495c9f1291785f163722bd36f6af6d3bf
@@ -1285,6 +1278,10 @@ openapi-spec-validator==0.7.1 \
     --hash=sha256:3c81825043f24ccbcd2f4b149b11e8231abce5ba84f37065e14ec947d8f4e959 \
     --hash=sha256:8577b85a8268685da6f8aa30990b83b7960d4d1117e901d451b5d572605e5ec7
     # via openapi-core
+opensearch-py==2.6.0 \
+    --hash=sha256:0b7c27e8ed84c03c99558406927b6161f186a72502ca6d0325413d8e5523ba96 \
+    --hash=sha256:b6e78b685dd4e9c016d7a4299cf1de69e299c88322e3f81c716e6e23fe5683c1
+    # via -r requirements/main.in
 orjson==3.10.5 \
     --hash=sha256:03b565c3b93f5d6e001db48b747d31ea3819b89abf041ee10ac6988886d18e01 \
     --hash=sha256:099e81a5975237fda3100f918839af95f42f981447ba8f47adb7b6a3cdb078fa \
@@ -1649,8 +1646,8 @@ python-dateutil==2.9.0.post0 \
     #   botocore
     #   celery
     #   celery-redbeat
-    #   elasticsearch-dsl
     #   google-cloud-bigquery
+    #   opensearch-py
 python-slugify==8.0.4 \
     --hash=sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8 \
     --hash=sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856
@@ -1745,6 +1742,7 @@ requests==2.32.3 \
     #   google-cloud-bigquery
     #   google-cloud-storage
     #   jsonschema-path
+    #   opensearch-py
     #   premailer
     #   requests-aws4auth
     #   stripe
@@ -1880,9 +1878,9 @@ six==1.16.0 \
     --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
     # via
     #   automat
-    #   elasticsearch-dsl
     #   html5lib
     #   isodate
+    #   opensearch-py
     #   pymacaroons
     #   python-dateutil
     #   requests-aws4auth
@@ -2006,8 +2004,8 @@ urllib3==1.26.19 \
     #   -r requirements/main.in
     #   botocore
     #   celery
-    #   elasticsearch
     #   kombu
+    #   opensearch-py
     #   requests
     #   sentry-sdk
 venusian==3.1.0 \

tests/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -312,7 +312,7 @@ def app_config(database):
         "database.url": database,
         "docs.url": "http://docs.example.com/",
         "ratelimit.url": "memory://",
-        "elasticsearch.url": "https://localhost/warehouse",
+        "opensearch.url": "https://localhost/warehouse",
         "files.backend": "warehouse.packaging.services.LocalFileStorage",
         "archive_files.backend": "warehouse.packaging.services.LocalArchiveFileStorage",
         "simple.backend": "warehouse.packaging.services.LocalSimpleStorage",

tests/unit/search/test_init.py

Lines changed: 23 additions & 23 deletions
@@ -10,7 +10,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import elasticsearch
+import opensearchpy
 import pretend
 
 from warehouse import search
@@ -69,7 +69,7 @@ def test_execute_unindex_success(app_config):
     assert "warehouse.search.project_deletes" not in session.info
 
 
-def test_es(monkeypatch):
+def test_opensearch(monkeypatch):
     search_obj = pretend.stub()
     index_obj = pretend.stub(
         document=pretend.call_recorder(lambda d: None),
@@ -84,15 +84,15 @@ def test_es(monkeypatch):
     client = pretend.stub()
     request = pretend.stub(
         registry={
-            "elasticsearch.client": client,
-            "elasticsearch.index": "warehouse",
+            "opensearch.client": client,
+            "opensearch.index": "warehouse",
             "search.doc_types": doc_types,
         }
     )
 
-    es = search.es(request)
+    opensearch = search.opensearch(request)
 
-    assert es is search_obj
+    assert opensearch is search_obj
     assert index_cls.calls == [pretend.call("warehouse", using=client)]
     assert index_obj.document.calls == [pretend.call(d) for d in doc_types]
     assert index_obj.settings.calls == [
@@ -104,20 +104,20 @@ def test_es(monkeypatch):
 def test_includeme(monkeypatch):
     aws4auth_stub = pretend.stub()
     aws4auth = pretend.call_recorder(lambda *a, **kw: aws4auth_stub)
-    es_client = pretend.stub()
-    es_client_init = pretend.call_recorder(lambda *a, **kw: es_client)
+    opensearch_client = pretend.stub()
+    opensearch_client_init = pretend.call_recorder(lambda *a, **kw: opensearch_client)
 
     monkeypatch.setattr(search.requests_aws4auth, "AWS4Auth", aws4auth)
-    monkeypatch.setattr(search.elasticsearch, "Elasticsearch", es_client_init)
+    monkeypatch.setattr(search.opensearchpy, "OpenSearch", opensearch_client_init)
 
     registry = {}
-    es_url = "https://some.url/some-index?aws_auth=1&region=us-east-2"
+    opensearch_url = "https://some.url/some-index?aws_auth=1&region=us-east-2"
     config = pretend.stub(
         registry=pretend.stub(
             settings={
                 "aws.key_id": "AAAAAAAAAAAA",
                 "aws.secret_key": "deadbeefdeadbeefdeadbeef",
-                "elasticsearch.url": es_url,
+                "opensearch.url": opensearch_url,
             },
             __setitem__=registry.__setitem__,
         ),
@@ -130,20 +130,20 @@ def test_includeme(monkeypatch):
     assert aws4auth.calls == [
         pretend.call("AAAAAAAAAAAA", "deadbeefdeadbeefdeadbeef", "us-east-2", "es")
     ]
-    assert len(es_client_init.calls) == 1
-    assert es_client_init.calls[0].kwargs["hosts"] == ["https://some.url"]
-    assert es_client_init.calls[0].kwargs["timeout"] == 2
-    assert es_client_init.calls[0].kwargs["retry_on_timeout"] is False
+    assert len(opensearch_client_init.calls) == 1
+    assert opensearch_client_init.calls[0].kwargs["hosts"] == ["https://some.url"]
+    assert opensearch_client_init.calls[0].kwargs["timeout"] == 2
+    assert opensearch_client_init.calls[0].kwargs["retry_on_timeout"] is False
     assert (
-        es_client_init.calls[0].kwargs["connection_class"]
-        == elasticsearch.connection.http_requests.RequestsHttpConnection
+        opensearch_client_init.calls[0].kwargs["connection_class"]
+        == opensearchpy.connection.http_requests.RequestsHttpConnection
     )
-    assert es_client_init.calls[0].kwargs["http_auth"] == aws4auth_stub
+    assert opensearch_client_init.calls[0].kwargs["http_auth"] == aws4auth_stub
 
-    assert registry["elasticsearch.client"] == es_client
-    assert registry["elasticsearch.index"] == "some-index"
-    assert registry["elasticsearch.shards"] == 1
-    assert registry["elasticsearch.replicas"] == 0
+    assert registry["opensearch.client"] == opensearch_client
+    assert registry["opensearch.index"] == "some-index"
+    assert registry["opensearch.shards"] == 1
+    assert registry["opensearch.replicas"] == 0
     assert config.add_request_method.calls == [
-        pretend.call(search.es, name="es", reify=True)
+        pretend.call(search.opensearch, name="opensearch", reify=True)
     ]
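
Taken together, the assertions above pin down the shape of the renamed wrapper. The following is a minimal sketch of what warehouse/search/__init__.py plausibly does after the rename, inferred from these tests only (the real module also handles certificates, serializers, and the index/document helpers, which are omitted here):

from urllib import parse

import opensearchpy
import requests_aws4auth


def opensearch(request):
    # Reified per-request handle registered below as request.opensearch; the real
    # implementation builds an Index from the registered document types and
    # returns index.search().
    client = request.registry["opensearch.client"]
    return opensearchpy.Search(using=client, index=request.registry["opensearch.index"])


def includeme(config):
    settings = config.registry.settings
    p = parse.urlparse(settings["opensearch.url"])
    qs = parse.parse_qs(p.query)

    kwargs = {
        "hosts": [parse.urlunparse((p.scheme, p.netloc) + ("",) * 4)],
        "timeout": 2,
        "retry_on_timeout": False,
    }

    # When the URL carries ?aws_auth=1&region=..., sign requests with SigV4 via
    # requests-aws4auth, matching the aws4auth.calls assertion above.
    if qs.get("aws_auth"):
        kwargs["connection_class"] = (
            opensearchpy.connection.http_requests.RequestsHttpConnection
        )
        kwargs["http_auth"] = requests_aws4auth.AWS4Auth(
            settings["aws.key_id"],
            settings["aws.secret_key"],
            qs["region"][0],
            "es",
        )

    config.registry["opensearch.client"] = opensearchpy.OpenSearch(**kwargs)
    config.registry["opensearch.index"] = p.path.strip("/")
    config.registry["opensearch.shards"] = int(qs.get("shards", ["1"])[0])
    config.registry["opensearch.replicas"] = int(qs.get("replicas", ["0"])[0])
    config.add_request_method(opensearch, name="opensearch", reify=True)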
