Skip to content

Commit ce95ecc

Browse files
committed
updated to sdk 1.0.9, spacy 3.6
1 parent 555b3cc commit ce95ecc

File tree

5 files changed

+38
-65
lines changed

5 files changed

+38
-65
lines changed

Containerfile

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Use the same base image version as the clams-python python library version
2-
FROM ghcr.io/clamsproject/clams-python:1.0.7
2+
FROM ghcr.io/clamsproject/clams-python:1.0.9
33
# See https://github.com/orgs/clamsproject/packages?tab=packages&q=clams-python for more base images
44
# IF you want to automatically publish this image to the clamsproject organization,
55
# 1. you should have generated this template without --no-github-actions flag
@@ -24,6 +24,7 @@ ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}
2424
COPY ./ /app
2525
WORKDIR /app
2626
RUN pip3 install -r requirements.txt
27+
RUN python3 -m spacy download en_core_web_sm
2728

2829
RUN python3 -m spacy download en_core_web_sm
2930

README.md

+10-38
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,20 @@
11
# Spacy NLP Service
22

3-
The spaCy NLP tool wrapped as a CLAMS service, spaCy is distributed under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
3+
## Description
4+
The spaCy NLP tool wrapped as a CLAMS service. spaCy is distributed under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
45

5-
This requires Python 3.8 or higher. For local install of required Python modules see [requirements.txt](requirements.txt).
6-
7-
## Using this service
8-
9-
Use `python app.py -t example-mmif.json out.json` just to test the wrapping code without using a server. To test this using a server you run the app as a service in one terminal:
6+
## User instructions
107

11-
```bash
12-
$ python app.py
13-
```
8+
General user instructions for CLAMS apps are available at the [CLAMS Apps documentation](https://apps.clams.ai/clamsapp).
149

15-
And poke at it from another:
10+
### System requirements
1611

17-
```bash
18-
$ curl http://0.0.0.0:5000/
19-
$ curl -H "Accept: application/json" -X POST -d@example-mmif.json http://0.0.0.0:5000/
20-
```
21-
22-
In CLAMS you usually run this in a container. To create an image
23-
24-
```bash
25-
$ docker build -f Containerfile -t clams-spacy-wrapper .
26-
```
12+
This requires Python 3.8 or higher. For local install of required Python modules see [requirements.txt](requirements.txt).
2713

28-
And to run it as a container:
14+
#### Using as a local Python program
2915

30-
```bash
31-
$ docker run --rm -d -p 5000:5000 clams-spacy-wrapper
32-
$ curl -H "Accept: application/json" -X POST -d@example-mmif.json http://0.0.0.0:5000/
33-
```
16+
Use `python app.py -t example-mmif.json out.json` just to test the wrapping code without using a server.
3417

35-
The spaCy code will run on each text document in the input MMIF file. The file `example-mmif.json` has one text document in the top level `documents` property and two text documents in one of the views. The text documents all look as follows:
18+
### Configurable runtime parameters
3619

37-
```json
38-
{
39-
"@type": "http://mmif.clams.ai/0.4.0/vocabulary/TextDocument",
40-
"properties": {
41-
"id": "m2",
42-
"text": {
43-
"@value": "Hello, this is Jim Lehrer with the NewsHour on PBS...."
44-
}
45-
}
46-
}
47-
```
48-
Instead of a `text:@value` property, the text could be in an external file, which would be given as a URI in the `location` property. See the readme file in [https://github.com/clamsproject/app-nlp-example](https://github.com/clamsproject/app-nlp-example) on how to do this.
20+
For the full list of parameters, please refer to the app metadata in the [CLAMS App Directory](https://apps.clams.ai/clamsapp/) or the [`metadata.py`](metadata.py) file in this repository.

app.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,29 @@
1212
"""
1313

1414
import argparse
15+
import logging
1516
from typing import Union
1617

17-
# Imports needed for Clams and MMIF.
18-
# Non-NLP Clams applications will require AnnotationTypes
19-
18+
import spacy
2019
from clams import ClamsApp, Restifier
20+
from lapps.discriminators import Uri
2121
from mmif import Mmif, View, Annotation, Document, AnnotationTypes, DocumentTypes
22+
from spacy.tokens import Doc
2223

23-
# For an NLP tool we need to import the LAPPS vocabulary items
24-
from lapps.discriminators import Uri
2524

26-
# Spacy imports
27-
import spacy
28-
from spacy.tokens import Doc
25+
# Imports needed for Clams and MMIF.
26+
# Non-NLP Clams applications will require AnnotationTypes
2927

3028
class SpacyWrapper(ClamsApp):
3129

3230
def __init__(self):
3331
super().__init__()
34-
# load small English core model
35-
self.nlp = spacy.load("en_core_web_sm")
32+
# Load small English core model
33+
try:
34+
self.nlp = spacy.load("en_core_web_sm")
35+
except OSError as e: # spacy raises OSError if model not found
36+
spacy.cli.download("en_core_web_sm")
37+
self.nlp = spacy.load("en_core_web_sm")
3638

3739
def _appmetadata(self):
3840
# see metadata.py
@@ -106,13 +108,11 @@ def _test(infile, outfile):
106108

107109
if __name__ == "__main__":
108110
parser = argparse.ArgumentParser()
109-
parser.add_argument(
110-
"--port", action="store", default="5000", help="set port to listen"
111-
)
111+
parser.add_argument("--port", action="store", default="5000", help="set port to listen")
112112
parser.add_argument("--production", action="store_true", help="run gunicorn server")
113113
parser.add_argument('-t', '--test', action='store_true', help="bypass the server")
114-
parser.add_argument('infile', nargs='?', help="input MMIF file")
115-
parser.add_argument('outfile', nargs='?', help="output file")
114+
parser.add_argument('infile', nargs='?', help="input MMIF file, only with --test")
115+
parser.add_argument('outfile', nargs='?', help="output file, only with --test")
116116

117117
parsed_args = parser.parse_args()
118118

@@ -122,11 +122,11 @@ def _test(infile, outfile):
122122
# create the app instance
123123
app = SpacyWrapper()
124124

125-
http_app = Restifier(app, port=int(parsed_args.port)
126-
)
125+
http_app = Restifier(app, port=int(parsed_args.port))
127126
# for running the application in production mode
128127
if parsed_args.production:
129128
http_app.serve_production()
130129
# development mode
131130
else:
131+
app.logger.setLevel(logging.DEBUG)
132132
http_app.run()

metadata.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
DO NOT CHANGE the name of the file
55
"""
66

7-
from mmif import DocumentTypes, AnnotationTypes
8-
from lapps.discriminators import Uri
7+
import re
8+
99
from clams.app import ClamsApp
1010
from clams.appmetadata import AppMetadata
11-
import re
11+
from lapps.discriminators import Uri
12+
from mmif import DocumentTypes
13+
1214

1315
# DO NOT CHANGE the function name
1416
def appmetadata() -> AppMetadata:
@@ -26,7 +28,7 @@ def appmetadata() -> AppMetadata:
2628
name="CLAMS wrapper for spaCy NLP",
2729
description="Apply spaCy NLP to all text documents in a MMIF file.",
2830
app_license="Apache 2.0",
29-
identifier=f"http://apps.clams.ai/spacy-wrapper",
31+
identifier=f"spacy-wrapper",
3032
url='https://github.com/clamsproject/app-spacy-wrapper',
3133
analyzer_version=[l.strip().rsplit('==')[-1] for l in open('requirements.txt').readlines() if re.match(r'^spacy==', l)][0],
3234
analyzer_license='MIT'
@@ -58,4 +60,4 @@ def appmetadata() -> AppMetadata:
5860
metadata = appmetadata()
5961
for param in ClamsApp.universal_parameters:
6062
metadata.add_parameter(**param)
61-
sys.stdout.write(appmetadata().jsonify(pretty=True))
63+
sys.stdout.write(metadata.jsonify(pretty=True))

requirements.txt

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
1-
# Make sure clams-python version is explicitly specified, at least the lower bound
2-
clams-python==1.0.7
3-
spacy==3.1.2
4-
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0.tar.gz#egg=en_core_web_sm
1+
clams-python==1.0.9
2+
spacy==3.6

0 commit comments

Comments
 (0)