Skip to content
This repository was archived by the owner on Dec 10, 2019. It is now read-only.

Commit 024b5f5

Browse files
committed
Add method to start prep_train_data job
* Adds a method to start a prep_train_data raster-vision job, which makes it easy to generate a training data zip file given a list of projects and annotation URIs. * Moves some raster-vision utility functions and constants into utils.py and settings.py files.
1 parent b1045ec commit 024b5f5

File tree

5 files changed

+142
-40
lines changed

5 files changed

+142
-40
lines changed

rasterfoundry/api.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import uuid
23

34
from bravado.requests_client import RequestsClient
45
from bravado.client import SwaggerClient
@@ -7,7 +8,9 @@
78

89
from .models import Project, MapToken
910
from .exceptions import RefreshTokenException
10-
11+
from .utils import start_raster_vision_job, upload_raster_vision_config
12+
from .settings import (
13+
RV_CPU_JOB_DEF, RV_CPU_QUEUE, DEVELOP_BRANCH, RV_CONFIG_URI_ROOT)
1114

1215
SPEC_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),
1316
'spec.yml')
@@ -125,3 +128,67 @@ def get_scenes(self, **kwargs):
125128
elif bbox and type(bbox) != type(','.join(str(x) for x in bbox)): # NOQA
126129
kwargs['bbox'] = ','.join(str(x) for x in bbox)
127130
return self.client.Imagery.get_scenes(**kwargs).result()
131+
132+
def get_project_configs(self, project_ids, annotation_uris):
    """Get data needed to create project config file for prep_train_data

    The prep_train_data script requires a project config file which
    lists the images and annotation URIs associated with each project
    that will be used to generate training data.

    Args:
        project_ids: list of project ids to make training data from
        annotation_uris: list of corresponding annotation URIs, given in
            the same order as project_ids

    Returns:
        Object of form [{'images': [...], 'annotations':...}, ...]

    Raises:
        ValueError: if project_ids and annotation_uris differ in length
    """
    # Materialize both arguments so their lengths can be compared even
    # when the caller passes an iterator.
    project_ids = list(project_ids)
    annotation_uris = list(annotation_uris)
    if len(project_ids) != len(annotation_uris):
        # zip() would otherwise silently drop the unmatched trailing
        # entries, producing a config that is missing projects.
        raise ValueError(
            'project_ids and annotation_uris must have the same length '
            '(got {} and {})'.format(
                len(project_ids), len(annotation_uris)))

    project_configs = []
    for project_id, annotation_uri in zip(project_ids, annotation_uris):
        proj = Project(
            self.client.Imagery.get_projects_uuid(uuid=project_id).result(),
            self)
        image_uris = proj.get_image_source_uris()
        project_configs.append({
            'images': image_uris,
            'annotations': annotation_uri
        })

    return project_configs
158+
159+
def start_prep_train_data_job(self, project_ids, annotation_uris,
                              output_zip_uri,
                              config_uri_root=RV_CONFIG_URI_ROOT,
                              job_queue=RV_CPU_QUEUE,
                              job_definition=RV_CPU_JOB_DEF,
                              branch_name=DEVELOP_BRANCH, attempts=1):
    """Start a Batch job to prepare object detection training data.

    Builds the project config for the given projects, uploads it, and
    submits a raster-vision `prep_train_data` job that reads the config
    and writes its result to output_zip_uri.

    Args:
        project_ids (list of str): ids of projects to make train data for
        annotation_uris (list of str): annotation URIs for projects
        output_zip_uri (str): URI of output zip file
        config_uri_root (str): The root of generated URIs for config files
        job_queue (str): name of the Batch job queue to run the job in
        job_definition (str): name of the Batch job definition
        branch_name (str): branch of the raster-vision repo to use
        attempts (int): number of attempts for the Batch job

    Returns:
        job_id (str): job_id of job started on Batch
    """
    # The config must be uploaded first: its remote URI is an argument
    # of the command that the job runs.
    config_uri = upload_raster_vision_config(
        self.get_project_configs(project_ids, annotation_uris),
        config_uri_root)

    command_template = (
        'python -m rv.run prep_train_data --debug '
        '--chip-size 300 --num-neg-chips 100 '
        '--max-attempts 500 {} {}')
    command = command_template.format(config_uri, output_zip_uri)

    # A UUID suffix gives each submitted job its own name.
    job_name = 'prep_train_data_{}'.format(uuid.uuid1())
    return start_raster_vision_job(
        job_name, command, job_queue, job_definition, branch_name,
        attempts=attempts)

rasterfoundry/models/project.py

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,21 @@
22
import requests
33
import uuid
44

5-
import boto3
6-
75
from .. import NOTEBOOK_SUPPORT
86
from ..decorators import check_notebook
97
from ..exceptions import GatewayTimeoutException
8+
from ..utils import start_raster_vision_job
9+
from ..settings import RV_CPU_JOB_DEF, RV_CPU_QUEUE, DEVELOP_BRANCH
1010
from .map_token import MapToken
1111

12+
1213
if NOTEBOOK_SUPPORT:
1314
from ipyleaflet import (
1415
Map,
1516
SideBySideControl,
1617
TileLayer,
1718
)
1819

19-
RV_CPU_QUEUE = 'raster-vision-cpu'
20-
RV_CPU_JOB_DEF = 'raster-vision-cpu'
21-
DEVELOP_BRANCH = 'develop'
22-
23-
24-
def start_raster_vision_job(job_name, command, job_queue=RV_CPU_QUEUE,
25-
job_definition=RV_CPU_JOB_DEF,
26-
branch_name=DEVELOP_BRANCH, attempts=1):
27-
"""Start a raster-vision Batch job.
28-
29-
Args:
30-
job_name (str): name of the Batch job
31-
command (str): command to run inside the Docker container
32-
job_queue (str): name of the Batch job queue to run the job in
33-
job_definition (str): name of the Batch job definition
34-
branch_name (str): branch of the raster-vision repo to use
35-
attempts (int): number of attempts for the Batch job
36-
37-
Returns:
38-
job_id (str): job_id of job started on Batch
39-
"""
40-
batch_client = boto3.client('batch')
41-
# `run_script.sh $branch_name $command` downloads a branch of the
42-
# raster-vision repo and then runs the command.
43-
job_command = ['run_script.sh', branch_name, command]
44-
job_id = batch_client.submit_job(
45-
jobName=job_name, jobQueue=job_queue, jobDefinition=job_definition,
46-
containerOverrides={
47-
'command': job_command
48-
},
49-
retryStrategy={
50-
'attempts': attempts
51-
})['jobId']
52-
53-
return job_id
54-
5520

5621
class Project(object):
5722
"""A Raster Foundry project"""

rasterfoundry/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Default name of the Batch job queue that raster-vision jobs run in.
RV_CPU_QUEUE = 'raster-vision-cpu'
2+
# Default name of the Batch job definition used for raster-vision jobs.
RV_CPU_JOB_DEF = 'raster-vision-cpu'
3+
# Default root under which generated raster-vision config URIs are placed.
RV_CONFIG_URI_ROOT = 's3://raster-vision/datasets/detection/configs'
4+
# Default branch of the raster-vision repo that jobs use.
DEVELOP_BRANCH = 'develop'

rasterfoundry/utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from future.standard_library import install_aliases # noqa
2+
install_aliases() # noqa
3+
from urllib.parse import urlparse
4+
from os.path import join
5+
import tempfile
6+
import uuid
7+
import json
8+
9+
import boto3
10+
11+
12+
def start_raster_vision_job(job_name, command, job_queue, job_definition,
                            branch_name, attempts=1):
    """Submit a raster-vision job to Batch and return its id.

    Args:
        job_name (str): name of the Batch job
        command (str): command to run inside the Docker container
        job_queue (str): name of the Batch job queue to run the job in
        job_definition (str): name of the Batch job definition
        branch_name (str): branch of the raster-vision repo to use
        attempts (int): number of attempts for the Batch job

    Returns:
        job_id (str): job_id of job started on Batch
    """
    client = boto3.client('batch')

    # `run_script.sh $branch_name $command` downloads a branch of the
    # raster-vision repo and then runs the command.
    container_overrides = {
        'command': ['run_script.sh', branch_name, command]
    }
    retry_strategy = {'attempts': attempts}

    response = client.submit_job(
        jobName=job_name,
        jobQueue=job_queue,
        jobDefinition=job_definition,
        containerOverrides=container_overrides,
        retryStrategy=retry_strategy)
    return response['jobId']
41+
42+
43+
def upload_raster_vision_config(config_dict, config_uri_root):
    """Upload a config file to S3

    The config is serialized as JSON into a temporary local file, which
    is then uploaded to a freshly generated URI under config_uri_root.

    Args:
        config_dict: a JSON-serializable object (e.g. a dictionary or a
            list of dictionaries) to turn into a JSON file to upload
        config_uri_root: the root of the URI to upload the config to

    Returns:
        remote URI of the config file, generated using a UUID
    """
    # Imported here so this function does not depend on the platform's
    # os.path flavour: URIs always use '/' as the separator, whereas
    # os.path.join would insert '\\' on Windows.
    import posixpath

    config_uri = posixpath.join(
        config_uri_root, '{}.json'.format(uuid.uuid1()))
    parsed_uri = urlparse(config_uri)

    with tempfile.NamedTemporaryFile('w') as config_file:
        json.dump(config_dict, config_file)
        # The upload is given the file's path rather than this handle,
        # so buffered content has to be flushed to disk first. Without
        # this line 0 bytes are uploaded.
        config_file.flush()

        s3 = boto3.resource('s3')
        # netloc is the bucket name; the path minus its leading '/' is
        # the object key.
        s3.meta.client.upload_file(
            config_file.name, parsed_uri.netloc, parsed_uri.path[1:])

    return config_uri

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
'pyasn1 >= 0.2.3',
2626
'requests >= 2.9.1',
2727
'bravado >= 8.4.0',
28-
'boto3 >= 1.4.4'
28+
'boto3 >= 1.4.4',
29+
'future >= 0.16.0'
2930
],
3031
extras_require={
3132
'notebook': [

0 commit comments

Comments
 (0)