Skip to content

Commit

Permalink
feat: tpu_queued_resources_startup_script
Browse files Browse the repository at this point in the history
  • Loading branch information
Joanna Grycz committed Oct 23, 2024
1 parent 7a89075 commit 3ef4622
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 0 deletions.
54 changes: 54 additions & 0 deletions .github/workflows/tpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow for the `tpu` samples: delegates test execution to the shared
# test.yaml workflow and, on scheduled runs, hands off to flakybot.yaml.
name: tpu
on:
# Pushes to main that touch the TPU samples or the workflow files listed below.
push:
branches:
- main
paths:
- 'tpu/**'
- '.github/workflows/tpu.yaml'
- '.github/workflows/test.yaml'
# Pull requests touching the same paths. `labeled` is listed so that adding a
# label can start a run (see the `if` condition on the test job).
pull_request:
types:
- opened
- reopened
- synchronize
- labeled
paths:
- 'tpu/**'
- '.github/workflows/tpu.yaml'
- '.github/workflows/test.yaml'
# Weekly scheduled run: minute 0, hour 0, day-of-week 0.
schedule:
- cron: '0 0 * * 0'
jobs:
test:
# Ref: https://github.com/google-github-actions/auth#usage
permissions:
contents: 'read'
id-token: 'write'
# Skip `labeled` events unless the label that was added is 'actions:force-run'.
if: github.event.action != 'labeled' || github.event.label.name == 'actions:force-run'
uses: ./.github/workflows/test.yaml
with:
name: 'tpu'
path: 'tpu'
flakybot:
# Ref: https://github.com/google-github-actions/auth#usage
permissions:
contents: 'read'
id-token: 'write'
# Only runs for scheduled executions, after the test job.
if: github.event_name == 'schedule' && always() # always() submits logs even if tests fail
uses: ./.github/workflows/flakybot.yaml
needs: [test]
1 change: 1 addition & 0 deletions .github/workflows/utils/workflows.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
"speech",
"talent",
"texttospeech",
"tpu",
"translate",
"video-intelligence",
"vision/productSearch",
Expand Down
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ compute @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-revie
iam @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
kms @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
orgpolicy @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
tpu @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
recaptcha_enterprise @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
recaptcha_enterprise/demosite @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/recaptcha-customer-obsession-reviewers @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
secret-manager @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/cloud-secrets-team
Expand Down
23 changes: 23 additions & 0 deletions tpu/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"name": "nodejs-docs-samples-tpu",
"license": "Apache-2.0",
"author": "Google Inc.",
"engines": {
"node": ">=16.0.0"
},
"repository": "googleapis/nodejs-tpu",
"private": true,
"files": [
"*.js"
],
"scripts": {
"test": "c8 mocha -p -j 2 test --timeout 1200000"
},
"dependencies": {
"@google-cloud/tpu": "^3.5.0"
},
"devDependencies": {
"c8": "^10.0.0",
"mocha": "^10.0.0"
}
}
130 changes: 130 additions & 0 deletions tpu/queuedResources/createQueuedResourceStartupScript.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Creates a TPU queued resource whose node is configured with a start-up script.
 *
 * @param {string} nodeName - ID for the TPU node requested by the queued resource.
 * @param {string} queuedResourceName - ID for the queued resource.
 * @param {string} zone - Zone in which to create the node, e.g. 'europe-west4-a'.
 * @param {string} tpuType - Accelerator type of the node, e.g. 'v2-8'.
 * @param {string} tpuSoftwareVersion - Runtime version to install, e.g. 'tpu-vm-tf-2.14.1'.
 * @returns {Promise<void>} Resolves after the create operation has completed.
 */
async function main(
  nodeName,
  queuedResourceName,
  zone,
  tpuType,
  tpuSoftwareVersion
) {
  // [START tpu_queued_resources_startup_script]
  // Import the TPU library
  const {TpuClient} = require('@google-cloud/tpu').v2alpha1;
  const {Node, NetworkConfig, QueuedResource} =
    require('@google-cloud/tpu').protos.google.cloud.tpu.v2alpha1;

  // Instantiate a tpuClient
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project, where you want to create queued resource.
  const projectId = await tpuClient.getProjectId();

  // The name of the network you want the node to connect to. The network should be assigned to your project.
  const networkName = 'compute-tpu-network';

  // The name for your queued resource.
  // queuedResourceName = 'queued-resource-1';

  // The name for your node.
  // nodeName = 'node-name-1';

  // The zone in which to create the node.
  // For more information about supported TPU types for specific zones,
  // see https://cloud.google.com/tpu/docs/regions-zones
  // zone = 'europe-west4-a';

  // The region of the subnetwork that you want the node to connect to.
  // Subnetworks are regional, so the region is taken from the zone itself
  // ('europe-west4-a' -> 'europe-west4') to keep the two from drifting apart.
  const region = zone.slice(0, zone.lastIndexOf('-'));

  // The accelerator type that specifies the version and size of the node you want to create.
  // For more information about supported accelerator types for each TPU version,
  // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  // tpuType = 'v2-8';

  // Software version that specifies the version of the node runtime to install. For more information,
  // see https://cloud.google.com/tpu/docs/runtimes
  // tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  async function callCreateQueuedResourceStartupScript() {
    // Define parent for requests; shared by the node spec and the create request.
    const parent = `projects/${projectId}/locations/${zone}`;

    // Create a node
    const node = new Node({
      name: nodeName,
      zone,
      acceleratorType: tpuType,
      runtimeVersion: tpuSoftwareVersion,
      // Define network
      networkConfig: new NetworkConfig({
        enableExternalIps: true,
        network: `projects/${projectId}/global/networks/${networkName}`,
        subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
      }),
      queuedResource: `projects/${projectId}/locations/${zone}/queuedResources/${queuedResourceName}`,
      metadata: {
        // The script writes a greeting to a log file, then upgrades numpy
        // and appends the installer output to the same file.
        'startup-script': `#!/bin/bash
echo "Hello World" > /var/log/hello.log
sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1`,
      },
    });

    // Create queued resource
    const queuedResource = new QueuedResource({
      name: queuedResourceName,
      tpu: {
        nodeSpec: [
          {
            parent,
            node,
            nodeId: nodeName,
          },
        ],
      },
    });

    const request = {
      parent,
      queuedResource,
      queuedResourceId: queuedResourceName,
    };

    const [operation] = await tpuClient.createQueuedResource(request);

    // Wait for the create operation to complete.
    await operation.promise();

    // You can wait until TPU Node is READY,
    // and check its status using getTpuVm() from `tpu_vm_get` sample.
    console.log(
      `Queued resource ${queuedResourceName} with start-up script created.`
    );
  }
  await callCreateQueuedResourceStartupScript();
  // [END tpu_queued_resources_startup_script]
}

// Entry point: pass the command-line arguments to the sample. If it rejects,
// print the error and set a non-zero exit code.
main(...process.argv.slice(2)).catch(function reportFailure(error) {
  console.error(error);
  process.exitCode = 1;
});
59 changes: 59 additions & 0 deletions tpu/queuedResources/forceDeleteQueuedResource.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Deletes a TPU queued resource with the `force` flag set.
 *
 * @param {string} queuedResourceName - ID of the queued resource to delete.
 * @param {string} zone - Zone of the queued resource, e.g. 'europe-west4-a'.
 * @returns {Promise<void>} Resolves after the delete operation has completed.
 */
async function main(queuedResourceName, zone) {
  // [START tpu_queued_resources_delete_force]
  // Import the TPU library
  const {TpuClient} = require('@google-cloud/tpu').v2alpha1;

  // Instantiate a tpuClient
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project that contains
  // the queued resource you want to delete.
  const projectId = await tpuClient.getProjectId();

  // The name of queued resource.
  // queuedResourceName = 'queued-resource-1';

  // The zone of your queued resource.
  // zone = 'europe-west4-a';

  async function callForceDeleteQueuedResource() {
    // Fully qualified resource name of the queued resource.
    const name = `projects/${projectId}/locations/${zone}/queuedResources/${queuedResourceName}`;

    // Send the delete request with the force flag enabled.
    const [deleteOperation] = await tpuClient.deleteQueuedResource({
      name,
      force: true,
    });

    // Block until the long-running delete operation has finished.
    await deleteOperation.promise();

    console.log(`Queued resource ${queuedResourceName} deletion forced.`);
  }
  await callForceDeleteQueuedResource();
  // [END tpu_queued_resources_delete_force]
}

// Entry point: pass the command-line arguments to the sample. If it rejects,
// print the error and set a non-zero exit code.
main(...process.argv.slice(2)).catch(function reportFailure(error) {
  console.error(error);
  process.exitCode = 1;
});
3 changes: 3 additions & 0 deletions tpu/test/.eslintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Enable ESLint's `mocha` environment for the test files.
env:
mocha: true
58 changes: 58 additions & 0 deletions tpu/test/createQueuedResourceStartupScript.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const path = require('path');
const assert = require('node:assert/strict');
const {after, describe, it} = require('mocha');
const cp = require('child_process');

// Runs `cmd` synchronously and returns its stdout decoded as UTF-8.
// Any extra child_process options (for example `cwd`) are forwarded, so the
// `{cwd}` object supplied by callers actually takes effect.
const execSync = (cmd, options = {}) =>
  cp.execSync(cmd, {encoding: 'utf-8', ...options});
// Parent directory of this test file (the `tpu` package directory); supplied
// as `cwd` when invoking the sample scripts by relative path.
const cwd = path.join(__dirname, '..');

// End-to-end check of the start-up script sample: creates a queued resource by
// running the sample as a child process, then force-deletes it afterwards.
// The suite callback is synchronous on purpose: Mocha registers hooks and tests
// while the callback runs and does not wait on a promise returned from it.
describe('TPU queued resource with start-up script', () => {
  // Random numeric suffixes keep resource names from colliding between runs.
  const queuedResourceName = `queued-resource-startup-script-${Math.floor(Math.random() * 1000 + 1)}`;
  const nodeName = `node-startup-script-2a2b3c${Math.floor(Math.random() * 1000 + 1)}`;
  const zone = 'us-east1-d';
  const tpuType = 'v3-32';
  const tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  after(() => {
    // Delete queued resource
    execSync(
      `node ./queuedResources/forceDeleteQueuedResource.js ${queuedResourceName} ${zone}`,
      {
        cwd,
      }
    );
  });

  it('should create queued resource with start-up script', () => {
    const response = execSync(
      `node ./queuedResources/createQueuedResourceStartupScript.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
      {
        cwd,
      }
    );

    // The sample prints this exact line once the create operation completes.
    assert(
      response.includes(
        `Queued resource ${queuedResourceName} with start-up script created.`
      )
    );
  });
});

0 comments on commit 3ef4622

Please sign in to comment.