From 68c11ef25b8236048f17decb0426af75c7f199bb Mon Sep 17 00:00:00 2001 From: Joanna Grycz <37943406+gryczj@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:46:53 +0100 Subject: [PATCH] feat: add tpu_vm_create_topology/startup_script (#3902) * feat: add tpu_vm_create_topology * feat: tpu_vm_create_startup_script * Use mocked TPUClient --- tpu/createStartupScriptVM.js | 101 ++++++++++++++++++++++++ tpu/createTopologyVM.js | 105 +++++++++++++++++++++++++ tpu/test/createStartupScriptVM.test.js | 69 ++++++++++++++++ tpu/test/createTopologyVM.test.js | 66 ++++++++++++++++ 4 files changed, 341 insertions(+) create mode 100644 tpu/createStartupScriptVM.js create mode 100644 tpu/createTopologyVM.js create mode 100644 tpu/test/createStartupScriptVM.test.js create mode 100644 tpu/test/createTopologyVM.test.js diff --git a/tpu/createStartupScriptVM.js b/tpu/createStartupScriptVM.js new file mode 100644 index 0000000000..eacef67c3d --- /dev/null +++ b/tpu/createStartupScriptVM.js @@ -0,0 +1,101 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; +/** Creates a Cloud TPU node whose metadata carries a startup script: builds the request, calls tpuClient.createNode(), waits for the operation, then logs and returns its response. tpuClient must provide getProjectId() and createNode(). */ +async function main(tpuClient) { + // [START tpu_vm_create_startup_script] + // Import the TPUClient + // TODO(developer): Uncomment below line before running the sample. 
+ // const {TpuClient} = require('@google-cloud/tpu').v2; + + const {Node, NetworkConfig} = + require('@google-cloud/tpu').protos.google.cloud.tpu.v2; + + // Instantiate a tpuClient + // TODO(developer): Uncomment below line before running the sample. + // tpuClient = new TpuClient(); + + // TODO(developer): Update these variables before running the sample. + // Project ID or project number of the Google Cloud project in which you want to create the node. + const projectId = await tpuClient.getProjectId(); + + // The name of the network you want the TPU node to connect to. The network should be assigned to your project. The same name is used below as the subnetwork name. + const networkName = 'compute-tpu-network'; + + // The region of the subnetwork that you want the TPU node to connect to. + const region = 'europe-west4'; + + // The name for your TPU. + const nodeName = 'node-name-1'; + + // The zone in which to create the TPU. + // For more information about supported TPU types for specific zones, + // see https://cloud.google.com/tpu/docs/regions-zones + const zone = 'europe-west4-a'; + + // The accelerator type that specifies the version and size of the Cloud TPU you want to create. + // For more information about supported accelerator types for each TPU version, + // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions. + const tpuType = 'v2-8'; + + // Software version that specifies the version of the TPU runtime to install. 
For more information, + // see https://cloud.google.com/tpu/docs/runtimes + const tpuSoftwareVersion = 'tpu-vm-tf-2.17.0-pod-pjrt'; + + async function callCreateTpuVMStartupScript() { + // Describe the node to create + const node = new Node({ + name: nodeName, + zone, + acceleratorType: tpuType, + runtimeVersion: tpuSoftwareVersion, + // Define network + networkConfig: new NetworkConfig({ + enableExternalIps: true, + network: `projects/${projectId}/global/networks/${networkName}`, + subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`, + }), + metadata: { + // The script writes a greeting to /var/log/hello.log, then upgrades numpy and appends that output to the same file. + 'startup-script': `#!/bin/bash + echo "Hello World" > /var/log/hello.log + sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1`, + }, + }); + + const parent = `projects/${projectId}/locations/${zone}`; + const request = {parent, node, nodeId: nodeName}; + + const [operation] = await tpuClient.createNode(request); + + // Wait for the create operation to complete. + const [response] = await operation.promise(); + + console.log(JSON.stringify(response)); + return response; + } + return await callCreateTpuVMStartupScript(); + // [END tpu_vm_create_startup_script] +} + +module.exports = main; + +// TODO(developer): Uncomment below lines before running the sample. +// main(...process.argv.slice(2)).catch(err => { +// console.error(err); +// process.exitCode = 1; +// }); diff --git a/tpu/createTopologyVM.js b/tpu/createTopologyVM.js new file mode 100644 index 0000000000..0863a78ec2 --- /dev/null +++ b/tpu/createTopologyVM.js @@ -0,0 +1,105 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; +/** Creates a Cloud TPU node described by an AcceleratorConfig (TPU version plus topology): builds the request, calls tpuClient.createNode(), waits for the operation, then logs and returns its response. tpuClient must provide getProjectId() and createNode(). */ +async function main(tpuClient) { + // [START tpu_vm_create_topology] + // Import the TPUClient + // TODO(developer): Uncomment below line before running the sample. + // const {TpuClient} = require('@google-cloud/tpu').v2; + + const {Node, NetworkConfig, AcceleratorConfig} = + require('@google-cloud/tpu').protos.google.cloud.tpu.v2; + + // Instantiate a tpuClient + // TODO(developer): Uncomment below line before running the sample. + // tpuClient = new TpuClient(); + + /** + * TODO(developer): Update these variables before running the sample. + */ + // Project ID or project number of the Google Cloud project in which you want to create the node. + const projectId = await tpuClient.getProjectId(); + + // The name of the network you want the TPU node to connect to. The network should be assigned to your project. The same name is used below as the subnetwork name. + const networkName = 'compute-tpu-network'; + + // The region of the subnetwork that you want the TPU node to connect to. + const region = 'europe-west4'; + + // The name for your TPU. + const nodeName = 'node-name-1'; + + // The zone in which to create the TPU. + // For more information about supported TPU types for specific zones, + // see https://cloud.google.com/tpu/docs/regions-zones + const zone = 'europe-west4-a'; + + // Software version that specifies the version of the TPU runtime to install. For more information, + // see https://cloud.google.com/tpu/docs/runtimes + const tpuSoftwareVersion = 'tpu-vm-tf-2.17.0-pod-pjrt'; + + // The version of the Cloud TPU you want to create. 
+ // Available options: TYPE_UNSPECIFIED = 0, V2 = 2, V3 = 4, V4 = 7 + const tpuVersion = AcceleratorConfig.Type.V2; + + // The physical topology of your TPU slice. + // For more information about topology for each TPU version, + // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions. + const topology = '2x2'; + + async function callCreateTpuVMTopology() { + // Describe the node to create + const node = new Node({ + name: nodeName, + zone, + // acceleratorType is not set here; the TPU is described by acceleratorConfig (version + topology) below. + runtimeVersion: tpuSoftwareVersion, + // Define network + networkConfig: new NetworkConfig({ + enableExternalIps: true, + network: `projects/${projectId}/global/networks/${networkName}`, + subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`, + }), + acceleratorConfig: new AcceleratorConfig({ + type: tpuVersion, + topology, + }), + }); + + const parent = `projects/${projectId}/locations/${zone}`; + const request = {parent, node, nodeId: nodeName}; + + const [operation] = await tpuClient.createNode(request); + + // Wait for the create operation to complete. + const [response] = await operation.promise(); + + console.log(JSON.stringify(response)); + return response; + } + return await callCreateTpuVMTopology(); + // [END tpu_vm_create_topology] +} + +module.exports = main; + +// TODO(developer): Uncomment below lines before running the sample. +// main(...process.argv.slice(2)).catch(err => { +// console.error(err); +// process.exitCode = 1; +// }); diff --git a/tpu/test/createStartupScriptVM.test.js b/tpu/test/createStartupScriptVM.test.js new file mode 100644 index 0000000000..a252096365 --- /dev/null +++ b/tpu/test/createStartupScriptVM.test.js @@ -0,0 +1,69 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +const assert = require('node:assert/strict'); +const {beforeEach, afterEach, describe, it} = require('mocha'); +const sinon = require('sinon'); +const createStartupScriptVM = require('../createStartupScriptVM.js'); +// Exercises the startup-script sample against a stubbed TPU client, so createNode never reaches the real API. The suite callback is synchronous because Mocha does not await describe() callbacks. +describe('Compute tpu', () => { + const nodeName = 'node-name-1'; + const zone = 'europe-west4-a'; + const projectId = 'project_id'; + let tpuClientMock; + + beforeEach(() => { + tpuClientMock = { + getProjectId: sinon.stub().resolves(projectId), + }; + }); + + afterEach(() => { + sinon.restore(); + }); + + it('should create a new tpu with startup script', async () => { + tpuClientMock.createNode = sinon.stub().resolves([ + { + promise: sinon.stub().resolves([ + { + name: nodeName, + }, + ]), + }, + ]); + + const response = await createStartupScriptVM(tpuClientMock); + + sinon.assert.calledWith( + tpuClientMock.createNode, + sinon.match({ + parent: `projects/${projectId}/locations/${zone}`, + node: { + name: nodeName, + metadata: { + 'startup-script': + '#!/bin/bash\n echo "Hello World" > /var/log/hello.log\n sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1', + }, + }, + nodeId: nodeName, + }) + ); + assert(response.name.includes(nodeName)); + }); +}); diff --git a/tpu/test/createTopologyVM.test.js b/tpu/test/createTopologyVM.test.js new file mode 100644 index 0000000000..4f52d7407a --- /dev/null +++ b/tpu/test/createTopologyVM.test.js @@ -0,0 +1,66 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +const assert = require('node:assert/strict'); +const {beforeEach, afterEach, describe, it} = require('mocha'); +const sinon = require('sinon'); +const createTopologyVM = require('../createTopologyVM.js'); +// Exercises the topology sample against a stubbed TPU client, so createNode never reaches the real API. The suite callback is synchronous because Mocha does not await describe() callbacks. +describe('Compute tpu with topology', () => { + const nodeName = 'node-name-1'; + const zone = 'europe-west4-a'; + const projectId = 'project_id'; + let tpuClientMock; + + beforeEach(() => { + tpuClientMock = { + getProjectId: sinon.stub().resolves(projectId), + }; + }); + + afterEach(() => { + sinon.restore(); + }); + + it('should create a new tpu with topology', async () => { + tpuClientMock.createNode = sinon.stub().resolves([ + { + promise: sinon.stub().resolves([ + { + name: nodeName, + }, + ]), + }, + ]); + + const response = await createTopologyVM(tpuClientMock); + + sinon.assert.calledWith( + tpuClientMock.createNode, + sinon.match({ + parent: `projects/${projectId}/locations/${zone}`, + node: { + name: nodeName, + acceleratorConfig: {type: 2, topology: '2x2'}, + }, + nodeId: nodeName, + }) + ); + assert(response.name.includes(nodeName)); + }); +});