Skip to content

Commit

Permalink
feat: add tpu_vm_create_topology/startup_script (#3902)
Browse files Browse the repository at this point in the history
* feat: add tpu_vm_create_topology

* feat: tpu_vm_create_startup_script

* Use mocked TPUClient
  • Loading branch information
gryczj authored Dec 3, 2024
1 parent 87b18af commit 68c11ef
Show file tree
Hide file tree
Showing 4 changed files with 341 additions and 0 deletions.
101 changes: 101 additions & 0 deletions tpu/createStartupScriptVM.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Creates a Cloud TPU VM whose metadata carries a startup script.
 *
 * @param {object} tpuClient - Client exposing `getProjectId()` and
 *     `createNode()`. The sample expects a `TpuClient`; tests pass a mock.
 * @returns {Promise<object>} The first element resolved by the create
 *     operation's `promise()`.
 */
async function main(tpuClient) {
  // [START tpu_vm_create_startup_script]
  // Import the TPUClient
  // TODO(developer): Uncomment below line before running the sample.
  // const {TpuClient} = require('@google-cloud/tpu').v2;

  // Message classes used to build the createNode request.
  const {Node, NetworkConfig} =
    require('@google-cloud/tpu').protos.google.cloud.tpu.v2;

  // Instantiate a tpuClient
  // TODO(developer): Uncomment below line before running the sample.
  // tpuClient = new TpuClient();

  // TODO(developer): Update these variables before running the sample.
  // Project ID or project number of the Google Cloud project in which you want to create the node.
  const projectId = await tpuClient.getProjectId();

  // Name of the network the TPU node connects to. The network should be assigned to your project.
  const networkName = 'compute-tpu-network';

  // Region of the network that the TPU node connects to.
  const region = 'europe-west4';

  // Name for your TPU.
  const nodeName = 'node-name-1';

  // Zone in which to create the TPU.
  // For more information about supported TPU types for specific zones,
  // see https://cloud.google.com/tpu/docs/regions-zones
  const zone = 'europe-west4-a';

  // Accelerator type: specifies the version and size of the Cloud TPU to create.
  // For more information about supported accelerator types for each TPU version,
  // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  const acceleratorType = 'v2-8';

  // Version of the TPU runtime software to install. For more information,
  // see https://cloud.google.com/tpu/docs/runtimes
  const runtimeVersion = 'tpu-vm-tf-2.17.0-pod-pjrt';

  // Builds the node definition, submits the create request and waits for it.
  const createTpuVmWithStartupScript = async () => {
    // Define network. The subnetwork path reuses networkName as the subnetwork name.
    const networkConfig = new NetworkConfig({
      enableExternalIps: true,
      network: `projects/${projectId}/global/networks/${networkName}`,
      subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
    });

    // The script updates numpy to the latest version and logs the output to a file.
    // Continuation lines stay flush left: any whitespace placed inside the
    // template literal would become part of the script text.
    const startupScript = `#!/bin/bash
echo "Hello World" > /var/log/hello.log
sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1`;

    // Create a node
    const node = new Node({
      name: nodeName,
      zone,
      acceleratorType,
      runtimeVersion,
      networkConfig,
      metadata: {
        'startup-script': startupScript,
      },
    });

    const [createOperation] = await tpuClient.createNode({
      parent: `projects/${projectId}/locations/${zone}`,
      node,
      nodeId: nodeName,
    });

    // Wait for the create operation to complete.
    const [createdNode] = await createOperation.promise();

    console.log(JSON.stringify(createdNode));
    return createdNode;
  };

  return await createTpuVmWithStartupScript();
  // [END tpu_vm_create_startup_script]
}

// Export the sample so it can be invoked with an injected client
// (the test suite calls it with a mocked TPU client).
module.exports = main;

// TODO(developer): Uncomment below lines before running the sample.
// main(...process.argv.slice(2)).catch(err => {
//   console.error(err);
//   process.exitCode = 1;
// });
105 changes: 105 additions & 0 deletions tpu/createTopologyVM.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Creates a Cloud TPU VM described by an accelerator config
 * (TPU version plus topology).
 *
 * @param {object} tpuClient - Client exposing `getProjectId()` and
 *     `createNode()`. The sample expects a `TpuClient`; tests pass a mock.
 * @returns {Promise<object>} The first element resolved by the create
 *     operation's `promise()`.
 */
async function main(tpuClient) {
  // [START tpu_vm_create_topology]
  // Import the TPUClient
  // TODO(developer): Uncomment below line before running the sample.
  // const {TpuClient} = require('@google-cloud/tpu').v2;

  // Message classes used to build the createNode request.
  const {Node, NetworkConfig, AcceleratorConfig} =
    require('@google-cloud/tpu').protos.google.cloud.tpu.v2;

  // Instantiate a tpuClient
  // TODO(developer): Uncomment below line before running the sample.
  // tpuClient = new TpuClient();

  /**
   * TODO(developer): Update these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project in which you want to create the node.
  const projectId = await tpuClient.getProjectId();

  // Name of the network the TPU node connects to. The network should be assigned to your project.
  const networkName = 'compute-tpu-network';

  // Region of the network that the TPU node connects to.
  const region = 'europe-west4';

  // Name for your TPU.
  const nodeName = 'node-name-1';

  // Zone in which to create the TPU.
  // For more information about supported TPU types for specific zones,
  // see https://cloud.google.com/tpu/docs/regions-zones
  const zone = 'europe-west4-a';

  // Version of the TPU runtime software to install. For more information,
  // see https://cloud.google.com/tpu/docs/runtimes
  const runtimeVersion = 'tpu-vm-tf-2.17.0-pod-pjrt';

  // The version of the Cloud TPU you want to create.
  // Available options: TYPE_UNSPECIFIED = 0, V2 = 2, V3 = 4, V4 = 7
  const tpuVersion = AcceleratorConfig.Type.V2;

  // The physical topology of your TPU slice.
  // For more information about topology for each TPU version,
  // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  const topology = '2x2';

  // Builds the node definition, submits the create request and waits for it.
  const createTpuVmWithTopology = async () => {
    // Define network. The subnetwork path reuses networkName as the subnetwork name.
    const networkConfig = new NetworkConfig({
      enableExternalIps: true,
      network: `projects/${projectId}/global/networks/${networkName}`,
      subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
    });

    // No acceleratorType is set in this sample: the TPU is described through
    // acceleratorConfig (version + topology) instead.
    const acceleratorConfig = new AcceleratorConfig({
      type: tpuVersion,
      topology,
    });

    // Create a node
    const node = new Node({
      name: nodeName,
      zone,
      runtimeVersion,
      networkConfig,
      acceleratorConfig,
    });

    const [createOperation] = await tpuClient.createNode({
      parent: `projects/${projectId}/locations/${zone}`,
      node,
      nodeId: nodeName,
    });

    // Wait for the create operation to complete.
    const [createdNode] = await createOperation.promise();

    console.log(JSON.stringify(createdNode));
    return createdNode;
  };

  return await createTpuVmWithTopology();
  // [END tpu_vm_create_topology]
}

// Export the sample so it can be invoked with an injected client
// (the test suite calls it with a mocked TPU client).
module.exports = main;

// TODO(developer): Uncomment below lines before running the sample.
// main(...process.argv.slice(2)).catch(err => {
//   console.error(err);
//   process.exitCode = 1;
// });
69 changes: 69 additions & 0 deletions tpu/test/createStartupScriptVM.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const assert = require('node:assert/strict');
const {beforeEach, afterEach, describe, it} = require('mocha');
const sinon = require('sinon');
const createStartupScriptVM = require('../createStartupScriptVM.js');

// Unit test for the startup-script sample. The TPU client is fully mocked, so
// no Google Cloud resources are created.
//
// The suite callback is intentionally synchronous: mocha registers hooks and
// tests while the callback runs and does not await a promise returned from
// `describe`, so only hooks and `it` callbacks should be async.
describe('Compute tpu', () => {
  const nodeName = 'node-name-1';
  const zone = 'europe-west4-a';
  const projectId = 'project_id';
  let tpuClientMock;

  beforeEach(() => {
    // Fresh mock per test; individual tests attach the createNode stub.
    tpuClientMock = {
      getProjectId: sinon.stub().resolves(projectId),
    };
  });

  afterEach(() => {
    sinon.restore();
  });

  it('should create a new tpu with startup script', async () => {
    // createNode resolves to [operation]; operation.promise() resolves to
    // [node], mirroring how the sample destructures both results.
    tpuClientMock.createNode = sinon.stub().resolves([
      {
        promise: sinon.stub().resolves([
          {
            name: nodeName,
          },
        ]),
      },
    ]);

    const response = await createStartupScriptVM(tpuClientMock);

    sinon.assert.calledWith(
      tpuClientMock.createNode,
      sinon.match({
        parent: `projects/${projectId}/locations/${zone}`,
        node: {
          name: nodeName,
          metadata: {
            // Keep in sync with the script literal in ../createStartupScriptVM.js.
            'startup-script':
              '#!/bin/bash\n echo "Hello World" > /var/log/hello.log\n sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1',
          },
        },
        nodeId: nodeName,
      })
    );
    assert(response.name.includes(nodeName));
  });
});
66 changes: 66 additions & 0 deletions tpu/test/createTopologyVM.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const assert = require('node:assert/strict');
const {beforeEach, afterEach, describe, it} = require('mocha');
const sinon = require('sinon');
const createTopologyVM = require('../createTopologyVM.js');

// Unit test for the topology sample. The TPU client is fully mocked, so no
// Google Cloud resources are created.
//
// The suite callback is intentionally synchronous: mocha registers hooks and
// tests while the callback runs and does not await a promise returned from
// `describe`, so only hooks and `it` callbacks should be async.
describe('Compute tpu with topology', () => {
  const nodeName = 'node-name-1';
  const zone = 'europe-west4-a';
  const projectId = 'project_id';
  let tpuClientMock;

  beforeEach(() => {
    // Fresh mock per test; individual tests attach the createNode stub.
    tpuClientMock = {
      getProjectId: sinon.stub().resolves(projectId),
    };
  });

  afterEach(() => {
    sinon.restore();
  });

  it('should create a new tpu with topology', async () => {
    // createNode resolves to [operation]; operation.promise() resolves to
    // [node], mirroring how the sample destructures both results.
    tpuClientMock.createNode = sinon.stub().resolves([
      {
        promise: sinon.stub().resolves([
          {
            name: nodeName,
          },
        ]),
      },
    ]);

    const response = await createTopologyVM(tpuClientMock);

    sinon.assert.calledWith(
      tpuClientMock.createNode,
      sinon.match({
        parent: `projects/${projectId}/locations/${zone}`,
        node: {
          name: nodeName,
          // type 2 is the value the sample passes via AcceleratorConfig.Type.V2.
          acceleratorConfig: {type: 2, topology: '2x2'},
        },
        nodeId: nodeName,
      })
    );
    assert(response.name.includes(nodeName));
  });
});

0 comments on commit 68c11ef

Please sign in to comment.