diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ca79aba529f..f21afe0f659 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -44,6 +44,7 @@ /security-command-center @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/gcp-security-command-center /servicedirectory @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra /webrisk @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra +/tpu @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra # DEE Platform Ops (DEEPO) /errorreporting @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers diff --git a/tpu/pom.xml b/tpu/pom.xml new file mode 100644 index 00000000000..601db56977d --- /dev/null +++ b/tpu/pom.xml @@ -0,0 +1,101 @@ + + + + 4.0.0 + com.example.tpu + gce-diregapic-samples + 1.0-SNAPSHOT + + + + shared-configuration + com.google.cloud.samples + 1.2.0 + + + + 11 + 11 + + + + + com.google.cloud + google-cloud-tpu + 2.52.0 + + + + com.google.api + gax + + + + + google-cloud-storage + com.google.cloud + test + + + + truth + com.google.truth + test + 1.4.0 + + + junit + junit + test + 4.13.2 + + + + + org.junit.jupiter + junit-jupiter-engine + 5.10.2 + test + + + org.mockito + mockito-core + 5.13.0 + test + + + + + + + libraries-bom + com.google.cloud + import + pom + 26.40.0 + + + + + \ No newline at end of file diff --git a/tpu/src/main/java/tpu/CreateQueuedResourceWithNetwork.java b/tpu/src/main/java/tpu/CreateQueuedResourceWithNetwork.java new file mode 100644 index 00000000000..de9aa884aac --- /dev/null +++ b/tpu/src/main/java/tpu/CreateQueuedResourceWithNetwork.java @@ -0,0 +1,139 @@ +/* + * 
Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package tpu;
+
+//[START tpu_queued_resources_network]
+import com.google.api.gax.retrying.RetrySettings;
+import com.google.cloud.tpu.v2alpha1.CreateQueuedResourceRequest;
+import com.google.cloud.tpu.v2alpha1.NetworkConfig;
+import com.google.cloud.tpu.v2alpha1.Node;
+import com.google.cloud.tpu.v2alpha1.QueuedResource;
+import com.google.cloud.tpu.v2alpha1.TpuClient;
+import com.google.cloud.tpu.v2alpha1.TpuSettings;
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import org.threeten.bp.Duration;
+
+public class CreateQueuedResourceWithNetwork {
+  public static void main(String[] args)
+      throws IOException, ExecutionException, InterruptedException {
+    // TODO(developer): Replace these variables before running the sample.
+    // Project ID or project number of the Google Cloud project you want to create a node.
+    String projectId = "YOUR_PROJECT_ID";
+    // The zone in which to create the TPU.
+    // For more information about supported TPU types for specific zones,
+    // see https://cloud.google.com/tpu/docs/regions-zones
+    String zone = "europe-west4-a";
+    // The name for your TPU.
+    String nodeName = "YOUR_TPU_NAME";
+    // The accelerator type that specifies the version and size of the Cloud TPU you want to create.
+    // For more information about supported accelerator types for each TPU version,
+    // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
+    String tpuType = "v2-8";
+    // Software version that specifies the version of the TPU runtime to install.
+    // For more information see https://cloud.google.com/tpu/docs/runtimes
+    String tpuSoftwareVersion = "tpu-vm-tf-2.14.1";
+    // The name for your Queued Resource.
+    String queuedResourceId = "QUEUED_RESOURCE_ID";
+    // The name of the network you want the node to connect to.
+    // The network should be assigned to your project.
+    String networkName = "YOUR_COMPUTE_TPU_NETWORK";
+
+    createQueuedResourceWithNetwork(projectId, zone, queuedResourceId, nodeName,
+        tpuType, tpuSoftwareVersion, networkName);
+  }
+
+  // Creates a Queued Resource whose single TPU node is attached to the given network and to the
+  // subnetwork of the same name in the zone's region. Blocks until the create operation
+  // completes and returns the Queued Resource reported by that operation.
+  public static QueuedResource createQueuedResourceWithNetwork(
+      String projectId, String zone, String queuedResourceId, String nodeName,
+      String tpuType, String tpuSoftwareVersion, String networkName)
+      throws IOException, ExecutionException, InterruptedException {
+    // Validate the zone up front so a malformed value fails before any client is created.
+    String region = regionOf(zone);
+    String parent = String.format("projects/%s/locations/%s", projectId, zone);
+
+    // With these settings the client library handles the Operation's polling mechanism
+    // and prevents a CancellationException error.
+    TpuSettings.Builder clientSettings = TpuSettings.newBuilder();
+    clientSettings
+        .createQueuedResourceSettings()
+        .setRetrySettings(
+            RetrySettings.newBuilder()
+                .setInitialRetryDelay(Duration.ofMillis(5000L))
+                .setRetryDelayMultiplier(2.0)
+                .setInitialRpcTimeout(Duration.ZERO)
+                .setRpcTimeoutMultiplier(1.0)
+                .setMaxRetryDelay(Duration.ofMillis(45000L))
+                .setTotalTimeout(Duration.ofHours(24L))
+                .build());
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests.
+    try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
+      // Specify the network and subnetwork that you want to connect your TPU to.
+      // The subnetwork is looked up under the same name as the network.
+      NetworkConfig networkConfig =
+          NetworkConfig.newBuilder()
+              .setEnableExternalIps(true)
+              .setNetwork(String.format("projects/%s/global/networks/%s", projectId, networkName))
+              .setSubnetwork(
+                  String.format(
+                      "projects/%s/regions/%s/subnetworks/%s", projectId, region, networkName))
+              .build();
+
+      // Create a node
+      Node node =
+          Node.newBuilder()
+              .setName(nodeName)
+              .setAcceleratorType(tpuType)
+              .setRuntimeVersion(tpuSoftwareVersion)
+              .setNetworkConfig(networkConfig)
+              .setQueuedResource(
+                  String.format(
+                      "projects/%s/locations/%s/queuedResources/%s",
+                      projectId, zone, queuedResourceId))
+              .build();
+
+      // Create queued resource
+      QueuedResource queuedResource =
+          QueuedResource.newBuilder()
+              .setName(queuedResourceId)
+              .setTpu(
+                  QueuedResource.Tpu.newBuilder()
+                      .addNodeSpec(
+                          QueuedResource.Tpu.NodeSpec.newBuilder()
+                              .setParent(parent)
+                              .setNode(node)
+                              .setNodeId(nodeName)
+                              .build())
+                      .build())
+              .build();
+
+      CreateQueuedResourceRequest request =
+          CreateQueuedResourceRequest.newBuilder()
+              .setParent(parent)
+              .setQueuedResource(queuedResource)
+              .setQueuedResourceId(queuedResourceId)
+              .build();
+
+      // get() blocks until the long-running create operation has finished.
+      QueuedResource response = tpuClient.createQueuedResourceAsync(request).get();
+      // You can wait until TPU Node is READY,
+      // and check its status using getTpuVm() from "tpu_vm_get" sample.
+      System.out.println("Queued Resource created: " + queuedResourceId);
+      return response;
+    }
+  }
+
+  // Derives the region from a zone name by dropping the final "-<suffix>" component,
+  // e.g. "europe-west4-a" -> "europe-west4", without assuming the suffix is one character long.
+  private static String regionOf(String zone) {
+    int lastDash = zone.lastIndexOf('-');
+    if (lastDash <= 0) {
+      throw new IllegalArgumentException("Cannot derive a region from zone: " + zone);
+    }
+    return zone.substring(0, lastDash);
+  }
+}
+//[END tpu_queued_resources_network]
diff --git a/tpu/src/main/java/tpu/DeleteForceQueuedResource.java b/tpu/src/main/java/tpu/DeleteForceQueuedResource.java
new file mode 100644
index 00000000000..ed499d8cac5
--- /dev/null
+++ b/tpu/src/main/java/tpu/DeleteForceQueuedResource.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package tpu;
+
+//[START tpu_queued_resources_delete_force]
+
+import com.google.api.gax.retrying.RetrySettings;
+import com.google.api.gax.rpc.UnknownException;
+import com.google.cloud.tpu.v2alpha1.DeleteQueuedResourceRequest;
+import com.google.cloud.tpu.v2alpha1.TpuClient;
+import com.google.cloud.tpu.v2alpha1.TpuSettings;
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import org.threeten.bp.Duration;
+
+public class DeleteForceQueuedResource {
+  public static void main(String[] args) {
+    // TODO(developer): Replace these variables before running the sample.
+    // Project ID or project number of the Google Cloud project.
+    String projectId = "YOUR_PROJECT_ID";
+    // The zone in which the TPU was created.
+    String zone = "europe-west4-a";
+    // The name for your Queued Resource.
+    String queuedResourceId = "QUEUED_RESOURCE_ID";
+
+    deleteForceQueuedResource(projectId, zone, queuedResourceId);
+  }
+
+  // Deletes a Queued Resource with the force flag set and blocks until the delete operation
+  // completes. Failures are reported on stdout rather than rethrown; the success message is
+  // printed only when the operation actually finished without error.
+  public static void deleteForceQueuedResource(
+      String projectId, String zone, String queuedResourceId) {
+    String name = String.format("projects/%s/locations/%s/queuedResources/%s",
+        projectId, zone, queuedResourceId);
+    // With these settings the client library handles the Operation's polling mechanism
+    // and prevents a CancellationException error.
+    TpuSettings.Builder clientSettings = TpuSettings.newBuilder();
+    clientSettings
+        .deleteQueuedResourceSettings()
+        .setRetrySettings(
+            RetrySettings.newBuilder()
+                .setInitialRetryDelay(Duration.ofMillis(5000L))
+                .setRetryDelayMultiplier(2.0)
+                .setInitialRpcTimeout(Duration.ZERO)
+                .setRpcTimeoutMultiplier(1.0)
+                .setMaxRetryDelay(Duration.ofMillis(45000L))
+                .setTotalTimeout(Duration.ofHours(24L))
+                .build());
+
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests.
+    try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
+      DeleteQueuedResourceRequest request =
+          DeleteQueuedResourceRequest.newBuilder().setName(name).setForce(true).build();
+
+      // get() blocks until the long-running delete operation has finished.
+      tpuClient.deleteQueuedResourceAsync(request).get();
+      System.out.printf("Deleted Queued Resource: %s\n", name);
+    } catch (InterruptedException e) {
+      // Restore the interrupt flag so the caller can still observe the interruption.
+      Thread.currentThread().interrupt();
+      System.out.println(e.getMessage());
+    } catch (UnknownException | ExecutionException | IOException e) {
+      System.out.println(e.getMessage());
+    }
+  }
+}
+//[END tpu_queued_resources_delete_force]
diff --git a/tpu/src/main/java/tpu/GetQueuedResource.java b/tpu/src/main/java/tpu/GetQueuedResource.java
new file mode 100644
index 00000000000..3a510e045fe
--- /dev/null
+++ b/tpu/src/main/java/tpu/GetQueuedResource.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package tpu;
+
+//[START tpu_queued_resources_get]
+
+import com.google.cloud.tpu.v2alpha1.GetQueuedResourceRequest;
+import com.google.cloud.tpu.v2alpha1.QueuedResource;
+import com.google.cloud.tpu.v2alpha1.TpuClient;
+import java.io.IOException;
+
+public class GetQueuedResource {
+  public static void main(String[] args) throws IOException {
+    // TODO(developer): Replace these variables before running the sample.
+    // Project ID or project number of the Google Cloud project.
+    String projectId = "YOUR_PROJECT_ID";
+    // The zone in which the TPU was created.
+    String zone = "europe-west4-a";
+    // The name for your Queued Resource.
+    String queuedResourceId = "QUEUED_RESOURCE_ID";
+
+    getQueuedResource(projectId, zone, queuedResourceId);
+  }
+
+  // Looks up one Queued Resource by project, zone and ID and returns it.
+  public static QueuedResource getQueuedResource(
+      String projectId, String zone, String queuedResourceId) throws IOException {
+    String resourceName =
+        String.format(
+            "projects/%s/locations/%s/queuedResources/%s", projectId, zone, queuedResourceId);
+    GetQueuedResourceRequest lookup =
+        GetQueuedResourceRequest.newBuilder().setName(resourceName).build();
+
+    // try-with-resources closes the client as soon as the lookup has returned.
+    try (TpuClient client = TpuClient.create()) {
+      return client.getQueuedResource(lookup);
+    }
+  }
+}
+//[END tpu_queued_resources_get]
diff --git a/tpu/src/test/java/tpu/CreateQueuedResourceWithNetworkIT.java b/tpu/src/test/java/tpu/CreateQueuedResourceWithNetworkIT.java
new file mode 100644
index 00000000000..8e7f56fb97b
--- /dev/null
+++ b/tpu/src/test/java/tpu/CreateQueuedResourceWithNetworkIT.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package tpu;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.api.gax.rpc.NotFoundException;
+import com.google.cloud.tpu.v2alpha1.QueuedResource;
+import java.io.IOException;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+// Integration test that creates a Queued Resource on a named network and force-deletes it
+// afterwards. Every annotation here is JUnit Jupiter: mixing in the JUnit 4 runner or
+// org.junit.Test would leave @BeforeAll/@AfterAll unexecuted or the test undiscovered.
+@Timeout(value = 6, unit = TimeUnit.MINUTES)
+public class CreateQueuedResourceWithNetworkIT {
+
+  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
+  private static final String ZONE = "europe-west4-a";
+  static String javaVersion = System.getProperty("java.version").substring(0, 2);
+  private static final String NODE_NAME = "test-tpu-queued-resource-network-" + javaVersion + "-"
+      + UUID.randomUUID().toString().substring(0, 8);
+  private static final String TPU_TYPE = "v2-8";
+  private static final String TPU_SOFTWARE_VERSION = "tpu-vm-tf-2.14.1";
+  private static final String QUEUED_RESOURCE_NAME = "queued-resource-network-" + javaVersion + "-"
+      + UUID.randomUUID().toString().substring(0, 8);
+  private static final String NETWORK_NAME = "default";
+
+  @BeforeAll
+  public static void setUp() throws IOException {
+
+    // Cleanup existing stale resources.
+    Util.cleanUpExistingQueuedResources("queued-resource-network-", PROJECT_ID, ZONE);
+  }
+
+  @AfterAll
+  public static void cleanup() {
+    DeleteForceQueuedResource.deleteForceQueuedResource(PROJECT_ID, ZONE, QUEUED_RESOURCE_NAME);
+
+    // Test that resource is deleted
+    Assertions.assertThrows(
+        NotFoundException.class,
+        () -> GetQueuedResource.getQueuedResource(PROJECT_ID, ZONE, QUEUED_RESOURCE_NAME));
+  }
+
+  @Test
+  public void testCreateQueuedResourceWithSpecifiedNetwork() throws Exception {
+
+    QueuedResource queuedResource = CreateQueuedResourceWithNetwork.createQueuedResourceWithNetwork(
+        PROJECT_ID, ZONE, QUEUED_RESOURCE_NAME, NODE_NAME,
+        TPU_TYPE, TPU_SOFTWARE_VERSION, NETWORK_NAME);
+
+    assertThat(queuedResource.getTpu().getNodeSpec(0).getNode().getName()).isEqualTo(NODE_NAME);
+    assertThat(queuedResource.getTpu().getNodeSpec(0).getNode().getNetworkConfig().getNetwork())
+        .contains(NETWORK_NAME);
+    assertThat(queuedResource.getTpu().getNodeSpec(0).getNode().getNetworkConfig().getSubnetwork())
+        .contains(NETWORK_NAME);
+  }
+}
\ No newline at end of file
diff --git a/tpu/src/test/java/tpu/Util.java b/tpu/src/test/java/tpu/Util.java
new file mode 100644
index 00000000000..c5d4d839291
--- /dev/null
+++ b/tpu/src/test/java/tpu/Util.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package tpu;
+
+import com.google.cloud.tpu.v2.Node;
+import com.google.cloud.tpu.v2.TpuClient;
+import com.google.cloud.tpu.v2alpha1.QueuedResource;
+import com.google.protobuf.Timestamp;
+import java.io.IOException;
+import java.time.Instant;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
+import java.util.concurrent.ExecutionException;
+
+// Helpers used by the TPU integration tests to remove stale test resources.
+public class Util {
+  private static final int DELETION_THRESHOLD_TIME_MINUTES = 30;
+  // DateTimeFormatter is immutable and thread-safe, so a single shared instance is sufficient.
+  private static final DateTimeFormatter TIMESTAMP_FORMATTER =
+      DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
+
+  // Force-deletes every Queued Resource in the given project and zone whose name contains
+  // prefixToDelete and whose first node spec carries a creation timestamp older than
+  // DELETION_THRESHOLD_TIME_MINUTES. Queued Resources without any node spec are skipped.
+  public static void cleanUpExistingQueuedResources(
+      String prefixToDelete, String projectId, String zone)
+      throws IOException {
+    try (com.google.cloud.tpu.v2alpha1.TpuClient tpuClient =
+        com.google.cloud.tpu.v2alpha1.TpuClient.create()) {
+      String parent = String.format("projects/%s/locations/%s", projectId, zone);
+
+      for (QueuedResource queuedResource : tpuClient.listQueuedResources(parent).iterateAll()) {
+        // Filter by name first so unrelated resources are never inspected any further.
+        if (!containPrefixToDeleteAndZone(queuedResource, prefixToDelete, zone)) {
+          continue;
+        }
+        // getNodeSpec(0) throws on an empty list; without a node there is no age to compare.
+        if (queuedResource.getTpu().getNodeSpecCount() == 0) {
+          continue;
+        }
+        // NOTE(review): the age check relies on the create time stored in the node spec.
+        // If the service leaves it unset it reads as the epoch and always counts as stale
+        // - confirm this is the intended timestamp.
+        com.google.cloud.tpu.v2alpha1.Node node = queuedResource.getTpu().getNodeSpec(0).getNode();
+        if (isCreatedBeforeThresholdTime(formatTimestamp(node.getCreateTime()))) {
+          String fullName = queuedResource.getName();
+          String name = fullName.substring(fullName.lastIndexOf("/") + 1);
+          DeleteForceQueuedResource.deleteForceQueuedResource(projectId, zone, name);
+        }
+      }
+    }
+  }
+
+  // Returns true when the resource's name contains prefixToDelete and the fourth "/"-separated
+  // segment of that name (the location in "projects/<p>/locations/<zone>/...") contains zone.
+  // Only v2 Node and v2alpha1 QueuedResource instances are inspected; any other object, or a
+  // name with fewer than four segments, yields false.
+  public static boolean containPrefixToDeleteAndZone(
+      Object resource, String prefixToDelete, String zone) {
+    String resourceName;
+    if (resource instanceof Node) {
+      resourceName = ((Node) resource).getName();
+    } else if (resource instanceof QueuedResource) {
+      resourceName = ((QueuedResource) resource).getName();
+    } else {
+      return false;
+    }
+    // Explicit guard instead of catching the NullPointerException that contains(null) throws.
+    if (prefixToDelete == null || zone == null) {
+      System.out.println("Resource not found, skipping deletion:");
+      return false;
+    }
+    if (!resourceName.contains(prefixToDelete)) {
+      return false;
+    }
+    String[] segments = resourceName.split("/");
+    return segments.length > 3 && segments[3].contains(zone);
+  }
+
+  // Returns true when the given ISO-8601 offset timestamp lies more than
+  // DELETION_THRESHOLD_TIME_MINUTES minutes before the current instant.
+  public static boolean isCreatedBeforeThresholdTime(String timestamp) {
+    return OffsetDateTime.parse(timestamp).toInstant()
+        .isBefore(Instant.now().minus(DELETION_THRESHOLD_TIME_MINUTES, ChronoUnit.MINUTES));
+  }
+
+  // Renders a protobuf Timestamp as a UTC ISO-8601 string with millisecond precision.
+  private static String formatTimestamp(Timestamp timestamp) {
+    OffsetDateTime offsetDateTime = OffsetDateTime.ofInstant(
+        Instant.ofEpochSecond(timestamp.getSeconds(), timestamp.getNanos()),
+        ZoneOffset.UTC);
+    return TIMESTAMP_FORMATTER.format(offsetDateTime);
+  }
+}
\ No newline at end of file