Skip to content
This repository was archived by the owner on Nov 29, 2023. It is now read-only.

Commit 8ab7c71

Browse files
feat: support min_num_instances for primary worker and InstanceFlexibilityPolicy for secondary worker (#555)
* feat: support min_num_instances for primary worker and InstanceFlexibilityPolicy for secondary worker PiperOrigin-RevId: 559135594 Source-Link: googleapis/googleapis@4a5a6fd Source-Link: https://github.com/googleapis/googleapis-gen/commit/5c911261f00257f768a8a323aa1a3e483640c1d2 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNWM5MTEyNjFmMDAyNTdmNzY4YThhMzIzYWExYTNlNDgzNjQwYzFkMiJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 89e2fdb commit 8ab7c71

File tree

9 files changed

+501
-25
lines changed

9 files changed

+501
-25
lines changed

google/cloud/dataproc/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@
111111
from google.cloud.dataproc_v1.types.clusters import GceClusterConfig
112112
from google.cloud.dataproc_v1.types.clusters import GetClusterRequest
113113
from google.cloud.dataproc_v1.types.clusters import IdentityConfig
114+
from google.cloud.dataproc_v1.types.clusters import InstanceFlexibilityPolicy
114115
from google.cloud.dataproc_v1.types.clusters import InstanceGroupConfig
116+
from google.cloud.dataproc_v1.types.clusters import InstanceReference
115117
from google.cloud.dataproc_v1.types.clusters import KerberosConfig
116118
from google.cloud.dataproc_v1.types.clusters import LifecycleConfig
117119
from google.cloud.dataproc_v1.types.clusters import ListClustersRequest
@@ -266,7 +268,9 @@
266268
"GceClusterConfig",
267269
"GetClusterRequest",
268270
"IdentityConfig",
271+
"InstanceFlexibilityPolicy",
269272
"InstanceGroupConfig",
273+
"InstanceReference",
270274
"KerberosConfig",
271275
"LifecycleConfig",
272276
"ListClustersRequest",

google/cloud/dataproc_v1/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@
7171
from .types.clusters import GceClusterConfig
7272
from .types.clusters import GetClusterRequest
7373
from .types.clusters import IdentityConfig
74+
from .types.clusters import InstanceFlexibilityPolicy
7475
from .types.clusters import InstanceGroupConfig
76+
from .types.clusters import InstanceReference
7577
from .types.clusters import KerberosConfig
7678
from .types.clusters import LifecycleConfig
7779
from .types.clusters import ListClustersRequest
@@ -222,8 +224,10 @@
222224
"HadoopJob",
223225
"HiveJob",
224226
"IdentityConfig",
227+
"InstanceFlexibilityPolicy",
225228
"InstanceGroupAutoscalingPolicyConfig",
226229
"InstanceGroupConfig",
230+
"InstanceReference",
227231
"InstantiateInlineWorkflowTemplateRequest",
228232
"InstantiateWorkflowTemplateRequest",
229233
"Job",

google/cloud/dataproc_v1/types/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@
5858
GceClusterConfig,
5959
GetClusterRequest,
6060
IdentityConfig,
61+
InstanceFlexibilityPolicy,
6162
InstanceGroupConfig,
63+
InstanceReference,
6264
KerberosConfig,
6365
LifecycleConfig,
6466
ListClustersRequest,
@@ -198,7 +200,9 @@
198200
"GceClusterConfig",
199201
"GetClusterRequest",
200202
"IdentityConfig",
203+
"InstanceFlexibilityPolicy",
201204
"InstanceGroupConfig",
205+
"InstanceReference",
202206
"KerberosConfig",
203207
"LifecycleConfig",
204208
"ListClustersRequest",

google/cloud/dataproc_v1/types/clusters.py

Lines changed: 199 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@
4040
"ShieldedInstanceConfig",
4141
"ConfidentialInstanceConfig",
4242
"InstanceGroupConfig",
43+
"InstanceReference",
4344
"ManagedGroupConfig",
45+
"InstanceFlexibilityPolicy",
4446
"AcceleratorConfig",
4547
"DiskConfig",
4648
"AuxiliaryNodeGroup",
@@ -561,8 +563,8 @@ class GceClusterConfig(proto.Message):
561563
`Tagging
562564
instances <https://cloud.google.com/compute/docs/label-or-tag-resources#tags>`__).
563565
metadata (MutableMapping[str, str]):
564-
The Compute Engine metadata entries to add to all instances
565-
(see `Project and instance
566+
Optional. The Compute Engine metadata entries to add to all
567+
instances (see `Project and instance
566568
metadata <https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata>`__).
567569
reservation_affinity (google.cloud.dataproc_v1.types.ReservationAffinity):
568570
Optional. Reservation Affinity for consuming
@@ -771,6 +773,9 @@ class InstanceGroupConfig(proto.Message):
771773
Output only. The list of instance names. Dataproc derives
772774
the names from ``cluster_name``, ``num_instances``, and the
773775
instance group.
776+
instance_references (MutableSequence[google.cloud.dataproc_v1.types.InstanceReference]):
777+
Output only. List of references to Compute
778+
Engine instances.
774779
image_uri (str):
775780
Optional. The Compute Engine image resource used for cluster
776781
instances.
@@ -832,6 +837,31 @@ class InstanceGroupConfig(proto.Message):
832837
Optional. Specifies the minimum cpu platform for the
833838
Instance Group. See `Dataproc -> Minimum CPU
834839
Platform <https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu>`__.
840+
min_num_instances (int):
841+
Optional. The minimum number of instances to create. If
842+
min_num_instances is set, it is used as a criterion for
843+
deciding whether cluster creation succeeds. Cluster creation
844+
fails and the cluster enters an error state if the total number of
845+
instances created is less than the min_num_instances. For
846+
example, given that num_instances = 5 and min_num_instances
847+
= 3,
848+
849+
- if 4 instances are created and then registered
850+
successfully but one instance is failed, the failed VM
851+
will be deleted and the cluster will be resized to 4
852+
instances in running state.
853+
- if 2 instances are created successfully and 3 instances
854+
are failed, the cluster will be in an error state and
855+
does not delete failed VMs for debugging.
856+
- if 2 instances are created and then registered
857+
successfully but 3 instances are failed to initialize,
858+
the cluster will be in an error state and does not delete
859+
failed VMs for debugging. NB: This can only be set for
860+
primary workers now.
861+
instance_flexibility_policy (google.cloud.dataproc_v1.types.InstanceFlexibilityPolicy):
862+
Optional. Instance flexibility Policy
863+
allowing a mixture of VM shapes and provisioning
864+
models.
835865
"""
836866

837867
class Preemptibility(proto.Enum):
@@ -877,6 +907,11 @@ class Preemptibility(proto.Enum):
877907
proto.STRING,
878908
number=2,
879909
)
910+
instance_references: MutableSequence["InstanceReference"] = proto.RepeatedField(
911+
proto.MESSAGE,
912+
number=11,
913+
message="InstanceReference",
914+
)
880915
image_uri: str = proto.Field(
881916
proto.STRING,
882917
number=3,
@@ -913,6 +948,51 @@ class Preemptibility(proto.Enum):
913948
proto.STRING,
914949
number=9,
915950
)
951+
min_num_instances: int = proto.Field(
952+
proto.INT32,
953+
number=12,
954+
)
955+
instance_flexibility_policy: "InstanceFlexibilityPolicy" = proto.Field(
956+
proto.MESSAGE,
957+
number=13,
958+
message="InstanceFlexibilityPolicy",
959+
)
960+
961+
962+
class InstanceReference(proto.Message):
963+
r"""A reference to a Compute Engine instance.
964+
965+
Attributes:
966+
instance_name (str):
967+
The user-friendly name of the Compute Engine
968+
instance.
969+
instance_id (str):
970+
The unique identifier of the Compute Engine
971+
instance.
972+
public_key (str):
973+
The public RSA key used for sharing data with
974+
this instance.
975+
public_ecies_key (str):
976+
The public ECIES key used for sharing data
977+
with this instance.
978+
"""
979+
980+
instance_name: str = proto.Field(
981+
proto.STRING,
982+
number=1,
983+
)
984+
instance_id: str = proto.Field(
985+
proto.STRING,
986+
number=2,
987+
)
988+
public_key: str = proto.Field(
989+
proto.STRING,
990+
number=3,
991+
)
992+
public_ecies_key: str = proto.Field(
993+
proto.STRING,
994+
number=4,
995+
)
916996

917997

918998
class ManagedGroupConfig(proto.Message):
@@ -926,6 +1006,10 @@ class ManagedGroupConfig(proto.Message):
9261006
instance_group_manager_name (str):
9271007
Output only. The name of the Instance Group
9281008
Manager for this group.
1009+
instance_group_manager_uri (str):
1010+
Output only. The partial URI to the instance
1011+
group manager for this group. E.g.
1012+
projects/my-project/regions/us-central1/instanceGroupManagers/my-igm.
9291013
"""
9301014

9311015
instance_template_name: str = proto.Field(
@@ -936,6 +1020,94 @@ class ManagedGroupConfig(proto.Message):
9361020
proto.STRING,
9371021
number=2,
9381022
)
1023+
instance_group_manager_uri: str = proto.Field(
1024+
proto.STRING,
1025+
number=3,
1026+
)
1027+
1028+
1029+
class InstanceFlexibilityPolicy(proto.Message):
1030+
r"""Instance flexibility Policy allowing a mixture of VM shapes
1031+
and provisioning models.
1032+
1033+
Attributes:
1034+
instance_selection_list (MutableSequence[google.cloud.dataproc_v1.types.InstanceFlexibilityPolicy.InstanceSelection]):
1035+
Optional. List of instance selection options
1036+
that the group will use when creating new VMs.
1037+
instance_selection_results (MutableSequence[google.cloud.dataproc_v1.types.InstanceFlexibilityPolicy.InstanceSelectionResult]):
1038+
Output only. A list of instance selection
1039+
results in the group.
1040+
"""
1041+
1042+
class InstanceSelection(proto.Message):
1043+
r"""Defines machine types and a rank to which the machine types
1044+
belong.
1045+
1046+
Attributes:
1047+
machine_types (MutableSequence[str]):
1048+
Optional. Full machine-type names, e.g.
1049+
"n1-standard-16".
1050+
rank (int):
1051+
Optional. Preference of this instance
1052+
selection. Lower number means higher preference.
1053+
Dataproc will first try to create a VM based on
1054+
the machine-type with priority rank and fallback
1055+
to next rank based on availability. Machine
1056+
types and instance selections with the same
1057+
priority have the same preference.
1058+
"""
1059+
1060+
machine_types: MutableSequence[str] = proto.RepeatedField(
1061+
proto.STRING,
1062+
number=1,
1063+
)
1064+
rank: int = proto.Field(
1065+
proto.INT32,
1066+
number=2,
1067+
)
1068+
1069+
class InstanceSelectionResult(proto.Message):
1070+
r"""Defines a mapping from machine types to the number of VMs
1071+
that are created with each machine type.
1072+
1073+
1074+
.. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields
1075+
1076+
Attributes:
1077+
machine_type (str):
1078+
Output only. Full machine-type names, e.g.
1079+
"n1-standard-16".
1080+
1081+
This field is a member of `oneof`_ ``_machine_type``.
1082+
vm_count (int):
1083+
Output only. Number of VMs provisioned with the machine_type.
1084+
1085+
This field is a member of `oneof`_ ``_vm_count``.
1086+
"""
1087+
1088+
machine_type: str = proto.Field(
1089+
proto.STRING,
1090+
number=1,
1091+
optional=True,
1092+
)
1093+
vm_count: int = proto.Field(
1094+
proto.INT32,
1095+
number=2,
1096+
optional=True,
1097+
)
1098+
1099+
instance_selection_list: MutableSequence[InstanceSelection] = proto.RepeatedField(
1100+
proto.MESSAGE,
1101+
number=2,
1102+
message=InstanceSelection,
1103+
)
1104+
instance_selection_results: MutableSequence[
1105+
InstanceSelectionResult
1106+
] = proto.RepeatedField(
1107+
proto.MESSAGE,
1108+
number=3,
1109+
message=InstanceSelectionResult,
1110+
)
9391111

9401112

9411113
class AcceleratorConfig(proto.Message):
@@ -1198,6 +1370,9 @@ class State(proto.Enum):
11981370
STARTING (8):
11991371
The cluster is being started. It is not ready
12001372
for use.
1373+
REPAIRING (10):
1374+
The cluster is being repaired. It is not
1375+
ready for use.
12011376
"""
12021377
UNKNOWN = 0
12031378
CREATING = 1
@@ -1209,6 +1384,7 @@ class State(proto.Enum):
12091384
STOPPING = 6
12101385
STOPPED = 7
12111386
STARTING = 8
1387+
REPAIRING = 10
12121388

12131389
class Substate(proto.Enum):
12141390
r"""The cluster substate.
@@ -1602,18 +1778,18 @@ class DataprocMetricConfig(proto.Message):
16021778
"""
16031779

16041780
class MetricSource(proto.Enum):
1605-
r"""A source for the collection of Dataproc OSS metrics (see [available
1606-
OSS metrics]
1607-
(https://cloud.google.com//dataproc/docs/guides/monitoring#available_oss_metrics)).
1781+
r"""A source for the collection of Dataproc custom metrics (see [Custom
1782+
metrics]
1783+
(https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)).
16081784
16091785
Values:
16101786
METRIC_SOURCE_UNSPECIFIED (0):
16111787
Required unspecified metric source.
16121788
MONITORING_AGENT_DEFAULTS (1):
1613-
Default monitoring agent metrics. If this source is enabled,
1789+
Monitoring agent metrics. If this source is enabled,
16141790
Dataproc enables the monitoring agent in Compute Engine, and
1615-
collects default monitoring agent metrics, which are
1616-
published with an ``agent.googleapis.com`` prefix.
1791+
collects monitoring agent metrics, which are published with
1792+
an ``agent.googleapis.com`` prefix.
16171793
HDFS (2):
16181794
HDFS metric source.
16191795
SPARK (3):
@@ -1637,20 +1813,20 @@ class MetricSource(proto.Enum):
16371813
HIVEMETASTORE = 7
16381814

16391815
class Metric(proto.Message):
1640-
r"""A Dataproc OSS metric.
1816+
r"""A Dataproc custom metric.
16411817
16421818
Attributes:
16431819
metric_source (google.cloud.dataproc_v1.types.DataprocMetricConfig.MetricSource):
1644-
Required. Default metrics are collected unless
1820+
Required. A standard set of metrics is collected unless
16451821
``metricOverrides`` are specified for the metric source (see
1646-
[Available OSS metrics]
1647-
(https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
1822+
[Custom metrics]
1823+
(https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
16481824
for more information).
16491825
metric_overrides (MutableSequence[str]):
1650-
Optional. Specify one or more [available OSS metrics]
1651-
(https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
1826+
Optional. Specify one or more [Custom metrics]
1827+
(https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
16521828
to collect for the metric source (for the ``SPARK`` metric
1653-
source, any [Spark metric]
1829+
source, any [Spark metric]
16541830
(https://spark.apache.org/docs/latest/monitoring.html#metrics)
16551831
can be specified).
16561832
@@ -1669,15 +1845,15 @@ class Metric(proto.Message):
16691845
16701846
Notes:
16711847
1672-
- Only the specified overridden metrics will be collected
1673-
for the metric source. For example, if one or more
1848+
- Only the specified overridden metrics are collected for
1849+
the metric source. For example, if one or more
16741850
``spark:executive`` metrics are listed as metric
1675-
overrides, other ``SPARK`` metrics will not be collected.
1676-
The collection of the default metrics for other OSS
1677-
metric sources is unaffected. For example, if both
1678-
``SPARK`` andd ``YARN`` metric sources are enabled, and
1679-
overrides are provided for Spark metrics only, all
1680-
default YARN metrics will be collected.
1851+
overrides, other ``SPARK`` metrics are not collected. The
1852+
collection of the metrics for other enabled custom metric
1853+
sources is unaffected. For example, if both ``SPARK``
1854+
and ``YARN`` metric sources are enabled, and overrides
1855+
are provided for Spark metrics only, all YARN metrics are
1856+
collected.
16811857
"""
16821858

16831859
metric_source: "DataprocMetricConfig.MetricSource" = proto.Field(

google/cloud/dataproc_v1/types/node_groups.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class ResizeNodeGroupRequest(proto.Message):
115115
underscores (_), and hyphens (-). The maximum length is 40
116116
characters.
117117
graceful_decommission_timeout (google.protobuf.duration_pb2.Duration):
118-
Optional. Timeout for graceful YARN decomissioning.
118+
Optional. Timeout for graceful YARN decommissioning.
119119
[Graceful decommissioning]
120120
(https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters#graceful_decommissioning)
121121
allows the removal of nodes from the Compute Engine node

0 commit comments

Comments
 (0)