diff --git a/.repo-metadata.json b/.repo-metadata.json index 58771655b6..d207a35896 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -4,10 +4,11 @@ "product_documentation": "https://cloud.google.com/ai-platform", "client_documentation": "https://cloud.google.com/python/docs/reference/aiplatform/latest", "issue_tracker": "https://issuetracker.google.com/savedsearches/559744", - "release_level": "ga", + "release_level": "stable", "language": "python", "library_type": "GAPIC_COMBO", "repo": "googleapis/python-aiplatform", "distribution_name": "google-cloud-aiplatform", - "api_id": "aiplatform.googleapis.com" + "api_id": "aiplatform.googleapis.com", + "api_shortname": "aiplatform" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e3632c5fd..33f6ed7e6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## [1.9.0](https://www.github.com/googleapis/python-aiplatform/compare/v1.8.1...v1.9.0) (2021-12-29) + + +### Features + +* add create in Featurestore, EntityType, Feature; add create_entity_type in Featurestore; add create_feature, batch_create_features in EntityType; add ingest_from_* for bq and gcs in EntityType; add and update delete with force delete nested resources ([#872](https://www.github.com/googleapis/python-aiplatform/issues/872)) ([ba11c3d](https://www.github.com/googleapis/python-aiplatform/commit/ba11c3d3cd8d3869e2deb3207a8698fa7ce284ec)) +* Add LIT methods for Pandas DataFrame and TensorFlow saved model. ([#874](https://www.github.com/googleapis/python-aiplatform/issues/874)) ([03cf301](https://www.github.com/googleapis/python-aiplatform/commit/03cf301989a5802b122803eac7a2d03f2d1769fb)) +* Add support to create TensorboardExperiment ([#909](https://www.github.com/googleapis/python-aiplatform/issues/909)) ([96ce738](https://www.github.com/googleapis/python-aiplatform/commit/96ce7387ac58e0ec7cb6a7f6d6a6e422eae5da96)) +* Add support to create TensorboardRun ([#912](https://www.github.com/googleapis/python-aiplatform/issues/912)) ([8df74a2](https://www.github.com/googleapis/python-aiplatform/commit/8df74a29df0adb95fff5500fcc9d7a025012ab5e)) + + +### Bug Fixes + +* Fix timestamp proto util to default to timestamp at call time. ([#933](https://www.github.com/googleapis/python-aiplatform/issues/933)) ([d72a254](https://www.github.com/googleapis/python-aiplatform/commit/d72a254e97cf74f3fdd55a32a4af86737243593a)) +* Improve handling of undeploying model without redistributing remaining traffic ([#898](https://www.github.com/googleapis/python-aiplatform/issues/898)) ([8a8a4fa](https://www.github.com/googleapis/python-aiplatform/commit/8a8a4faa667bde2a4df04afa23a6dd5b1856f958)) +* issues/192254729 ([#914](https://www.github.com/googleapis/python-aiplatform/issues/914)) ([3ec620c](https://www.github.com/googleapis/python-aiplatform/commit/3ec620c64bd60ceb5b89918200e11e3fbff67370)) +* issues/192254729 ([#915](https://www.github.com/googleapis/python-aiplatform/issues/915)) ([0f22ff6](https://www.github.com/googleapis/python-aiplatform/commit/0f22ff61460a3f2bd55d2c10c4ee06e582f03944)) +* use open_in_new_tab in the render method. 
([#926](https://www.github.com/googleapis/python-aiplatform/issues/926)) ([04618e0](https://www.github.com/googleapis/python-aiplatform/commit/04618e0563b8588eec2ccd8342c6085ca08b5adb)) + ### [1.8.1](https://www.github.com/googleapis/python-aiplatform/compare/v1.8.0...v1.8.1) (2021-12-14) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 3e206a5538..1defb5ad47 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -47,7 +47,11 @@ HyperparameterTuningJob, ) from google.cloud.aiplatform.pipeline_jobs import PipelineJob -from google.cloud.aiplatform.tensorboard import Tensorboard +from google.cloud.aiplatform.tensorboard import ( + Tensorboard, + TensorboardExperiment, + TensorboardRun, +) from google.cloud.aiplatform.training_jobs import ( CustomTrainingJob, CustomContainerTrainingJob, @@ -105,8 +109,10 @@ "Model", "PipelineJob", "TabularDataset", + "Tensorboard", + "TensorboardExperiment", + "TensorboardRun", "TextDataset", "TimeSeriesDataset", "VideoDataset", - "Tensorboard", ) diff --git a/google/cloud/aiplatform/base.py b/google/cloud/aiplatform/base.py index d572913a25..a3e8c352b0 100644 --- a/google/cloud/aiplatform/base.py +++ b/google/cloud/aiplatform/base.py @@ -397,7 +397,6 @@ class VertexAiResourceNoun(metaclass=abc.ABCMeta): Subclasses require two class attributes: client_class: The client to instantiate to interact with this resource noun. - _is_client_prediction_client: Flag to indicate if the client requires a prediction endpoint. Subclass is required to populate private attribute _gca_resource which is the service representation of the resource noun. @@ -414,29 +413,43 @@ def client_class(cls) -> Type[utils.VertexAiServiceClientWithOverride]: @property @classmethod @abc.abstractmethod - def _is_client_prediction_client(cls) -> bool: - """Flag to indicate whether to use prediction endpoint with client.""" - pass - - @property - @abc.abstractmethod def _getter_method(cls) -> str: """Name of getter method of client class for retrieving the resource.""" pass @property + @classmethod @abc.abstractmethod def _delete_method(cls) -> str: """Name of delete method of client class for deleting the resource.""" pass @property + @classmethod @abc.abstractmethod def _resource_noun(cls) -> str: """Resource noun.""" pass + @property + @classmethod + @abc.abstractmethod + def _parse_resource_name_method(cls) -> str: + """Method name on GAPIC client to parse a resource name.""" + pass + + @property + @classmethod + @abc.abstractmethod + def _format_resource_name_method(self) -> str: + """Method name on GAPIC client to format a resource name.""" + pass + + # Override this value with staticmethod + # to use custom resource id validators per resource + _resource_id_validator: Optional[Callable[[str], None]] = None + def __init__( self, project: Optional[str] = None, @@ -486,15 +499,48 @@ def _instantiate_client( client_class=cls.client_class, credentials=credentials, location_override=location, - prediction_client=cls._is_client_prediction_client, ) + @classmethod + def _parse_resource_name(cls, resource_name: str) -> Dict[str, str]: + """ + Parses resource name into its component segments. + + Args: + resource_name: Resource name of this resource. + Returns: + Dictionary of component segments. 
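A hedged sketch of the expected behavior, using the _Dataset subclass updated later in this diff (the resource name and segment values are illustrative):

    fields = _Dataset._parse_resource_name(
        "projects/123/locations/us-central1/datasets/456"
    )
    # fields == {"project": "123", "location": "us-central1", "dataset": "456"}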
+ """ + # gets the underlying wrapped gapic client class + return getattr( + cls.client_class.get_gapic_client_class(), cls._parse_resource_name_method + )(resource_name) + + @classmethod + def _format_resource_name(cls, **kwargs: str) -> str: + """ + Formats a resource name using its component segments. + + Args: + **kwargs: Resource name parts. Singular and snake case. ie: + format_resource_name( + project='my-project', + location='us-central1' + ) + Returns: + Resource name. + """ + # gets the underlying wrapped gapic client class + return getattr( + cls.client_class.get_gapic_client_class(), cls._format_resource_name_method + )(**kwargs) + def _get_and_validate_project_location( self, resource_name: str, project: Optional[str] = None, location: Optional[str] = None, - ) -> Tuple: + ) -> Tuple[str, str]: """Validate the project and location for the resource. @@ -507,33 +553,42 @@ def _get_and_validate_project_location( RuntimeError: If location is different from resource location """ - fields = utils.extract_fields_from_resource_name( - resource_name, self._resource_noun - ) + fields = self._parse_resource_name(resource_name) + if not fields: return project, location - if location and fields.location != location: + if location and fields["location"] != location: raise RuntimeError( f"location {location} is provided, but different from " - f"the resource location {fields.location}" + f"the resource location {fields['location']}" ) - return fields.project, fields.location + return fields["project"], fields["location"] + + def _get_gca_resource( + self, + resource_name: str, + parent_resource_name_fields: Optional[Dict[str, str]] = None, + ) -> proto.Message: + """Returns GAPIC service representation of client class resource. - def _get_gca_resource(self, resource_name: str) -> proto.Message: - """Returns GAPIC service representation of client class resource.""" - """ Args: - resource_name (str): - Required. A fully-qualified resource name or ID. + resource_name (str): Required. A fully-qualified resource name or ID. + parent_resource_name_fields (Dict[str,str]): + Optional. Mapping of parent resource name key to values. These + will be used to compose the resource name if only resource ID is given. + Should not include project and location. 
""" - resource_name = utils.full_resource_name( resource_name=resource_name, resource_noun=self._resource_noun, + parse_resource_name_method=self._parse_resource_name, + format_resource_name_method=self._format_resource_name, project=self.project, location=self.location, + parent_resource_name_fields=parent_resource_name_fields, + resource_id_validator=self._resource_id_validator, ) return getattr(self.api_client, self._getter_method)( diff --git a/google/cloud/aiplatform/datasets/dataset.py b/google/cloud/aiplatform/datasets/dataset.py index cdb769a8b2..30f518fb71 100644 --- a/google/cloud/aiplatform/datasets/dataset.py +++ b/google/cloud/aiplatform/datasets/dataset.py @@ -39,11 +39,12 @@ class _Dataset(base.VertexAiResourceNounWithFutureManager): """Managed dataset resource for Vertex AI.""" client_class = utils.DatasetClientWithOverride - _is_client_prediction_client = False _resource_noun = "datasets" _getter_method = "get_dataset" _list_method = "list_datasets" _delete_method = "delete_dataset" + _parse_resource_name_method = "parse_dataset_path" + _format_resource_name_method = "dataset_path" _supported_metadata_schema_uris: Tuple[str] = () diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py new file mode 100644 index 0000000000..c76840502d --- /dev/null +++ b/google/cloud/aiplatform/explain/lit.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Tuple, Union + +try: + from lit_nlp.api import dataset as lit_dataset + from lit_nlp.api import model as lit_model + from lit_nlp.api import types as lit_types + from lit_nlp import notebook +except ImportError: + raise ImportError( + "LIT is not installed and is required to get Dataset as the return format. " + 'Please install the SDK using "pip install python-aiplatform[lit]"' + ) + +try: + import tensorflow as tf +except ImportError: + raise ImportError( + "Tensorflow is not installed and is required to load saved model. " + 'Please install the SDK using "pip install pip install python-aiplatform[lit]"' + ) + +try: + import pandas as pd +except ImportError: + raise ImportError( + "Pandas is not installed and is required to read the dataset. " + 'Please install Pandas using "pip install python-aiplatform[lit]"' + ) + + +class _VertexLitDataset(lit_dataset.Dataset): + """LIT dataset class for the Vertex LIT integration. + + This is used in the create_lit_dataset function. + """ + + def __init__( + self, + dataset: pd.DataFrame, + column_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + ): + """Construct a VertexLitDataset. + Args: + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. 
+ """ + self._examples = dataset.to_dict(orient="records") + self._column_types = column_types + + def spec(self): + """Return a spec describing dataset elements.""" + return dict(self._column_types) + + +class _VertexLitModel(lit_model.Model): + """LIT model class for the Vertex LIT integration. + + This is used in the create_lit_model function. + """ + + def __init__( + self, + model: str, + input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + ): + """Construct a VertexLitModel. + Args: + model: + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + """ + self._loaded_model = tf.saved_model.load(model) + serving_default = self._loaded_model.signatures[ + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + _, self._kwargs_signature = serving_default.structured_input_signature + self._output_signature = serving_default.structured_outputs + + if len(self._kwargs_signature) != 1: + raise ValueError("Please use a model with only one input tensor.") + + if len(self._output_signature) != 1: + raise ValueError("Please use a model with only one output tensor.") + + self._input_types = input_types + self._output_types = output_types + + def predict_minibatch( + self, inputs: List[lit_types.JsonDict] + ) -> List[lit_types.JsonDict]: + """Returns predictions for a single batch of examples. + Args: + inputs: + sequence of inputs, following model.input_spec() + Returns: + list of outputs, following model.output_spec() + """ + instances = [] + for input in inputs: + instance = [input[feature] for feature in self._input_types] + instances.append(instance) + prediction_input_dict = { + next(iter(self._kwargs_signature)): tf.convert_to_tensor(instances) + } + prediction_dict = self._loaded_model.signatures[ + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ](**prediction_input_dict) + predictions = prediction_dict[next(iter(self._output_signature))].numpy() + outputs = [] + for prediction in predictions: + outputs.append( + { + label: value + for label, value in zip(self._output_types.keys(), prediction) + } + ) + return outputs + + def input_spec(self) -> lit_types.Spec: + """Return a spec describing model inputs.""" + return dict(self._input_types) + + def output_spec(self) -> lit_types.Spec: + """Return a spec describing model outputs.""" + return self._output_types + + +def create_lit_dataset( + dataset: pd.DataFrame, + column_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 +) -> lit_dataset.Dataset: + """Creates a LIT Dataset object. + Args: + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. + Returns: + A LIT Dataset object that has the data from the dataset provided. 
+ """ + return _VertexLitDataset(dataset, column_types) + + +def create_lit_model( + model: str, + input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 +) -> lit_model.Model: + """Creates a LIT Model object. + Args: + model: + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + Returns: + A LIT Model object that has the same functionality as the model provided. + """ + return _VertexLitModel(model, input_types, output_types) + + +def open_lit( + models: Dict[str, lit_model.Model], + datasets: Dict[str, lit_dataset.Dataset], + open_in_new_tab: bool = True, +): + """Open LIT from the provided models and datasets. + Args: + models: + Required. A list of LIT models to open LIT with. + input_types: + Required. A lit of LIT datasets to open LIT with. + open_in_new_tab: + Optional. A boolean to choose if LIT open in a new tab or not. + Raises: + ImportError if LIT is not installed. + """ + widget = notebook.LitWidget(models, datasets) + widget.render(open_in_new_tab=open_in_new_tab) + + +def set_up_and_open_lit( + dataset: Union[pd.DataFrame, lit_dataset.Dataset], + column_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + model: Union[str, lit_model.Model], + input_types: Union[List[str], Dict[str, lit_types.LitType]], + output_types: Union[str, List[str], Dict[str, lit_types.LitType]], + open_in_new_tab: bool = True, +) -> Tuple[lit_dataset.Dataset, lit_model.Model]: + """Creates a LIT dataset and model and opens LIT. + Args: + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. + model: + Required. A string reference to a TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + Returns: + A Tuple of the LIT dataset and model created. + Raises: + ImportError if LIT or TensorFlow is not installed. + ValueError if the model doesn't have only 1 input and output tensor. + """ + if not isinstance(dataset, lit_dataset.Dataset): + dataset = create_lit_dataset(dataset, column_types) + + if not isinstance(model, lit_model.Model): + model = create_lit_model(model, input_types, output_types) + + open_lit({"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab) + + return dataset, model diff --git a/google/cloud/aiplatform/featurestore/entity_type.py b/google/cloud/aiplatform/featurestore/entity_type.py index 327bf1931d..9b2524e45c 100644 --- a/google/cloud/aiplatform/featurestore/entity_type.py +++ b/google/cloud/aiplatform/featurestore/entity_type.py @@ -15,13 +15,18 @@ # limitations under the License. 
# -from typing import Dict, List, Optional, Sequence, Tuple +import datetime +from typing import Dict, List, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials from google.protobuf import field_mask_pb2 from google.cloud.aiplatform import base -from google.cloud.aiplatform.compat.types import entity_type as gca_entity_type +from google.cloud.aiplatform.compat.types import ( + entity_type as gca_entity_type, + featurestore_service as gca_featurestore_service, + io as gca_io, +) from google.cloud.aiplatform import featurestore from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import featurestore_utils @@ -36,10 +41,22 @@ class EntityType(base.VertexAiResourceNounWithFutureManager): client_class = utils.FeaturestoreClientWithOverride _is_client_prediction_client = False - _resource_noun = None + _resource_noun = "entityTypes" _getter_method = "get_entity_type" _list_method = "list_entity_types" _delete_method = "delete_entity_type" + _parse_resource_name_method = "parse_entity_type_path" + _format_resource_name_method = "entity_type_path" + + @staticmethod + def _resource_id_validator(resource_id: str): + """Validates resource ID. + + Args: + resource_id(str): + The resource id to validate. + """ + featurestore_utils.validate_id(resource_id) def __init__( self, @@ -69,7 +86,8 @@ def __init__( Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id" or "my_entity_type_id" when project and location are initialized or passed, with featurestore_id passed. featurestore_id (str): - Optional. Featurestore ID to retrieve entityType from, when entity_type_name is passed as entity_type ID. + Optional. Featurestore ID of an existing featurestore to retrieve entityType from, + when entity_type_name is passed as entity_type ID. project (str): Optional. Project to retrieve entityType from. If not set, project set in aiplatform.init will be used. @@ -81,31 +99,26 @@ def __init__( credentials set in aiplatform.init. 
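An illustrative instantiation covering both accepted forms (the resource IDs are placeholders):

    my_entity_type = aiplatform.EntityType(
        entity_type_name="projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id"
    )
    # or, with project and location initialized, by ID plus its parent featurestore:
    my_entity_type = aiplatform.EntityType(
        entity_type_name="my_entity_type_id",
        featurestore_id="my_featurestore_id",
    )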
""" - ( - featurestore_id, - _, - ) = featurestore_utils.validate_and_get_entity_type_resource_ids( - entity_type_name=entity_type_name, featurestore_id=featurestore_id - ) - - # TODO(b/208269923): Temporary workaround, update when base class supports nested resource - self._resource_noun = f"featurestores/{featurestore_id}/entityTypes" - super().__init__( project=project, location=location, credentials=credentials, resource_name=entity_type_name, ) - self._gca_resource = self._get_gca_resource(resource_name=entity_type_name) + self._gca_resource = self._get_gca_resource( + resource_name=entity_type_name, + parent_resource_name_fields={ + featurestore.Featurestore._resource_noun: featurestore_id + } + if featurestore_id + else featurestore_id, + ) @property def featurestore_name(self) -> str: """Full qualified resource name of the managed featurestore in which this EntityType is.""" - entity_type_name_components = featurestore_utils.CompatFeaturestoreServiceClient.parse_entity_type_path( - path=self.resource_name - ) - return featurestore_utils.CompatFeaturestoreServiceClient.featurestore_path( + entity_type_name_components = self._parse_resource_name(self.resource_name) + return featurestore.Featurestore._format_resource_name( project=entity_type_name_components["project"], location=entity_type_name_components["location"], featurestore=entity_type_name_components["featurestore"], @@ -128,12 +141,10 @@ def get_feature(self, feature_id: str) -> "featurestore.Feature": Returns: featurestore.Feature - The managed feature resource object. """ - entity_type_name_components = featurestore_utils.CompatFeaturestoreServiceClient.parse_entity_type_path( - path=self.resource_name - ) + entity_type_name_components = self._parse_resource_name(self.resource_name) return featurestore.Feature( - feature_name=featurestore_utils.CompatFeaturestoreServiceClient.feature_path( + feature_name=featurestore.Feature._format_resource_name( project=entity_type_name_components["project"], location=entity_type_name_components["location"], featurestore=entity_type_name_components["featurestore"], @@ -241,7 +252,8 @@ def list( Args: featurestore_name (str): - Required. A fully-qualified featurestore resource name or a featurestore ID to list entityTypes in + Required. A fully-qualified featurestore resource name or a featurestore ID + of an existing featurestore to list entityTypes in. Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id" or "my_featurestore_id" when project and location are initialized or passed. filter (str): @@ -299,9 +311,12 @@ def list( credentials=credentials, parent=utils.full_resource_name( resource_name=featurestore_name, - resource_noun="featurestores", + resource_noun=featurestore.Featurestore._resource_noun, + parse_resource_name_method=featurestore.Featurestore._parse_resource_name, + format_resource_name_method=featurestore.Featurestore._format_resource_name, project=project, location=location, + resource_id_validator=featurestore.Featurestore._resource_id_validator, ), ) @@ -381,3 +396,745 @@ def delete_features(self, feature_ids: List[str], sync: bool = True,) -> None: for feature in features: feature.wait() + + @base.optional_sync() + def delete(self, sync: bool = True, force: bool = False) -> None: + """Deletes this EntityType resource. If force is set to True, + all features in this EntityType will be deleted prior to entityType deletion. + + WARNING: This deletion is permanent. 
+ + Args: + force (bool): + If set to true, any Features for this + EntityType will also be deleted. + (Otherwise, the request will only work + if the EntityType has no Features.) + sync (bool): + Whether to execute this deletion synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + Raises: + FailedPrecondition: If features are created in this EntityType and force = False. + """ + _LOGGER.log_action_start_against_resource("Deleting", "", self) + lro = getattr(self.api_client, self._delete_method)( + name=self.resource_name, force=force + ) + _LOGGER.log_action_started_against_resource_with_lro( + "Delete", "", self.__class__, lro + ) + lro.result() + _LOGGER.log_action_completed_against_resource("deleted.", "", self) + + @classmethod + @base.optional_sync() + def create( + cls, + entity_type_id: str, + featurestore_name: str, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "EntityType": + """Creates an EntityType resource in a Featurestore. + + Example Usage: + + my_entity_type = aiplatform.EntityType.create( + entity_type_id='my_entity_type_id', + featurestore_name='projects/123/locations/us-central1/featurestores/my_featurestore_id' + ) + or + my_entity_type = aiplatform.EntityType.create( + entity_type_id='my_entity_type_id', + featurestore_name='my_featurestore_id', + ) + + Args: + entity_type_id (str): + Required. The ID to use for the EntityType, which will + become the final component of the EntityType's resource + name. + + This value may be up to 60 characters, and valid characters + are ``[a-z0-9_]``. The first character cannot be a number. + + The value must be unique within a featurestore. + featurestore_name (str): + Required. A fully-qualified featurestore resource name or a featurestore ID + of an existing featurestore to create EntityType in. + Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id" + or "my_featurestore_id" when project and location are initialized or passed. + description (str): + Optional. Description of the EntityType. + labels (Dict[str, str]): + Optional. The labels with user-defined + metadata to organize your EntityTypes. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + on and examples of labels. No more than 64 user + labels can be associated with one EntityType + (System labels are excluded)." + System reserved label keys are prefixed with + "aiplatform.googleapis.com/" and are immutable. + project (str): + Optional. Project to create EntityType in if `featurestore_name` is passed an featurestore ID. + If not set, project set in aiplatform.init will be used. + location (str): + Optional. Location to create EntityType in if `featurestore_name` is passed an featurestore ID. + If not set, location set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to create EntityTypes. Overrides + credentials set in aiplatform.init. + request_metadata (Sequence[Tuple[str, str]]): + Optional. 
Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + EntityType - entity_type resource object + + """ + + featurestore_name = utils.full_resource_name( + resource_name=featurestore_name, + resource_noun=featurestore.Featurestore._resource_noun, + parse_resource_name_method=featurestore.Featurestore._parse_resource_name, + format_resource_name_method=featurestore.Featurestore._format_resource_name, + project=project, + location=location, + resource_id_validator=featurestore.Featurestore._resource_id_validator, + ) + + featurestore_name_components = featurestore.Featurestore._parse_resource_name( + featurestore_name + ) + + gapic_entity_type = gca_entity_type.EntityType() + + if labels: + utils.validate_labels(labels) + gapic_entity_type.labels = labels + + if description: + gapic_entity_type.description = description + + api_client = cls._instantiate_client( + location=featurestore_name_components["location"], credentials=credentials, + ) + + created_entity_type_lro = api_client.create_entity_type( + parent=featurestore_name, + entity_type=gapic_entity_type, + entity_type_id=entity_type_id, + metadata=request_metadata, + ) + + _LOGGER.log_create_with_lro(cls, created_entity_type_lro) + + created_entity_type = created_entity_type_lro.result() + + _LOGGER.log_create_complete(cls, created_entity_type, "entity_type") + + entity_type_obj = cls( + entity_type_name=created_entity_type.name, + project=project, + location=location, + credentials=credentials, + ) + + return entity_type_obj + + def create_feature( + self, + feature_id: str, + value_type: str, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "featurestore.Feature": + """Creates a Feature resource in this EntityType. + + Example Usage: + + my_entity_type = aiplatform.EntityType( + entity_type_name='my_entity_type_id', + featurestore_id='my_featurestore_id', + ) + my_feature = my_entity_type.create_feature( + feature_id='my_feature_id', + value_type='INT64', + ) + + Args: + feature_id (str): + Required. The ID to use for the Feature, which will become + the final component of the Feature's resource name, which is immutable. + + This value may be up to 60 characters, and valid characters + are ``[a-z0-9_]``. The first character cannot be a number. + + The value must be unique within an EntityType. + value_type (str): + Required. Immutable. Type of Feature value. + One of BOOL, BOOL_ARRAY, DOUBLE, DOUBLE_ARRAY, INT64, INT64_ARRAY, STRING, STRING_ARRAY, BYTES. + description (str): + Optional. Description of the Feature. + labels (Dict[str, str]): + Optional. The labels with user-defined + metadata to organize your Features. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + on and examples of labels. No more than 64 user + labels can be associated with one Feature + (System labels are excluded)." + System reserved label keys are prefixed with + "aiplatform.googleapis.com/" and are immutable. + request_metadata (Sequence[Tuple[str, str]]): + Optional. 
Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + featurestore.Feature - feature resource object + + """ + return featurestore.Feature.create( + feature_id=feature_id, + value_type=value_type, + entity_type_name=self.resource_name, + description=description, + labels=labels, + request_metadata=request_metadata, + sync=sync, + ) + + def _validate_and_get_create_feature_requests( + self, + feature_configs: Dict[str, Dict[str, Union[bool, int, Dict[str, str], str]]], + ) -> List[gca_featurestore_service.CreateFeatureRequest]: + """ Validates feature_configs and get requests for batch feature creation + + Args: + feature_configs (Dict[str, Dict[str, Union[bool, int, Dict[str, str], str]]]): + Required. A user defined Dict containing configurations for feature creation. + + Returns: + List[gca_featurestore_service.CreateFeatureRequest] - requests for batch feature creation + """ + + requests = [] + for feature_id, feature_config in feature_configs.items(): + feature_config = featurestore_utils._FeatureConfig( + feature_id=feature_id, + value_type=feature_config.get( + "value_type", featurestore_utils._FEATURE_VALUE_TYPE_UNSPECIFIED + ), + description=feature_config.get("description", None), + labels=feature_config.get("labels", {}), + ) + create_feature_request = feature_config.get_create_feature_request() + requests.append(create_feature_request) + + return requests + + @base.optional_sync(return_input_arg="self") + def batch_create_features( + self, + feature_configs: Dict[str, Dict[str, Union[bool, int, Dict[str, str], str]]], + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "EntityType": + """Batch creates Feature resources in this EntityType. + + Example Usage: + + my_entity_type = aiplatform.EntityType( + entity_type_name='my_entity_type_id', + featurestore_id='my_featurestore_id', + ) + my_entity_type.batch_create_features( + feature_configs={ + "my_feature_id1": { + "value_type": "INT64", + }, + "my_feature_id2": { + "value_type": "BOOL", + }, + "my_feature_id3": { + "value_type": "STRING", + }, + } + ) + + Args: + feature_configs (Dict[str, Dict[str, Union[bool, int, Dict[str, str], str]]]): + Required. A user defined Dict containing configurations for feature creation. + + The feature_configs Dict[str, Dict] i.e. {feature_id: feature_config} contains configuration for each creating feature: + Example: + feature_configs = { + "my_feature_id_1": feature_config_1, + "my_feature_id_2": feature_config_2, + "my_feature_id_3": feature_config_3, + } + + Each feature_config requires "value_type", and optional "description", "labels": + Example: + feature_config_1 = { + "value_type": "INT64", + } + feature_config_2 = { + "value_type": "BOOL", + "description": "my feature id 2 description" + } + feature_config_3 = { + "value_type": "STRING", + "labels": { + "my key": "my value", + } + } + + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. 
+ + Returns: + EntityType - entity_type resource object + """ + create_feature_requests = self._validate_and_get_create_feature_requests( + feature_configs=feature_configs + ) + + _LOGGER.log_action_start_against_resource( + "Batch creating features", "entityType", self, + ) + + batch_created_features_lro = self.api_client.batch_create_features( + parent=self.resource_name, + requests=create_feature_requests, + metadata=request_metadata, + ) + + _LOGGER.log_action_started_against_resource_with_lro( + "Batch create Features", + "entityType", + self.__class__, + batch_created_features_lro, + ) + + batch_created_features_lro.result() + + _LOGGER.log_action_completed_against_resource( + "entityType", "Batch created features", self + ) + + return self + + def _validate_and_get_import_feature_values_request( + self, + feature_ids: List[str], + feature_time: Union[str, datetime.datetime], + data_source: Union[gca_io.AvroSource, gca_io.BigQuerySource, gca_io.CsvSource], + feature_source_fields: Optional[Dict[str, str]] = None, + entity_id_field: Optional[str] = None, + disable_online_serving: Optional[bool] = None, + worker_count: Optional[int] = None, + ) -> gca_featurestore_service.ImportFeatureValuesRequest: + """Validates and get import feature values request. + Args: + feature_ids (List[str]): + Required. IDs of the Feature to import values + of. The Features must exist in the target + EntityType, or the request will fail. + feature_time (Union[str, datetime.datetime]): + Required. The feature_time can be one of: + - The source column that holds the Feature + timestamp for all Feature values in each entity. + - A single Feature timestamp for all entities + being imported. The timestamp must not have + higher than millisecond precision. + data_source (Union[gca_io.AvroSource, gca_io.BiqQuerySource, gca_io.CsvSource]): + Required. The data_source can be one of: + - AvroSource + - BiqQuerySource + - CsvSource + feature_source_fields (Dict[str, str]): + Optional. User defined dictionary to map ID of the Feature for importing values + of to the source column for getting the Feature values from. + + Specify the features whose ID and source column are not the same. + If not provided, the source column need to be the same as the Feature ID. + + Example: + + feature_ids = ['my_feature_id_1', 'my_feature_id_2', 'my_feature_id_3'] + + In case all features' source field and ID match: + feature_source_fields = None or {} + + In case all features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + 'my_feature_id_2': 'my_feature_id_2_source_field', + 'my_feature_id_3': 'my_feature_id_3_source_field', + } + + In case some features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + } + entity_id_field (str): + Optional. Source column that holds entity IDs. If not provided, entity + IDs are extracted from the column named ``entity_id``. + disable_online_serving (bool): + Optional. If set, data will not be imported for online + serving. This is typically used for backfilling, + where Feature generation timestamps are not in + the timestamp range needed for online serving. + worker_count (int): + Optional. Specifies the number of workers that are used + to write data to the Featurestore. Consider the + online serving capacity that you require to + achieve the desired import throughput without + interfering with online serving. 
The value must + be positive, and less than or equal to 100. If + not set, defaults to using 1 worker. The low + count ensures minimal impact on online serving + performance. + Returns: + gca_featurestore_service.ImportFeatureValuesRequest - request message for importing feature values + Raises: + ValueError if data_source type is not supported + ValueError if feature_time type is not supported + """ + feature_source_fields = feature_source_fields or {} + feature_specs = [ + gca_featurestore_service.ImportFeatureValuesRequest.FeatureSpec( + id=feature_id, source_field=feature_source_fields.get(feature_id) + ) + for feature_id in set(feature_ids) + ] + + import_feature_values_request = gca_featurestore_service.ImportFeatureValuesRequest( + entity_type=self.resource_name, + feature_specs=feature_specs, + entity_id_field=entity_id_field, + disable_online_serving=disable_online_serving, + worker_count=worker_count, + ) + + if isinstance(data_source, gca_io.AvroSource): + import_feature_values_request.avro_source = data_source + elif isinstance(data_source, gca_io.BigQuerySource): + import_feature_values_request.bigquery_source = data_source + elif isinstance(data_source, gca_io.CsvSource): + import_feature_values_request.csv_source = data_source + else: + raise ValueError( + f"The type of `data_source` field should be: " + f"`gca_io.AvroSource`, `gca_io.BigQuerySource`, or `gca_io.CsvSource`, " + f"get {type(data_source)} instead. " + ) + + if isinstance(feature_time, str): + import_feature_values_request.feature_time_field = feature_time + elif isinstance(feature_time, datetime.datetime): + import_feature_values_request.feature_time = utils.get_timestamp_proto( + time=feature_time + ) + else: + raise ValueError( + f"The type of `feature_time` field should be: `str` or `datetime.datetime`, " + f"get {type(feature_time)} instead. " + ) + + return import_feature_values_request + + def _import_feature_values( + self, + import_feature_values_request: gca_featurestore_service.ImportFeatureValuesRequest, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + ) -> "EntityType": + """Imports Feature values into the Featurestore from a source storage. + + Args: + import_feature_values_request (gca_featurestore_service.ImportFeatureValuesRequest): + Required. Request message for importing feature values. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + + Returns: + EntityType - The entityType resource object with imported feature values. + """ + _LOGGER.log_action_start_against_resource( + "Importing", "feature values", self, + ) + + import_lro = self.api_client.import_feature_values( + request=import_feature_values_request, metadata=request_metadata, + ) + + _LOGGER.log_action_started_against_resource_with_lro( + "Import", "feature values", self.__class__, import_lro + ) + + import_lro.result() + + _LOGGER.log_action_completed_against_resource( + "feature values", "imported", self + ) + + return self + + @base.optional_sync(return_input_arg="self") + def ingest_from_bq( + self, + feature_ids: List[str], + feature_time: Union[str, datetime.datetime], + bq_source_uri: str, + feature_source_fields: Optional[Dict[str, str]] = None, + entity_id_field: Optional[str] = None, + disable_online_serving: Optional[bool] = None, + worker_count: Optional[int] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "EntityType": + """Ingest feature values from BigQuery. 
+ + Args: + feature_ids (List[str]): + Required. IDs of the Feature to import values + of. The Features must exist in the target + EntityType, or the request will fail. + feature_time (Union[str, datetime.datetime]): + Required. The feature_time can be one of: + - The source column that holds the Feature + timestamp for all Feature values in each entity. + - A single Feature timestamp for all entities + being imported. The timestamp must not have + higher than millisecond precision. + bq_source_uri (str): + Required. BigQuery URI to the input table. + Example: + 'bq://project.dataset.table_name' + feature_source_fields (Dict[str, str]): + Optional. User defined dictionary to map ID of the Feature for importing values + of to the source column for getting the Feature values from. + + Specify the features whose ID and source column are not the same. + If not provided, the source column need to be the same as the Feature ID. + + Example: + + feature_ids = ['my_feature_id_1', 'my_feature_id_2', 'my_feature_id_3'] + + In case all features' source field and ID match: + feature_source_fields = None or {} + + In case all features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + 'my_feature_id_2': 'my_feature_id_2_source_field', + 'my_feature_id_3': 'my_feature_id_3_source_field', + } + + In case some features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + } + entity_id_field (str): + Optional. Source column that holds entity IDs. If not provided, entity + IDs are extracted from the column named ``entity_id``. + disable_online_serving (bool): + Optional. If set, data will not be imported for online + serving. This is typically used for backfilling, + where Feature generation timestamps are not in + the timestamp range needed for online serving. + worker_count (int): + Optional. Specifies the number of workers that are used + to write data to the Featurestore. Consider the + online serving capacity that you require to + achieve the desired import throughput without + interfering with online serving. The value must + be positive, and less than or equal to 100. If + not set, defaults to using 1 worker. The low + count ensures minimal impact on online serving + performance. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this import synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + EntityType - The entityType resource object with feature values imported. 
+ + """ + bigquery_source = gca_io.BigQuerySource(input_uri=bq_source_uri) + + import_feature_values_request = self._validate_and_get_import_feature_values_request( + feature_ids=feature_ids, + feature_time=feature_time, + data_source=bigquery_source, + feature_source_fields=feature_source_fields, + entity_id_field=entity_id_field, + disable_online_serving=disable_online_serving, + worker_count=worker_count, + ) + + return self._import_feature_values( + import_feature_values_request=import_feature_values_request, + request_metadata=request_metadata, + ) + + @base.optional_sync(return_input_arg="self") + def ingest_from_gcs( + self, + feature_ids: List[str], + feature_time: Union[str, datetime.datetime], + gcs_source_uris: Union[str, List[str]], + gcs_source_type: str, + feature_source_fields: Optional[Dict[str, str]] = None, + entity_id_field: Optional[str] = None, + disable_online_serving: Optional[bool] = None, + worker_count: Optional[int] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "EntityType": + """Ingest feature values from GCS. + + Args: + feature_ids (List[str]): + Required. IDs of the Feature to import values + of. The Features must exist in the target + EntityType, or the request will fail. + feature_time (Union[str, datetime.datetime]): + Required. The feature_time can be one of: + - The source column that holds the Feature + timestamp for all Feature values in each entity. + - A single Feature timestamp for all entities + being imported. The timestamp must not have + higher than millisecond precision. + gcs_source_uris (Union[str, List[str]]): + Required. Google Cloud Storage URI(-s) to the + input file(s). May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. + Example: + ["gs://my_bucket/my_file_1.csv", "gs://my_bucket/my_file_2.csv"] + or + "gs://my_bucket/my_file.avro" + gcs_source_type (str): + Required. The type of the input file(s) provided by `gcs_source_uris`, + the value of gcs_source_type can only be either `csv`, or `avro`. + feature_source_fields (Dict[str, str]): + Optional. User defined dictionary to map ID of the Feature for importing values + of to the source column for getting the Feature values from. + + Specify the features whose ID and source column are not the same. + If not provided, the source column need to be the same as the Feature ID. + + Example: + + feature_ids = ['my_feature_id_1', 'my_feature_id_2', 'my_feature_id_3'] + + In case all features' source field and ID match: + feature_source_fields = None or {} + + In case all features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + 'my_feature_id_2': 'my_feature_id_2_source_field', + 'my_feature_id_3': 'my_feature_id_3_source_field', + } + + In case some features' source field and ID do not match: + feature_source_fields = { + 'my_feature_id_1': 'my_feature_id_1_source_field', + } + entity_id_field (str): + Optional. Source column that holds entity IDs. If not provided, entity + IDs are extracted from the column named ``entity_id``. + disable_online_serving (bool): + Optional. If set, data will not be imported for online + serving. This is typically used for backfilling, + where Feature generation timestamps are not in + the timestamp range needed for online serving. + worker_count (int): + Optional. Specifies the number of workers that are used + to write data to the Featurestore. 
Consider the + online serving capacity that you require to + achieve the desired import throughput without + interfering with online serving. The value must + be positive, and less than or equal to 100. If + not set, defaults to using 1 worker. The low + count ensures minimal impact on online serving + performance. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this import synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + EntityType - The entityType resource object with feature values imported. + + Raises: + ValueError if gcs_source_type is not supported. + """ + if gcs_source_type not in featurestore_utils.GCS_SOURCE_TYPE: + raise ValueError( + "Only %s are supported gcs_source_type, not `%s`. " + % ( + "`" + "`, `".join(featurestore_utils.GCS_SOURCE_TYPE) + "`", + gcs_source_type, + ) + ) + + if isinstance(gcs_source_uris, str): + gcs_source_uris = [gcs_source_uris] + gcs_source = gca_io.GcsSource(uris=gcs_source_uris) + + if gcs_source_type == "csv": + data_source = gca_io.CsvSource(gcs_source=gcs_source) + if gcs_source_type == "avro": + data_source = gca_io.AvroSource(gcs_source=gcs_source) + + import_feature_values_request = self._validate_and_get_import_feature_values_request( + feature_ids=feature_ids, + feature_time=feature_time, + data_source=data_source, + feature_source_fields=feature_source_fields, + entity_id_field=entity_id_field, + disable_online_serving=disable_online_serving, + worker_count=worker_count, + ) + + return self._import_feature_values( + import_feature_values_request=import_feature_values_request, + request_metadata=request_metadata, + ) diff --git a/google/cloud/aiplatform/featurestore/feature.py b/google/cloud/aiplatform/featurestore/feature.py index ab199d0c57..d41344f086 100644 --- a/google/cloud/aiplatform/featurestore/feature.py +++ b/google/cloud/aiplatform/featurestore/feature.py @@ -36,10 +36,22 @@ class Feature(base.VertexAiResourceNounWithFutureManager): client_class = utils.FeaturestoreClientWithOverride _is_client_prediction_client = False - _resource_noun = None + _resource_noun = "features" _getter_method = "get_feature" _list_method = "list_features" _delete_method = "delete_feature" + _parse_resource_name_method = "parse_feature_path" + _format_resource_name_method = "feature_path" + + @staticmethod + def _resource_id_validator(resource_id: str): + """Validates resource ID. + + Args: + resource_id(str): + The resource id to validate. + """ + featurestore_utils.validate_feature_id(resource_id) def __init__( self, @@ -71,9 +83,12 @@ def __init__( Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id/features/my_feature_id" or "my_feature_id" when project and location are initialized or passed, with featurestore_id and entity_type_id passed. featurestore_id (str): - Optional. Featurestore ID to retrieve feature from, when feature_name is passed as Feature ID. + Optional. Featurestore ID of an existing featurestore to retrieve feature from, + when feature_name is passed as Feature ID. entity_type_id (str): - Optional. EntityType ID to retrieve feature from, when feature_name is passed as Feature ID. + Optional. EntityType ID of an existing entityType to retrieve feature from, + when feature_name is passed as Feature ID. 
+ The EntityType must exist in the Featurestore if provided by the featurestore_id. project (str): Optional. Project to retrieve feature from. If not set, project set in aiplatform.init will be used. @@ -83,21 +98,14 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve this Feature. Overrides credentials set in aiplatform.init. + Raises: + ValueError: If only one of featurestore_id or entity_type_id is provided. """ - ( - featurestore_id, - entity_type_id, - _, - ) = featurestore_utils.validate_and_get_feature_resource_ids( - feature_name=feature_name, - entity_type_id=entity_type_id, - featurestore_id=featurestore_id, - ) - # TODO(b/208269923): Temporary workaround, update when base class supports nested resource - self._resource_noun = ( - f"featurestores/{featurestore_id}/entityTypes/{entity_type_id}/features" - ) + if bool(featurestore_id) != bool(entity_type_id): + raise ValueError( + "featurestore_id and entity_type_id must both be provided or ommitted." + ) super().__init__( project=project, @@ -105,16 +113,22 @@ def __init__( credentials=credentials, resource_name=feature_name, ) - self._gca_resource = self._get_gca_resource(resource_name=feature_name) + self._gca_resource = self._get_gca_resource( + resource_name=feature_name, + parent_resource_name_fields={ + featurestore.Featurestore._resource_noun: featurestore_id, + featurestore.EntityType._resource_noun: entity_type_id, + } + if featurestore_id + else featurestore_id, + ) @property def featurestore_name(self) -> str: """Full qualified resource name of the managed featurestore in which this Feature is.""" - feature_path_components = featurestore_utils.CompatFeaturestoreServiceClient.parse_feature_path( - path=self.resource_name - ) + feature_path_components = self._parse_resource_name(self.resource_name) - return featurestore_utils.CompatFeaturestoreServiceClient.featurestore_path( + return featurestore.Featurestore._format_resource_name( project=feature_path_components["project"], location=feature_path_components["location"], featurestore=feature_path_components["featurestore"], @@ -131,11 +145,9 @@ def get_featurestore(self) -> "featurestore.Featurestore": @property def entity_type_name(self) -> str: """Full qualified resource name of the managed entityType in which this Feature is.""" - feature_path_components = featurestore_utils.CompatFeaturestoreServiceClient.parse_feature_path( - path=self.resource_name - ) + feature_path_components = self._parse_resource_name(self.resource_name) - return featurestore_utils.CompatFeaturestoreServiceClient.entity_type_path( + return featurestore.EntityType._format_resource_name( project=feature_path_components["project"], location=feature_path_components["location"], featurestore=feature_path_components["featurestore"], @@ -252,11 +264,13 @@ def list( Args: entity_type_name (str): - Required. A fully-qualified entityType resource name or an entity_type ID to list features in + Required. A fully-qualified entityType resource name or an entity_type ID of an existing entityType + to list features in. The EntityType must exist in the Featurestore if provided by the featurestore_id. Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id" or "my_entity_type_id" when project and location are initialized or passed, with featurestore_id passed. featurestore_id (str): - Optional. Featurestore ID to list features in, when entity_type_name is passed as entity_type ID. + Optional. 
Featurestore ID of an existing featurestore to list features in, + when entity_type_name is passed as entity_type ID. filter (str): Optional. Lists the Features that match the filter expression. The following filters are supported: @@ -303,12 +317,6 @@ def list( Returns: List[Feature] - A list of managed feature resource objects """ - ( - featurestore_id, - entity_type_id, - ) = featurestore_utils.validate_and_get_entity_type_resource_ids( - entity_type_name=entity_type_name, featurestore_id=featurestore_id, - ) return cls._list( filter=filter, @@ -318,9 +326,17 @@ def list( credentials=credentials, parent=utils.full_resource_name( resource_name=entity_type_name, - resource_noun=f"featurestores/{featurestore_id}/entityTypes", + resource_noun=featurestore.EntityType._resource_noun, + parse_resource_name_method=featurestore.EntityType._parse_resource_name, + format_resource_name_method=featurestore.EntityType._format_resource_name, + parent_resource_name_fields={ + featurestore.Featurestore._resource_noun: featurestore_id + } + if featurestore_id + else featurestore_id, project=project, location=location, + resource_id_validator=featurestore.EntityType._resource_id_validator, ), ) @@ -461,3 +477,144 @@ def search( ) for gapic_resource in resource_list ] + + @classmethod + @base.optional_sync() + def create( + cls, + feature_id: str, + value_type: str, + entity_type_name: str, + featurestore_id: Optional[str] = None, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "Feature": + """Creates a Feature resource in an EntityType. + + Example Usage: + + my_feature = aiplatform.Feature.create( + feature_id='my_feature_id', + value_type='INT64', + entity_type_name='projects/123/locations/us-central1/featurestores/my_featurestore_id/\ + entityTypes/my_entity_type_id' + ) + or + my_feature = aiplatform.Feature.create( + feature_id='my_feature_id', + value_type='INT64', + entity_type_name='my_entity_type_id', + featurestore_id='my_featurestore_id', + ) + + Args: + feature_id (str): + Required. The ID to use for the Feature, which will become + the final component of the Feature's resource name, which is immutable. + + This value may be up to 60 characters, and valid characters + are ``[a-z0-9_]``. The first character cannot be a number. + + The value must be unique within an EntityType. + value_type (str): + Required. Immutable. Type of Feature value. + One of BOOL, BOOL_ARRAY, DOUBLE, DOUBLE_ARRAY, INT64, INT64_ARRAY, STRING, STRING_ARRAY, BYTES. + entity_type_name (str): + Required. A fully-qualified entityType resource name or an entity_type ID of an existing entityType + to create Feature in. The EntityType must exist in the Featurestore if provided by the featurestore_id. + Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id" + or "my_entity_type_id" when project and location are initialized or passed, with featurestore_id passed. + featurestore_id (str): + Optional. Featurestore ID of an existing featurestore to create Feature in + if `entity_type_name` is passed an entity_type ID. + description (str): + Optional. Description of the Feature. + labels (Dict[str, str]): + Optional. The labels with user-defined + metadata to organize your Features. 
+ Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + on and examples of labels. No more than 64 user + labels can be associated with one Feature + (System labels are excluded)." + System reserved label keys are prefixed with + "aiplatform.googleapis.com/" and are immutable. + project (str): + Optional. Project to create Feature in if `entity_type_name` is passed an entity_type ID. + If not set, project set in aiplatform.init will be used. + location (str): + Optional. Location to create Feature in if `entity_type_name` is passed an entity_type ID. + If not set, location set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to create Features. Overrides + credentials set in aiplatform.init. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + Feature - feature resource object + + """ + entity_type_name = utils.full_resource_name( + resource_name=entity_type_name, + resource_noun=featurestore.EntityType._resource_noun, + parse_resource_name_method=featurestore.EntityType._parse_resource_name, + format_resource_name_method=featurestore.EntityType._format_resource_name, + parent_resource_name_fields={ + featurestore.Featurestore._resource_noun: featurestore_id + } + if featurestore_id + else featurestore_id, + project=project, + location=location, + resource_id_validator=featurestore.EntityType._resource_id_validator, + ) + entity_type_name_components = featurestore.EntityType._parse_resource_name( + entity_type_name + ) + + feature_config = featurestore_utils._FeatureConfig( + feature_id=feature_id, + value_type=value_type, + description=description, + labels=labels, + ) + + create_feature_request = feature_config.get_create_feature_request() + create_feature_request.parent = entity_type_name + + api_client = cls._instantiate_client( + location=entity_type_name_components["location"], credentials=credentials, + ) + + created_feature_lro = api_client.create_feature( + request=create_feature_request, metadata=request_metadata, + ) + + _LOGGER.log_create_with_lro(cls, created_feature_lro) + + created_feature = created_feature_lro.result() + + _LOGGER.log_create_complete(cls, created_feature, "feature") + + feature_obj = cls( + feature_name=created_feature.name, + project=project, + location=location, + credentials=credentials, + ) + + return feature_obj diff --git a/google/cloud/aiplatform/featurestore/featurestore.py b/google/cloud/aiplatform/featurestore/featurestore.py index d3bb0a0c11..d799e22963 100644 --- a/google/cloud/aiplatform/featurestore/featurestore.py +++ b/google/cloud/aiplatform/featurestore/featurestore.py @@ -23,6 +23,7 @@ from google.cloud.aiplatform import base from google.cloud.aiplatform.compat.types import featurestore as gca_featurestore from google.cloud.aiplatform import featurestore +from google.cloud.aiplatform import initializer from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import featurestore_utils @@ -39,6 +40,18 @@ class 
Featurestore(base.VertexAiResourceNounWithFutureManager): _getter_method = "get_featurestore" _list_method = "list_featurestores" _delete_method = "delete_featurestore" + _parse_resource_name_method = "parse_featurestore_path" + _format_resource_name_method = "featurestore_path" + + @staticmethod + def _resource_id_validator(resource_id: str): + """Validates resource ID. + + Args: + resource_id(str): + The resource id to validate. + """ + featurestore_utils.validate_id(resource_id) def __init__( self, @@ -92,12 +105,10 @@ def get_entity_type(self, entity_type_id: str) -> "featurestore.EntityType": Returns: featurestore.EntityType - The managed entityType resource object. """ - featurestore_name_components = featurestore_utils.CompatFeaturestoreServiceClient.parse_featurestore_path( - path=self.resource_name - ) + featurestore_name_components = self._parse_resource_name(self.resource_name) return featurestore.EntityType( - entity_type_name=featurestore_utils.CompatFeaturestoreServiceClient.entity_type_path( + entity_type_name=featurestore.EntityType._format_resource_name( project=featurestore_name_components["project"], location=featurestore_name_components["location"], featurestore=featurestore_name_components["featurestore"], @@ -301,7 +312,7 @@ def list_entity_types( @base.optional_sync() def delete_entity_types( - self, entity_type_ids: List[str], sync: bool = True, + self, entity_type_ids: List[str], sync: bool = True, force: bool = False, ) -> None: """Deletes entity_type resources in this Featurestore given their entity_type IDs. WARNING: This deletion is permanent. @@ -313,12 +324,236 @@ def delete_entity_types( Optional. Whether to execute this deletion synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + force (bool): + Optional. If force is set to True, all features in each entityType + will be deleted prior to entityType deletion. Default is False. """ entity_types = [] for entity_type_id in entity_type_ids: entity_type = self.get_entity_type(entity_type_id=entity_type_id) - entity_type.delete(sync=False) + entity_type.delete(force=force, sync=False) entity_types.append(entity_type) for entity_type in entity_types: entity_type.wait() + + @base.optional_sync() + def delete(self, sync: bool = True, force: bool = False) -> None: + """Deletes this Featurestore resource. If force is set to True, + all entityTypes in this Featurestore will be deleted prior to featurestore deletion, + and all features in each entityType will be deleted prior to each entityType deletion. + + WARNING: This deletion is permanent. + + Args: + force (bool): + If set to true, any EntityTypes and + Features for this Featurestore will also + be deleted. (Otherwise, the request will + only work if the Featurestore has no + EntityTypes.) + sync (bool): + Whether to execute this deletion synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. 
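A short sketch of the force-delete behavior added here, assuming an existing featurestore (all resource IDs are placeholders):

```python
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

fs = aiplatform.Featurestore(featurestore_name="my_featurestore_id")

# Delete specific entityTypes, removing their Features first.
fs.delete_entity_types(entity_type_ids=["users", "movies"], force=True)

# Or delete the featurestore itself, cascading through entityTypes and Features.
fs.delete(force=True)
```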
+ """ + _LOGGER.log_action_start_against_resource("Deleting", "", self) + lro = getattr(self.api_client, self._delete_method)( + name=self.resource_name, force=force + ) + _LOGGER.log_action_started_against_resource_with_lro( + "Delete", "", self.__class__, lro + ) + lro.result() + _LOGGER.log_action_completed_against_resource("deleted.", "", self) + + @classmethod + @base.optional_sync() + def create( + cls, + featurestore_id: str, + online_store_fixed_node_count: Optional[int] = None, + labels: Optional[Dict[str, str]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + encryption_spec_key_name: Optional[str] = None, + sync: bool = True, + ) -> "Featurestore": + """Creates a Featurestore resource. + + Example Usage: + + my_entity_type = aiplatform.EntityType.create( + entity_type_id='my_entity_type_id', + featurestore_name='projects/123/locations/us-central1/featurestores/my_featurestore_id' + ) + or + my_entity_type = aiplatform.EntityType.create( + entity_type_id='my_entity_type_id', + featurestore_name='my_featurestore_id', + ) + + Args: + featurestore_id (str): + Required. The ID to use for this Featurestore, which will + become the final component of the Featurestore's resource + name. + + This value may be up to 60 characters, and valid characters + are ``[a-z0-9_]``. The first character cannot be a number. + + The value must be unique within the project and location. + online_store_fixed_node_count (int): + Optional. Config for online serving resources. + When not specified, default node count is 1. The + number of nodes will not scale automatically but + can be scaled manually by providing different + values when updating. + labels (Dict[str, str]): + Optional. The labels with user-defined + metadata to organize your Featurestore. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + on and examples of labels. No more than 64 user + labels can be associated with one + Featurestore(System labels are excluded)." + System reserved label keys are prefixed with + "aiplatform.googleapis.com/" and are immutable. + project (str): + Optional. Project to create EntityType in. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to create EntityType in. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to create EntityTypes. Overrides + credentials set in aiplatform.init. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + encryption_spec (str): + Optional. Customer-managed encryption key + spec for data storage. If set, both of the + online and offline data storage will be secured + by this key. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. 
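For reference, a minimal sketch of the create flow for this class and its children (IDs, labels, and the feature definition are placeholders; aiplatform.init is assumed to have been called with a project and location):

```python
from google.cloud import aiplatform

my_featurestore = aiplatform.Featurestore.create(
    featurestore_id="my_featurestore_id",
    online_store_fixed_node_count=1,
    labels={"team": "ml"},
)

my_entity_type = my_featurestore.create_entity_type(
    entity_type_id="my_entity_type_id",
    description="Users entity type",
)

my_feature = aiplatform.Feature.create(
    feature_id="age",
    value_type="INT64",
    entity_type_name=my_entity_type.resource_name,
)
```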
+ + Returns: + Featurestore - Featurestore resource object + + """ + gapic_featurestore = gca_featurestore.Featurestore( + online_serving_config=gca_featurestore.Featurestore.OnlineServingConfig( + fixed_node_count=online_store_fixed_node_count or 1 + ) + ) + + if labels: + utils.validate_labels(labels) + gapic_featurestore.labels = labels + + if encryption_spec_key_name: + gapic_featurestore.encryption_spec = initializer.global_config.get_encryption_spec( + encryption_spec_key_name=encryption_spec_key_name + ) + + api_client = cls._instantiate_client(location=location, credentials=credentials) + + created_featurestore_lro = api_client.create_featurestore( + parent=initializer.global_config.common_location_path( + project=project, location=location + ), + featurestore=gapic_featurestore, + featurestore_id=featurestore_id, + metadata=request_metadata, + ) + + _LOGGER.log_create_with_lro(cls, created_featurestore_lro) + + created_featurestore = created_featurestore_lro.result() + + _LOGGER.log_create_complete(cls, created_featurestore, "featurestore") + + featurestore_obj = cls( + featurestore_name=created_featurestore.name, + project=project, + location=location, + credentials=credentials, + ) + + return featurestore_obj + + def create_entity_type( + self, + entity_type_id: str, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + sync: bool = True, + ) -> "featurestore.EntityType": + """Creates an EntityType resource in this Featurestore. + + Example Usage: + + my_featurestore = aiplatform.Featurestore.create( + featurestore_id='my_featurestore_id' + ) + my_entity_type = my_featurestore.create_entity_type( + entity_type_id='my_entity_type_id', + ) + + Args: + entity_type_id (str): + Required. The ID to use for the EntityType, which will + become the final component of the EntityType's resource + name. + + This value may be up to 60 characters, and valid characters + are ``[a-z0-9_]``. The first character cannot be a number. + + The value must be unique within a featurestore. + description (str): + Optional. Description of the EntityType. + labels (Dict[str, str]): + Optional. The labels with user-defined + metadata to organize your EntityTypes. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + on and examples of labels. No more than 64 user + labels can be associated with one EntityType + (System labels are excluded)." + System reserved label keys are prefixed with + "aiplatform.googleapis.com/" and are immutable. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + sync (bool): + Optional. Whether to execute this creation synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. 
+ + Returns: + featurestore.EntityType - EntityType resource object + + """ + return featurestore.EntityType.create( + entity_type_id=entity_type_id, + featurestore_name=self.resource_name, + description=description, + labels=labels, + request_metadata=request_metadata, + sync=sync, + ) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 1ad70faece..2ce9d8bbb9 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -82,7 +82,6 @@ class _Job(base.VertexAiResourceNounWithFutureManager): """ client_class = utils.JobClientWithOverride - _is_client_prediction_client = False def __init__( self, @@ -167,8 +166,11 @@ def _cancel_method(cls) -> str: def _dashboard_uri(self) -> Optional[str]: """Helper method to compose the dashboard uri where job can be viewed.""" - fields = utils.extract_fields_from_resource_name(self.resource_name) - url = f"https://console.cloud.google.com/ai/platform/locations/{fields.location}/{self._job_type}/{fields.id}?project={fields.project}" + fields = self._parse_resource_name(self.resource_name) + location = fields.pop("location") + project = fields.pop("project") + job = list(fields.values())[0] + url = f"https://console.cloud.google.com/ai/platform/locations/{location}/{self._job_type}/{job}?project={project}" return url def _log_job_state(self): @@ -279,6 +281,8 @@ class BatchPredictionJob(_Job): _cancel_method = "cancel_batch_prediction_job" _delete_method = "delete_batch_prediction_job" _job_type = "batch-predictions" + _parse_resource_name_method = "parse_batch_prediction_job_path" + _format_resource_name_method = "batch_prediction_job_path" def __init__( self, @@ -533,6 +537,8 @@ def create( model_name = utils.full_resource_name( resource_name=model_name, resource_noun="models", + parse_resource_name_method=aiplatform.Model._parse_resource_name, + format_resource_name_method=aiplatform.Model._format_resource_name, project=project, location=location, ) @@ -952,6 +958,8 @@ class DataLabelingJob(_Job): _cancel_method = "cancel_data_labeling_job" _delete_method = "delete_data_labeling_job" _job_type = "labeling-tasks" + _parse_resource_name_method = "parse_data_labeling_job_path" + _format_resource_name_method = "data_labeling_job_path" pass @@ -963,6 +971,8 @@ class CustomJob(_RunnableJob): _list_method = "list_custom_jobs" _cancel_method = "cancel_custom_job" _delete_method = "delete_custom_job" + _parse_resource_name_method = "parse_custom_job_path" + _format_resource_name_method = "custom_job_path" _job_type = "training" def __init__( @@ -1436,6 +1446,8 @@ class HyperparameterTuningJob(_RunnableJob): _list_method = "list_hyperparameter_tuning_jobs" _cancel_method = "cancel_hyperparameter_tuning_job" _delete_method = "delete_hyperparameter_tuning_job" + _parse_resource_name_method = "parse_hyperparameter_tuning_job_path" + _format_resource_name_method = "hyperparameter_tuning_job_path" _job_type = "training" def __init__( diff --git a/google/cloud/aiplatform/metadata/artifact.py b/google/cloud/aiplatform/metadata/artifact.py index 13ac2aafbf..41a6e73949 100644 --- a/google/cloud/aiplatform/metadata/artifact.py +++ b/google/cloud/aiplatform/metadata/artifact.py @@ -31,6 +31,8 @@ class _Artifact(resource._Resource): _resource_noun = "artifacts" _getter_method = "get_artifact" _delete_method = "delete_artifact" + _parse_resource_name_method = "parse_artifact_path" + _format_resource_name_method = "artifact_path" @classmethod def _create_resource( diff --git a/google/cloud/aiplatform/metadata/context.py 
b/google/cloud/aiplatform/metadata/context.py index 670d8af844..f2868f4f55 100644 --- a/google/cloud/aiplatform/metadata/context.py +++ b/google/cloud/aiplatform/metadata/context.py @@ -31,6 +31,8 @@ class _Context(resource._Resource): _resource_noun = "contexts" _getter_method = "get_context" _delete_method = "delete_context" + _parse_resource_name_method = "parse_context_path" + _format_resource_name_method = "context_path" def add_artifacts_and_executions( self, diff --git a/google/cloud/aiplatform/metadata/execution.py b/google/cloud/aiplatform/metadata/execution.py index 50655d0175..c02fea6476 100644 --- a/google/cloud/aiplatform/metadata/execution.py +++ b/google/cloud/aiplatform/metadata/execution.py @@ -34,6 +34,8 @@ class _Execution(resource._Resource): _resource_noun = "executions" _getter_method = "get_execution" _delete_method = "delete_execution" + _parse_resource_name_method = "parse_execution_path" + _format_resource_name_method = "execution_path" def add_artifact( self, artifact_resource_name: str, input: bool, diff --git a/google/cloud/aiplatform/metadata/metadata_store.py b/google/cloud/aiplatform/metadata/metadata_store.py index 82b6742df5..c7cbcaa68c 100644 --- a/google/cloud/aiplatform/metadata/metadata_store.py +++ b/google/cloud/aiplatform/metadata/metadata_store.py @@ -35,6 +35,8 @@ class _MetadataStore(base.VertexAiResourceNounWithFutureManager): _resource_noun = "metadataStores" _getter_method = "get_metadata_store" _delete_method = "delete_metadata_store" + _parse_resource_name_method = "parse_metadata_store_path" + _format_resource_name_method = "metadata_store_path" def __init__( self, diff --git a/google/cloud/aiplatform/metadata/resource.py b/google/cloud/aiplatform/metadata/resource.py index 2727513234..5f76eddcb1 100644 --- a/google/cloud/aiplatform/metadata/resource.py +++ b/google/cloud/aiplatform/metadata/resource.py @@ -26,6 +26,7 @@ from google.auth import credentials as auth_credentials from google.cloud.aiplatform import base, initializer +from google.cloud.aiplatform import metadata from google.cloud.aiplatform import utils from google.cloud.aiplatform.compat.types import artifact as gca_artifact from google.cloud.aiplatform.compat.types import context as gca_context @@ -36,7 +37,6 @@ class _Resource(base.VertexAiResourceNounWithFutureManager, abc.ABC): """Metadata Resource for Vertex AI""" client_class = utils.MetadataClientWithOverride - _is_client_prediction_client = False _delete_method = None def __init__( @@ -81,21 +81,22 @@ def __init__( if resource: self._gca_resource = resource - return - - full_resource_name = resource_name - # Construct the full_resource_name if input resource_name is the resource_id - if "/" not in resource_name: + else: full_resource_name = utils.full_resource_name( resource_name=resource_name, - resource_noun=f"metadataStores/{metadata_store_id}/{self._resource_noun}", + resource_noun=self._resource_noun, + parse_resource_name_method=self._parse_resource_name, + format_resource_name_method=self._format_resource_name, + parent_resource_name_fields={ + metadata.metadata_store._MetadataStore._resource_noun: metadata_store_id + }, project=self.project, location=self.location, ) - self._gca_resource = getattr(self.api_client, self._getter_method)( - name=full_resource_name, retry=base._DEFAULT_RETRY - ) + self._gca_resource = getattr(self.api_client, self._getter_method)( + name=full_resource_name, retry=base._DEFAULT_RETRY + ) @property def metadata(self) -> Dict: diff --git a/google/cloud/aiplatform/models.py 
b/google/cloud/aiplatform/models.py index 6aca4f8c27..27cb8234b6 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -87,11 +87,12 @@ class Prediction(NamedTuple): class Endpoint(base.VertexAiResourceNounWithFutureManager): client_class = utils.EndpointClientWithOverride - _is_client_prediction_client = False _resource_noun = "endpoints" _getter_method = "get_endpoint" _list_method = "list_endpoints" _delete_method = "delete_endpoint" + _parse_resource_name_method = "parse_endpoint_path" + _format_resource_name_method = "endpoint_path" def __init__( self, @@ -128,6 +129,8 @@ def __init__( endpoint_name = utils.full_resource_name( resource_name=endpoint_name, resource_noun="endpoints", + parse_resource_name_method=self._parse_resource_name, + format_resource_name_method=self._format_resource_name, project=project, location=location, ) @@ -994,21 +997,22 @@ def undeploy( ) -> None: """Undeploys a deployed model. - Proportionally adjusts the traffic_split among the remaining deployed - models of the endpoint. + The model to be undeployed should have no traffic or user must provide + a new traffic_split with the remaining deployed models. Refer + to `Endpoint.traffic_split` for the current traffic split mapping. Args: deployed_model_id (str): Required. The ID of the DeployedModel to be undeployed from the Endpoint. traffic_split (Dict[str, int]): - Optional. A map from a DeployedModel's ID to the percentage of + Optional. A map of DeployedModel IDs to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. - If a DeployedModel's ID is not listed in this map, then it receives - no traffic. The traffic percentage values must add up to 100, or - map must be empty if the Endpoint is to not accept any traffic at - the moment. Key for model being deployed is "0". Should not be - provided if traffic_percentage is provided. + Required if undeploying a model with non-zero traffic from an Endpoint + with multiple deployed models. The traffic percentage values must add + up to 100, or map must be empty if the Endpoint is to not accept any traffic + at the moment. If a DeployedModel's ID is not listed in this map, then it + receives no traffic. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. @@ -1023,6 +1027,19 @@ def undeploy( "Sum of all traffic within traffic split needs to be 100." ) + # Two or more models deployed to Endpoint and remaining traffic will be zero + elif ( + len(self.traffic_split) > 1 + and deployed_model_id in self._gca_resource.traffic_split + and self._gca_resource.traffic_split[deployed_model_id] == 100 + ): + raise ValueError( + f"Undeploying deployed model '{deployed_model_id}' would leave the remaining " + "traffic split at 0%. Traffic split must add up to 100% when models are " + "deployed. Please undeploy the other models first or provide an updated " + "traffic_split." 
+ ) + self._undeploy( deployed_model_id=deployed_model_id, traffic_split=traffic_split, @@ -1279,8 +1296,13 @@ def undeploy_all(self, sync: bool = True) -> "Endpoint": """ self._sync_gca_resource() - for deployed_model in self._gca_resource.deployed_models: - self._undeploy(deployed_model_id=deployed_model.id, sync=sync) + models_to_undeploy = sorted( # Undeploy zero traffic models first + self._gca_resource.traffic_split.keys(), + key=lambda id: self._gca_resource.traffic_split[id], + ) + + for deployed_model in models_to_undeploy: + self._undeploy(deployed_model_id=deployed_model, sync=sync) return self @@ -1308,11 +1330,12 @@ def delete(self, force: bool = False, sync: bool = True) -> None: class Model(base.VertexAiResourceNounWithFutureManager): client_class = utils.ModelClientWithOverride - _is_client_prediction_client = False _resource_noun = "models" _getter_method = "get_model" _list_method = "list_models" _delete_method = "delete_model" + _parse_resource_name_method = "parse_model_path" + _format_resource_name_method = "model_path" @property def uri(self) -> Optional[str]: diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index 5679a58be6..c756589513 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -79,12 +79,12 @@ def _set_enable_caching_value( class PipelineJob(base.VertexAiResourceNounWithFutureManager): client_class = utils.PipelineJobClientWithOverride - _is_client_prediction_client = False - _resource_noun = "pipelineJobs" _delete_method = "delete_pipeline_job" _getter_method = "get_pipeline_job" _list_method = "list_pipeline_jobs" + _parse_resource_name_method = "parse_pipeline_job_path" + _format_resource_name_method = "pipeline_job_path" def __init__( self, @@ -315,8 +315,8 @@ def has_failed(self) -> bool: def _dashboard_uri(self) -> str: """Helper method to compose the dashboard uri where pipeline can be viewed.""" - fields = utils.extract_fields_from_resource_name(self.resource_name) - url = f"https://console.cloud.google.com/vertex-ai/locations/{fields.location}/pipelines/runs/{fields.id}?project={fields.project}" + fields = self._parse_resource_name(self.resource_name) + url = f"https://console.cloud.google.com/vertex-ai/locations/{fields['location']}/pipelines/runs/{fields['pipeline_job']}?project={fields['project']}" return url def _block_until_complete(self): diff --git a/google/cloud/aiplatform/tensorboard/__init__.py b/google/cloud/aiplatform/tensorboard/__init__.py index f4b1c0b105..63281fe972 100644 --- a/google/cloud/aiplatform/tensorboard/__init__.py +++ b/google/cloud/aiplatform/tensorboard/__init__.py @@ -15,7 +15,11 @@ # limitations under the License. # -from google.cloud.aiplatform.tensorboard.tensorboard_resource import Tensorboard +from google.cloud.aiplatform.tensorboard.tensorboard_resource import ( + Tensorboard, + TensorboardExperiment, + TensorboardRun, +) -__all__ = ("Tensorboard",) +__all__ = ("Tensorboard", "TensorboardExperiment", "TensorboardRun") diff --git a/google/cloud/aiplatform/tensorboard/tensorboard_resource.py b/google/cloud/aiplatform/tensorboard/tensorboard_resource.py index 1e41cc9755..5871bae832 100644 --- a/google/cloud/aiplatform/tensorboard/tensorboard_resource.py +++ b/google/cloud/aiplatform/tensorboard/tensorboard_resource.py @@ -15,28 +15,36 @@ # limitations under the License. 
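A usage sketch of the tightened undeploy contract described above, assuming an endpoint with multiple deployed models (endpoint and deployed-model IDs are placeholders; aiplatform.init is assumed):

```python
from google.cloud import aiplatform

endpoint = aiplatform.Endpoint(endpoint_name="my_endpoint_id")

# Undeploying a model that still receives traffic now requires an explicit
# split that reassigns 100% across the models that remain deployed.
endpoint.undeploy(
    deployed_model_id="1111111111",
    traffic_split={"2222222222": 100},
)

# A model already at 0% traffic can be undeployed without a new split,
# and undeploy_all() removes zero-traffic models first.
endpoint.undeploy(deployed_model_id="3333333333")
endpoint.undeploy_all()
```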
# -from typing import Optional, Sequence, Dict, Tuple +from typing import Dict, List, Optional, Sequence, Tuple from google.auth import credentials as auth_credentials from google.protobuf import field_mask_pb2 from google.cloud.aiplatform import base from google.cloud.aiplatform.compat.types import tensorboard as gca_tensorboard +from google.cloud.aiplatform.compat.types import ( + tensorboard_experiment as gca_tensorboard_experiment, + tensorboard_run as gca_tensorboard_run, +) from google.cloud.aiplatform import initializer from google.cloud.aiplatform import utils _LOGGER = base.Logger(__name__) -class Tensorboard(base.VertexAiResourceNounWithFutureManager): +class _TensorboardServiceResource(base.VertexAiResourceNounWithFutureManager): + client_class = utils.TensorboardClientWithOverride + + +class Tensorboard(_TensorboardServiceResource): """Managed tensorboard resource for Vertex AI.""" - client_class = utils.TensorboardClientWithOverride - _is_client_prediction_client = False _resource_noun = "tensorboards" _getter_method = "get_tensorboard" _list_method = "list_tensorboards" _delete_method = "delete_tensorboard" + _parse_resource_name_method = "parse_tensorboard_path" + _format_resource_name_method = "tensorboard_path" def __init__( self, @@ -172,12 +180,7 @@ def create( _LOGGER.log_create_complete(cls, created_tensorboard, "tb") - return cls( - tensorboard_name=created_tensorboard.name, - project=project or initializer.global_config.project, - location=location or initializer.global_config.location, - credentials=credentials, - ) + return cls(tensorboard_name=created_tensorboard.name, credentials=credentials,) def update( self, @@ -229,8 +232,7 @@ def update( Overrides encryption_spec_key_name set in aiplatform.init. Returns: - tensorboard (Tensorboard): - The managed tensorboard resource. + Tensorboard: The managed tensorboard resource. """ update_mask = list() @@ -281,3 +283,502 @@ def update( _LOGGER.log_action_completed_against_resource("tensorboard", "updated", self) return self + + +class TensorboardExperiment(_TensorboardServiceResource): + """Managed tensorboard resource for Vertex AI.""" + + _resource_noun = "experiments" + _getter_method = "get_tensorboard_experiment" + _list_method = "list_tensorboard_experiments" + _delete_method = "delete_tensorboard_experiment" + _parse_resource_name_method = "parse_tensorboard_experiment_path" + _format_resource_name_method = "tensorboard_experiment_path" + + def __init__( + self, + tensorboard_experiment_name: str, + tensorboard_id: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + """Retrieves an existing tensorboard experiment given a tensorboard experiment name or ID. + + Example Usage: + + tb_exp = aiplatform.TensorboardExperiment( + tensorboard_experiment_name= "projects/123/locations/us-central1/tensorboards/456/experiments/678" + ) + + tb_exp = aiplatform.TensorboardExperiment( + tensorboard_experiment_name= "678" + tensorboard_id = "456" + ) + + Args: + tensorboard_experiment_name (str): + Required. A fully-qualified tensorboard experiment resource name or resource ID. + Example: "projects/123/locations/us-central1/tensorboards/456/experiments/678" or + "678" when tensorboard_id is passed and project and location are initialized or passed. + tensorboard_id (str): + Optional. A tensorboard resource ID. + project (str): + Optional. Project to retrieve tensorboard from. 
If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve tensorboard from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve this Tensorboard. Overrides + credentials set in aiplatform.init. + """ + + super().__init__( + project=project, + location=location, + credentials=credentials, + resource_name=tensorboard_experiment_name, + ) + self._gca_resource = self._get_gca_resource( + resource_name=tensorboard_experiment_name, + parent_resource_name_fields={Tensorboard._resource_noun: tensorboard_id} + if tensorboard_id + else tensorboard_id, + ) + + @classmethod + def create( + cls, + tensorboard_experiment_id: str, + tensorboard_name: str, + display_name: Optional[str] = None, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Sequence[Tuple[str, str]] = (), + ) -> "TensorboardExperiment": + """Creates a new TensorboardExperiment. + + Example Usage: + + tb = aiplatform.TensorboardExperiment.create( + tensorboard_experiment_id='my-experiment' + tensorboard_id='456' + display_name='my display name', + description='my description', + labels={ + 'key1': 'value1', + 'key2': 'value2' + } + ) + + Args: + tensorboard_experiment_id (str): + Required. The ID to use for the Tensorboard experiment, + which will become the final component of the Tensorboard + experiment's resource name. + + This value should be 1-128 characters, and valid + characters are /[a-z][0-9]-/. + + This corresponds to the ``tensorboard_experiment_id`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + tensorboard_name (str): + Required. The resource name or ID of the Tensorboard to create + the TensorboardExperiment in. Format of resource name: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` + display_name (str): + Optional. The user-defined name of the Tensorboard Experiment. + The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + description (str): + Optional. Description of this Tensorboard Experiment. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. + project (str): + Optional. Project to upload this model to. Overrides project set in + aiplatform.init. + location (str): + Optional. Location to upload this model to. Overrides location set in + aiplatform.init. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to upload this model. Overrides + credentials set in aiplatform.init. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + Returns: + TensorboardExperiment: The TensorboardExperiment resource. 
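Mirroring the example usage above, a minimal sketch for creating and listing experiments (the tensorboard ID, project, and location are placeholders):

```python
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

tb_exp = aiplatform.TensorboardExperiment.create(
    tensorboard_experiment_id="my-experiment",
    tensorboard_name="my-tensorboard-id",
    display_name="my display name",
    description="my description",
)

experiments = aiplatform.TensorboardExperiment.list(
    tensorboard_name="my-tensorboard-id"
)
```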
+ """ + + if display_name: + utils.validate_display_name(display_name) + + if labels: + utils.validate_labels(labels) + + api_client = cls._instantiate_client(location=location, credentials=credentials) + + parent = utils.full_resource_name( + resource_name=tensorboard_name, + resource_noun=Tensorboard._resource_noun, + parse_resource_name_method=Tensorboard._parse_resource_name, + format_resource_name_method=Tensorboard._format_resource_name, + project=project, + location=location, + ) + + gapic_tensorboard_experiment = gca_tensorboard_experiment.TensorboardExperiment( + display_name=display_name, description=description, labels=labels, + ) + + _LOGGER.log_create_with_lro(cls) + + tensorboard_experiment = api_client.create_tensorboard_experiment( + parent=parent, + tensorboard_experiment=gapic_tensorboard_experiment, + tensorboard_experiment_id=tensorboard_experiment_id, + metadata=request_metadata, + ) + + _LOGGER.log_create_complete(cls, tensorboard_experiment, "tb experiment") + + return cls( + tensorboard_experiment_name=tensorboard_experiment.name, + credentials=credentials, + ) + + @classmethod + def list( + cls, + tensorboard_name: str, + filter: Optional[str] = None, + order_by: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> List["TensorboardExperiment"]: + """List TensorboardExperiemnts in a Tensorboard resource. + + Example Usage: + + aiplatform.TensorboardExperiment.list( + tensorboard_name='projects/my-project/locations/us-central1/tensorboards/123' + ) + + Args: + tensorboard_name(str): + Required. The resource name or resource ID of the + Tensorboard to list + TensorboardExperiments. Format, if resource name: + 'projects/{project}/locations/{location}/tensorboards/{tensorboard}' + filter (str): + Optional. An expression for filtering the results of the request. + For field names both snake_case and camelCase are supported. + order_by (str): + Optional. A comma-separated list of fields to order by, sorted in + ascending order. Use "desc" after a field name for descending. + Supported fields: `display_name`, `create_time`, `update_time` + project (str): + Optional. Project to retrieve list from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve list from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve list. Overrides + credentials set in aiplatform.init. 
+ Returns: + List[TensorboardExperiment] - A list of TensorboardExperiments + """ + + parent = utils.full_resource_name( + resource_name=tensorboard_name, + resource_noun=Tensorboard._resource_noun, + parse_resource_name_method=Tensorboard._parse_resource_name, + format_resource_name_method=Tensorboard._format_resource_name, + project=project, + location=location, + ) + + return super()._list( + filter=filter, + order_by=order_by, + project=project, + location=location, + credentials=credentials, + parent=parent, + ) + + +class TensorboardRun(_TensorboardServiceResource): + """Managed tensorboard resource for Vertex AI.""" + + _resource_noun = "runs" + _getter_method = "get_tensorboard_run" + _list_method = "list_tensorboard_runs" + _delete_method = "delete_tensorboard_run" + _parse_resource_name_method = "parse_tensorboard_run_path" + _format_resource_name_method = "tensorboard_run_path" + + def __init__( + self, + tensorboard_run_name: str, + tensorboard_id: Optional[str] = None, + tensorboard_experiment_id: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + """Retrieves an existing tensorboard experiment given a tensorboard experiment name or ID. + + Example Usage: + + tb_exp = aiplatform.TensorboardRun( + tensorboard_run_name= "projects/123/locations/us-central1/tensorboards/456/experiments/678/run/8910" + ) + + tb_exp = aiplatform.TensorboardExperiment( + tensorboard_experiment_name= "8910", + tensorboard_id = "456", + tensorboard_experiment_id = "678" + ) + + Args: + tensorboard_run_name (str): + Required. A fully-qualified tensorboard run resource name or resource ID. + Example: "projects/123/locations/us-central1/tensorboards/456/experiments/678/runs/8910" or + "8910" when tensorboard_id and tensorboard_experiment_id are passed + and project and location are initialized or passed. + tensorboard_id (str): + Optional. A tensorboard resource ID. + tensorboard_experiment_id (str): + Optional. A tensorboard experiment resource ID. + project (str): + Optional. Project to retrieve tensorboard from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve tensorboard from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve this Tensorboard. Overrides + credentials set in aiplatform.init. + Raises: + ValueError: if only one of tensorboard_id or tensorboard_experiment_id is provided. + """ + if bool(tensorboard_id) != bool(tensorboard_experiment_id): + raise ValueError( + "Both tensorboard_id and tensorboard_experiment_id must be provided or neither should be provided." 
+ ) + + super().__init__( + project=project, + location=location, + credentials=credentials, + resource_name=tensorboard_run_name, + ) + self._gca_resource = self._get_gca_resource( + resource_name=tensorboard_run_name, + parent_resource_name_fields={ + Tensorboard._resource_noun: tensorboard_id, + TensorboardExperiment._resource_noun: tensorboard_experiment_id, + } + if tensorboard_id + else tensorboard_id, + ) + + @classmethod + def create( + cls, + tensorboard_run_id: str, + tensorboard_experiment_name: str, + tensorboard_id: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Sequence[Tuple[str, str]] = (), + ) -> "TensorboardRun": + """Creates a new tensorboard. + + Example Usage: + + tb = aiplatform.TensorboardExperiment.create( + tensorboard_experiment_id='my-experiment' + tensorboard_id='456' + display_name='my display name', + description='my description', + labels={ + 'key1': 'value1', + 'key2': 'value2' + } + ) + + Args: + tensorboard_run_id (str): + Required. The ID to use for the Tensorboard run, which + will become the final component of the Tensorboard run's + resource name. + + This value should be 1-128 characters, and valid: + characters are /[a-z][0-9]-/. + tensorboard_experiment_name (str): + Required. The resource name or ID of the TensorboardExperiment + to create the TensorboardRun in. Resource name format: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}/experiments/{experiment}`` + + If resource ID is provided then tensorboard_id must be provided. + tensorboard_id (str): + Optional. The resource ID of the Tensorboard to create + the TensorboardRun in. Format of resource name. + display_name (str): + Optional. The user-defined name of the Tensorboard Run. + This value must be unique among all TensorboardRuns belonging to the + same parent TensorboardExperiment. + + If not provided tensorboard_run_id will be used. + description (str): + Optional. Description of this Tensorboard Run. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. + project (str): + Optional. Project to upload this model to. Overrides project set in + aiplatform.init. + location (str): + Optional. Location to upload this model to. Overrides location set in + aiplatform.init. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to upload this model. Overrides + credentials set in aiplatform.init. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + Returns: + TensorboardExperiment: The TensorboardExperiment resource. 
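Similarly, a sketch for creating and then retrieving a run under an existing experiment (all IDs are placeholders; aiplatform.init is assumed):

```python
from google.cloud import aiplatform

tb_run = aiplatform.TensorboardRun.create(
    tensorboard_run_id="run-1",
    tensorboard_experiment_name="my-experiment",
    tensorboard_id="my-tensorboard-id",
)

same_run = aiplatform.TensorboardRun(
    tensorboard_run_name="run-1",
    tensorboard_id="my-tensorboard-id",
    tensorboard_experiment_id="my-experiment",
)
```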
+ """ + + if display_name: + utils.validate_display_name(display_name) + + if labels: + utils.validate_labels(labels) + + display_name = display_name or tensorboard_run_id + + api_client = cls._instantiate_client(location=location, credentials=credentials) + + parent = utils.full_resource_name( + resource_name=tensorboard_experiment_name, + resource_noun=TensorboardExperiment._resource_noun, + parse_resource_name_method=TensorboardExperiment._parse_resource_name, + format_resource_name_method=TensorboardExperiment._format_resource_name, + parent_resource_name_fields={Tensorboard._resource_noun: tensorboard_id}, + project=project, + location=location, + ) + + gapic_tensorboard_run = gca_tensorboard_run.TensorboardRun( + display_name=display_name, description=description, labels=labels, + ) + + _LOGGER.log_create_with_lro(cls) + + tensorboard_run = api_client.create_tensorboard_run( + parent=parent, + tensorboard_run=gapic_tensorboard_run, + tensorboard_run_id=tensorboard_run_id, + metadata=request_metadata, + ) + + _LOGGER.log_create_complete(cls, tensorboard_run, "tb_run") + + return cls(tensorboard_run_name=tensorboard_run.name, credentials=credentials,) + + @classmethod + def list( + cls, + tensorboard_experiment_name: str, + tensorboard_id: Optional[str] = None, + filter: Optional[str] = None, + order_by: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> List["TensorboardRun"]: + """List all instances of TensorboardRun in TensorboardExperiment. + + Example Usage: + + aiplatform.TensorboardRun.list( + tensorboard_name='projects/my-project/locations/us-central1/tensorboards/123/experiments/456' + ) + + Args: + tensorboard_experiment_name (str): + Required. The resource name or resource ID of the + TensorboardExperiment to list + TensorboardRun. Format, if resource name: + 'projects/{project}/locations/{location}/tensorboards/{tensorboard}/experiments/{experiment}' + + If resource ID is provided then tensorboard_id must be provided. + tensorboard_id (str): + Optional. The resource ID of the Tensorboard that contains the TensorboardExperiment + to list TensorboardRun. + filter (str): + Optional. An expression for filtering the results of the request. + For field names both snake_case and camelCase are supported. + order_by (str): + Optional. A comma-separated list of fields to order by, sorted in + ascending order. Use "desc" after a field name for descending. + Supported fields: `display_name`, `create_time`, `update_time` + project (str): + Optional. Project to retrieve list from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve list from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve list. Overrides + credentials set in aiplatform.init. 
+ Returns: + List[TensorboardRun] - A list of TensorboardRun + """ + + parent = utils.full_resource_name( + resource_name=tensorboard_experiment_name, + resource_noun=TensorboardExperiment._resource_noun, + parse_resource_name_method=TensorboardExperiment._parse_resource_name, + format_resource_name_method=TensorboardExperiment._format_resource_name, + parent_resource_name_fields={Tensorboard._resource_noun: tensorboard_id}, + project=project, + location=location, + ) + + return super()._list( + filter=filter, + order_by=order_by, + project=project, + location=location, + credentials=credentials, + parent=parent, + ) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index aefcaa9dbc..38aafef4fa 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -69,11 +69,12 @@ class _TrainingJob(base.VertexAiResourceNounWithFutureManager): client_class = utils.PipelineClientWithOverride - _is_client_prediction_client = False _resource_noun = "trainingPipelines" _getter_method = "get_training_pipeline" _list_method = "list_training_pipelines" _delete_method = "delete_training_pipeline" + _parse_resource_name_method = "parse_training_pipeline_path" + _format_resource_name_method = "training_pipeline_path" def __init__( self, @@ -846,13 +847,7 @@ def _get_model(self) -> Optional[models.Model]: return None if self._gca_resource.model_to_upload.name: - fields = utils.extract_fields_from_resource_name( - self._gca_resource.model_to_upload.name - ) - - return models.Model( - fields.id, project=fields.project, location=fields.location, - ) + return models.Model(model_name=self._gca_resource.model_to_upload.name) def _wait_callback(self): """Callback performs custom logging during _block_until_complete. 
Override in subclass.""" @@ -916,8 +911,8 @@ def has_failed(self) -> bool: def _dashboard_uri(self) -> str: """Helper method to compose the dashboard uri where training can be viewed.""" - fields = utils.extract_fields_from_resource_name(self.resource_name) - url = f"https://console.cloud.google.com/ai/platform/locations/{fields.location}/training/{fields.id}?project={fields.project}" + fields = self._parse_resource_name(self.resource_name) + url = f"https://console.cloud.google.com/ai/platform/locations/{fields['location']}/training/{fields['training_pipeline']}?project={fields['project']}" return url @property diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index cac1248ee7..26b28dcdd7 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -19,10 +19,11 @@ import abc import datetime import pathlib -from collections import namedtuple import logging import re -from typing import Any, Dict, Match, Optional, Type, TypeVar, Tuple +from typing import Any, Callable, Dict, Optional, Type, TypeVar, Tuple + +from google.protobuf import timestamp_pb2 from google.api_core import client_options from google.api_core import gapic_v1 @@ -88,71 +89,32 @@ tensorboard_service_client_v1.TensorboardServiceClient, ) -RESOURCE_NAME_PATTERN = re.compile( - r"^projects\/(?P[\w-]+)\/locations\/(?P[\w-]+)\/(?P[\w\-\/]+)\/(?P[\w-]+)$" -) -RESOURCE_ID_PATTERN = re.compile(r"^[\w-]+$") - -Fields = namedtuple("Fields", ["project", "location", "resource", "id"],) - - -def _match_to_fields(match: Match) -> Optional[Fields]: - """Normalize RegEx groups from resource name pattern Match to class - Fields.""" - if not match: - return None - return Fields( - project=match["project"], - location=match["location"], - resource=match["resource"], - id=match["id"], - ) - - -def validate_id(resource_id: str) -> bool: - """Validate int64 resource ID number.""" - return bool(RESOURCE_ID_PATTERN.match(resource_id)) +RESOURCE_ID_PATTERN = re.compile(r"^[\w-]+$") -def extract_fields_from_resource_name( - resource_name: str, resource_noun: Optional[str] = None -) -> Optional[Fields]: - """Validates and returns extracted fields from a fully-qualified resource - name. Returns None if name is invalid. +def validate_id(resource_id: str): + """Validate resource ID. Args: - resource_name (str): - Required. A fully-qualified Vertex AI resource name + resource_id (str): Resource id. + Raises: + ValueError: If resource id is not a valid format. - resource_noun (str): - A resource noun to validate the resource name against. - For example, you would pass "datasets" to validate - "projects/123/locations/us-central1/datasets/456". - In the case of deeper naming structures, e.g., - "projects/123/locations/us-central1/metadataStores/123/contexts/456", - you would pass "metadataStores/123/contexts" as the resource_noun. - Returns: - fields (Fields): - A named tuple containing four extracted fields from a resource name: - project, location, resource, and id. These fields can be used for - subsequent method calls in the SDK. 
""" - fields = _match_to_fields(RESOURCE_NAME_PATTERN.match(resource_name)) - - if not fields: - return None - if resource_noun and fields.resource != resource_noun: - return None - - return fields + if not RESOURCE_ID_PATTERN.match(resource_id): + raise ValueError(f"Resource {resource_id} is not a valid resource id.") def full_resource_name( resource_name: str, resource_noun: str, + parse_resource_name_method: Callable[[str], Dict[str, str]], + format_resource_name_method: Callable[..., str], + parent_resource_name_fields: Optional[Dict[str, str]] = None, project: Optional[str] = None, location: Optional[str] = None, + resource_id_validator: Optional[Callable[[str], None]] = None, ) -> str: """Returns fully qualified resource name. @@ -161,85 +123,89 @@ def full_resource_name( Required. A fully-qualified Vertex AI resource name or resource ID. resource_noun (str): - A resource noun to validate the resource name against. + Required. A resource noun to validate the resource name against. For example, you would pass "datasets" to validate "projects/123/locations/us-central1/datasets/456". - In the case of deeper naming structures, e.g., - "projects/123/locations/us-central1/metadataStores/123/contexts/456", - you would pass "metadataStores/123/contexts" as the resource_noun. + parse_resource_name_method (Callable[[str], Dict[str,str]]): + Required. Method that parses a resource name into its segment parts. + These are generally included with GAPIC clients. + format_resource_name_method (Callable[..., str]): + Required. Method that takes segment parts of resource names and returns + the formated resource name. These are generally included with GAPIC clients. + parent_resource_name_fields (Dict[str, str]): + Optional. Dictionary of segment parts where key is the resource noun and + values are the resource ids. + For example: + { + "metadataStores": "123" + } project (str): - Optional project to retrieve resource_noun from. If not set, project + Optional. project to retrieve resource_noun from. If not set, project set in aiplatform.init will be used. location (str): - Optional location to retrieve resource_noun from. If not set, location + Optional. location to retrieve resource_noun from. If not set, location set in aiplatform.init will be used. + resource_id_validator (Callable[str, None]): + Optional. Function that validates the resource ID. Overrides the default validator, validate_id. + Should take a resource ID as string and raise ValueError if invalid. Returns: resource_name (str): A fully-qualified Vertex AI resource name. - - Raises: - ValueError: - If resource name, resource ID or project ID not provided. """ - validate_resource_noun(resource_noun) # Fully qualified resource name, e.g., "projects/.../locations/.../datasets/12345" or # "projects/.../locations/.../metadataStores/.../contexts/12345" - valid_name = extract_fields_from_resource_name( - resource_name=resource_name, resource_noun=resource_noun - ) + fields = parse_resource_name_method(resource_name) + if fields: + return resource_name + + resource_id_validator = resource_id_validator or validate_id user_project = project or initializer.global_config.project user_location = location or initializer.global_config.location - # Partial resource name (i.e. 
"12345") with known project and location - if ( - not valid_name - and validate_project(user_project) - and validate_region(user_location) - and validate_id(resource_name) - ): - resource_name = f"projects/{user_project}/locations/{user_location}/{resource_noun}/{resource_name}" - # Invalid resource_name parameter - elif not valid_name: - raise ValueError(f"Please provide a valid {resource_noun[:-1]} name or ID") + validate_region(user_location) + resource_id_validator(resource_name) + + format_args = { + "location": user_location, + "project": user_project, + convert_camel_case_resource_noun_to_snake_case(resource_noun): resource_name, + } + + if parent_resource_name_fields: + format_args.update( + { + convert_camel_case_resource_noun_to_snake_case(key): value + for key, value in parent_resource_name_fields.items() + } + ) - return resource_name + return format_resource_name_method(**format_args) -# TODO(b/172286889) validate resource noun -def validate_resource_noun(resource_noun: str) -> bool: - """Validates resource noun. - - Args: - resource_noun: resource noun to validate - Returns: - bool: True if no errors raised - Raises: - ValueError: If resource noun not supported. - """ - if resource_noun: - return True - raise ValueError("Please provide a valid resource noun") +# Resource nouns that are not plural in their resource names. +# Userd below to avoid conversion from plural to singular. +_SINGULAR_RESOURCE_NOUNS = {"time_series"} -# TODO(b/172288287) validate project -def validate_project(project: str) -> bool: - """Validates project. +def convert_camel_case_resource_noun_to_snake_case(resource_noun: str) -> str: + """Converts camel case to snake case to map resource name parts to GAPIC parameter names. Args: - project: project to validate + resource_noun (str): The resource noun in camel case to covert. Returns: - bool: True if no errors raised - Raises: - ValueError: If project does not exist. + Singular snake case resource noun. """ - if project: - return True - raise ValueError("Please provide a valid project ID") + snake_case = re.sub("([A-Z]+)", r"_\1", resource_noun).lower() + + # plural to singular + if snake_case in _SINGULAR_RESOURCE_NOUNS or not snake_case.endswith("s"): + return snake_case + else: + return snake_case[:-1] -# TODO(b/172932277) verify display name only contains utf-8 chars def validate_display_name(display_name: str): """Verify display name is at most 128 chars. @@ -442,6 +408,22 @@ def __getattr__(self, name: str) -> Any: def select_version(self, version: str) -> VertexAiServiceClient: return self._clients[version] + @classmethod + def get_gapic_client_class( + cls, version: Optional[str] = None + ) -> Type[VertexAiServiceClient]: + """Gets the underyilng GAPIC client. + + Used to access class and static methods without instantiating. + + Args: + version (str): + Optional. Version of client to retreive otherwise the default version is returned. + Retuns: + Underlying GAPIC client for this wrapper and version. + """ + return dict(cls._version_map)[version or cls._default_version] + class DatasetClientWithOverride(ClientWithOverride): _is_temporary = True @@ -632,3 +614,23 @@ def _timestamped_copy_to_gcs( gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) return gcs_path + + +def get_timestamp_proto( + time: Optional[datetime.datetime] = None, +) -> timestamp_pb2.Timestamp: + """Gets timestamp proto of a given time. + Args: + time (datetime.datetime): + Optional. A user provided time. Default to datetime.datetime.now() if not given. 
+ Returns: + timestamp_pb2.Timestamp: timestamp proto of the given time, with no higher than millisecond precision. + """ + if not time: + time = datetime.datetime.now() + t = time.timestamp() + seconds = int(t) + # must not have higher than millisecond precision. + nanos = int((t % 1 * 1e6) * 1e3) + + return timestamp_pb2.Timestamp(seconds=seconds, nanos=nanos) diff --git a/google/cloud/aiplatform/utils/console_utils.py b/google/cloud/aiplatform/utils/console_utils.py index ff9baba4cf..c108b0605e 100644 --- a/google/cloud/aiplatform/utils/console_utils.py +++ b/google/cloud/aiplatform/utils/console_utils.py @@ -15,13 +15,14 @@ # limitations under the License. # -from google.cloud.aiplatform import utils +from google.cloud.aiplatform import jobs +from google.cloud.aiplatform import tensorboard def custom_job_console_uri(custom_job_resource_name: str) -> str: """Helper method to create console uri from custom job resource name.""" - fields = utils.extract_fields_from_resource_name(custom_job_resource_name) - return f"https://console.cloud.google.com/ai/platform/locations/{fields.location}/training/{fields.id}?project={fields.project}" + fields = jobs.CustomJob._parse_resource_name(custom_job_resource_name) + return f"https://console.cloud.google.com/ai/platform/locations/{fields['location']}/training/{fields['custom_job']}?project={fields['project']}" def custom_job_tensorboard_console_uri( @@ -29,7 +30,7 @@ def custom_job_tensorboard_console_uri( ) -> str: """Helper method to create console uri to tensorboard from custom job resource.""" # projects+40556267596+locations+us-central1+tensorboards+740208820004847616+experiments+2214368039829241856 - fields = utils.extract_fields_from_resource_name(tensorboard_resource_name) + fields = tensorboard.Tensorboard._parse_resource_name(tensorboard_resource_name) experiment_resource_name = f"{tensorboard_resource_name}/experiments/{custom_job_resource_name.split('/')[-1]}" uri_experiment_resource_name = experiment_resource_name.replace("/", "+") - return f"https://{fields.location}.tensorboard.googleusercontent.com/experiment/{uri_experiment_resource_name}" + return f"https://{fields['location']}.tensorboard.googleusercontent.com/experiment/{uri_experiment_resource_name}" diff --git a/google/cloud/aiplatform/utils/featurestore_utils.py b/google/cloud/aiplatform/utils/featurestore_utils.py index 23f3e48aad..e9d26b62be 100644 --- a/google/cloud/aiplatform/utils/featurestore_utils.py +++ b/google/cloud/aiplatform/utils/featurestore_utils.py @@ -16,102 +16,147 @@ # import re -from typing import Optional, Tuple +from typing import Dict, NamedTuple, Optional from google.cloud.aiplatform.compat.services import featurestore_service_client +from google.cloud.aiplatform.compat.types import ( + feature as gca_feature, + featurestore_service as gca_featurestore_service, +) +from google.cloud.aiplatform import utils CompatFeaturestoreServiceClient = featurestore_service_client.FeaturestoreServiceClient RESOURCE_ID_PATTERN_REGEX = r"[a-z_][a-z0-9_]{0,59}" +GCS_SOURCE_TYPE = {"csv", "avro"} +_FEATURE_VALUE_TYPE_UNSPECIFIED = "VALUE_TYPE_UNSPECIFIED" -def validate_id(resource_id: str) -> bool: - """Validates feature store resource ID pattern.""" - return bool(re.compile(r"^" + RESOURCE_ID_PATTERN_REGEX + r"$").match(resource_id)) - -def validate_and_get_entity_type_resource_ids( - entity_type_name: str, featurestore_id: Optional[str] = None, -) -> Tuple[str, str]: - """Validates and gets featurestore ID and entity_type ID of the entity_type resource. 
+def validate_id(resource_id: str) -> None: + """Validates feature store resource ID pattern. Args: - entity_type_name (str): - Required. A fully-qualified entityType resource name or an entity_type ID - Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id" - or "my_entity_type_id", with featurestore_id passed. - featurestore_id (str): - Optional. Featurestore ID of the entity_type resource. + resource_id (str): + Required. Feature Store resource ID. + + Raises: + ValueError if resource_id is invalid. + """ + if not re.compile(r"^" + RESOURCE_ID_PATTERN_REGEX + r"$").match(resource_id): + raise ValueError(f"Resource ID {resource_id} is not a valid resource id.") + - Returns: - Tuple[str, str] - featurestore ID and entity_type ID +def validate_feature_id(feature_id: str) -> None: + """Validates feature ID. + + Args: + feature_id (str): + Required. Feature resource ID. Raises: - ValueError: If the provided entity_type_name is not in form of a fully-qualified - entityType resource name nor an entity_type ID with featurestore_id passed. + ValueError if feature_id is invalid. """ - match = CompatFeaturestoreServiceClient.parse_entity_type_path( - path=entity_type_name - ) + match = re.compile(r"^" + RESOURCE_ID_PATTERN_REGEX + r"$").match(feature_id) - if match: - featurestore_id = match["featurestore"] - entity_type_id = match["entity_type"] - elif ( - validate_id(entity_type_name) - and featurestore_id - and validate_id(featurestore_id) - ): - entity_type_id = entity_type_name - else: + if not match: raise ValueError( - f"{entity_type_name} is not in form of a fully-qualified entityType resource name " - f"nor an entity_type ID with featurestore_id passed." + f"The value of feature_id may be up to 60 characters, and valid characters are `[a-z0-9_]`. " + f"The first character cannot be a number. Instead, got {feature_id}." ) - return (featurestore_id, entity_type_id) + reserved_words = ["entity_id", "feature_timestamp", "arrival_timestamp"] + if feature_id.lower() in reserved_words: + raise ValueError( + "The feature_id cannot be any of the reserved_words: `%s`" + % ("`, `".join(reserved_words)) + ) + + +def validate_value_type(value_type: str) -> None: + """Validates user provided feature value_type string. -def validate_and_get_feature_resource_ids( - feature_name: str, - featurestore_id: Optional[str] = None, - entity_type_id: Optional[str] = None, -) -> Tuple[str, str, str]: - """Validates and gets featurestore ID, entity_type ID, and feature ID for the feature resource. Args: - feature_name (str): - Required. A fully-qualified feature resource name or a feature ID. - Example: "projects/123/locations/us-central1/featurestores/my_featurestore_id/entityTypes/my_entity_type_id/features/my_feature_id" - or "my_feature_id" when project and location are initialized or passed, with featurestore_id and entity_type_id passed. - featurestore_id (str): - Optional. Featurestore ID of the feature resource. - entity_type_id (str): - Optional. EntityType ID of the feature resource. - - Returns: - Tuple[str, str, str] - featurestore ID, entity_type ID, and feature ID + value_type (str): + Required. Immutable. Type of Feature value. + One of BOOL, BOOL_ARRAY, DOUBLE, DOUBLE_ARRAY, INT64, INT64_ARRAY, STRING, STRING_ARRAY, BYTES. Raises: - ValueError: If the provided feature_name is not in form of a fully-qualified - feature resource name nor a feature ID with featurestore_id and entity_type_id passed. 
+ ValueError if value_type is invalid or unspecified. """ - - match = CompatFeaturestoreServiceClient.parse_feature_path(path=feature_name) - - if match: - featurestore_id = match["featurestore"] - entity_type_id = match["entity_type"] - feature_id = match["feature"] - elif ( - validate_id(feature_name) - and featurestore_id - and entity_type_id - and validate_id(featurestore_id) - and validate_id(entity_type_id) + if getattr(gca_feature.Feature.ValueType, value_type, None) in ( + gca_feature.Feature.ValueType.VALUE_TYPE_UNSPECIFIED, + None, ): - feature_id = feature_name - else: raise ValueError( - f"{feature_name} is not in form of a fully-qualified feature resource name " - f"nor a feature ID with featurestore_id and entity_type_id passed." + f"Given value_type `{value_type}` invalid or unspecified. " + f"Choose one of {gca_feature.Feature.ValueType._member_names_} except `{_FEATURE_VALUE_TYPE_UNSPECIFIED}`" + ) + + +class _FeatureConfig(NamedTuple): + """Configuration for feature creation. + + Usage: + + config = _FeatureConfig( + feature_id='my_feature_id', + value_type='int64', + description='my description', + labels={'my_key': 'my_value'}, + ) + """ + + feature_id: str + value_type: str = _FEATURE_VALUE_TYPE_UNSPECIFIED + description: Optional[str] = None + labels: Optional[Dict[str, str]] = None + + def _get_feature_id(self) -> str: + """Validates and returns the feature_id. + + Returns: + str - valid feature ID. + + Raise: + ValueError if feature_id is invalid + """ + + # Raises ValueError if invalid feature_id + validate_feature_id(feature_id=self.feature_id) + + return self.feature_id + + def _get_value_type_enum(self) -> int: + """Validates value_type and returns the enum of the value type. + + Returns: + int - valid value type enum. + """ + + # Raises ValueError if invalid value_type + validate_value_type(value_type=self.value_type) + + value_type_enum = getattr(gca_feature.Feature.ValueType, self.value_type) + + return value_type_enum + + def get_create_feature_request( + self, + ) -> gca_featurestore_service.CreateFeatureRequest: + """Return create feature request.""" + + gapic_feature = gca_feature.Feature(value_type=self._get_value_type_enum(),) + + if self.labels: + utils.validate_labels(self.labels) + gapic_feature.labels = self.labels + + if self.description: + gapic_feature.description = self.description + + create_feature_request = gca_featurestore_service.CreateFeatureRequest( + feature=gapic_feature, feature_id=self._get_feature_id() ) - return (featurestore_id, entity_type_id, feature_id) + + return create_feature_request diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py index 8ab162f940..9c6c8f1633 100644 --- a/google/cloud/aiplatform/version.py +++ b/google/cloud/aiplatform/version.py @@ -15,4 +15,4 @@ # limitations under the License. # -__version__ = "1.8.1" +__version__ = "1.9.0" diff --git a/google/cloud/aiplatform_v1/services/prediction_service/client.py b/google/cloud/aiplatform_v1/services/prediction_service/client.py index 07834c79d6..e3cb2c5a53 100644 --- a/google/cloud/aiplatform_v1/services/prediction_service/client.py +++ b/google/cloud/aiplatform_v1/services/prediction_service/client.py @@ -390,7 +390,7 @@ def predict( timeout: float = None, metadata: Sequence[Tuple[str, str]] = (), ) -> prediction_service.PredictResponse: - r"""Perform an online prediction. + """Perform an online prediction. 
Args: request (Union[google.cloud.aiplatform_v1.types.PredictRequest, dict]): @@ -494,7 +494,7 @@ def raw_predict( timeout: float = None, metadata: Sequence[Tuple[str, str]] = (), ) -> httpbody_pb2.HttpBody: - r"""Perform an online prediction with an arbitrary HTTP payload. + """Perform an online prediction with an arbitrary HTTP payload. The response includes the following HTTP headers: @@ -651,7 +651,7 @@ def explain( timeout: float = None, metadata: Sequence[Tuple[str, str]] = (), ) -> prediction_service.ExplainResponse: - r"""Perform an online explanation. + """Perform an online explanation. If [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id] diff --git a/samples/model-builder/create_training_pipeline_tabular_classification_sample.py b/samples/model-builder/create_training_pipeline_tabular_classification_sample.py index 6bd9405383..317a1472b6 100644 --- a/samples/model-builder/create_training_pipeline_tabular_classification_sample.py +++ b/samples/model-builder/create_training_pipeline_tabular_classification_sample.py @@ -33,6 +33,7 @@ def create_training_pipeline_tabular_classification_sample( tabular_classification_job = aiplatform.AutoMLTabularTrainingJob( display_name=display_name, + optimization_prediction_type="classification" ) my_tabular_dataset = aiplatform.TabularDataset(dataset_id) diff --git a/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py b/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py index c015e99785..1cfbb02202 100644 --- a/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py +++ b/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py @@ -44,6 +44,7 @@ def test_create_training_pipeline_tabular_classification_sample( ) mock_get_automl_tabular_training_job.assert_called_once_with( display_name=constants.DISPLAY_NAME, + optimization_prediction_type="classification" ) mock_run_automl_tabular_training_job.assert_called_once_with( dataset=mock_tabular_dataset, diff --git a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py index 2404bb37e2..f7edcce1d9 100644 --- a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py +++ b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py @@ -33,6 +33,7 @@ def create_training_pipeline_tabular_regression_sample( tabular_regression_job = aiplatform.AutoMLTabularTrainingJob( display_name=display_name, + optimization_prediction_type="regression" ) my_tabular_dataset = aiplatform.TabularDataset(dataset_id) diff --git a/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py b/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py index 1e897b5851..d9a6b386e0 100644 --- a/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py +++ b/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py @@ -44,6 +44,7 @@ def test_create_training_pipeline_tabular_regression_sample( ) mock_get_automl_tabular_training_job.assert_called_once_with( display_name=constants.DISPLAY_NAME, + optimization_prediction_type="regression" ) mock_run_automl_tabular_training_job.assert_called_once_with( dataset=mock_tabular_dataset, diff --git a/setup.py b/setup.py index 4ef6968114..5ceb81b60e 100644 --- a/setup.py +++ b/setup.py @@ -33,9 +33,10 @@ exec(fp.read(), 
version) version = version["__version__"] -tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] +tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.7.0"] metadata_extra_require = ["pandas >= 1.0.0"] xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] +lit_extra_require = ["tensorflow >= 2.3.0", "pandas >= 1.0.0", "lit-nlp >= 0.4.0"] profiler_extra_require = [ "tensorboard-plugin-profile >= 2.4.0", "werkzeug >= 2.0.0", @@ -43,10 +44,17 @@ ] full_extra_require = list( - set(tensorboard_extra_require + metadata_extra_require + xai_extra_require) + set( + tensorboard_extra_require + + metadata_extra_require + + xai_extra_require + + lit_extra_require + ) ) testing_extra_require = ( - full_extra_require + profiler_extra_require + ["grpcio-testing", "pytest-xdist"] + full_extra_require + + profiler_extra_require + + ["grpcio-testing", "pytest-xdist", "ipython"] ) @@ -88,7 +96,8 @@ "tensorboard": tensorboard_extra_require, "testing": testing_extra_require, "xai": xai_extra_require, - "cloud-profiler": profiler_extra_require, + "lit": lit_extra_require, + "cloud_profiler": profiler_extra_require, }, python_requires=">=3.6", scripts=[], diff --git a/tests/system/aiplatform/e2e_base.py b/tests/system/aiplatform/e2e_base.py index c63c715d7c..61b9e7f36c 100644 --- a/tests/system/aiplatform/e2e_base.py +++ b/tests/system/aiplatform/e2e_base.py @@ -58,12 +58,12 @@ def setup_method(self): importlib.reload(initializer) importlib.reload(aiplatform) - @pytest.fixture() + @pytest.fixture(scope="class") def shared_state(self) -> Generator[Dict[str, Any], None, None]: shared_state = {} yield shared_state - @pytest.fixture() + @pytest.fixture(scope="class") def prepare_staging_bucket( self, shared_state: Dict[str, Any] ) -> Generator[storage.bucket.Bucket, None, None]: @@ -80,7 +80,7 @@ def prepare_staging_bucket( ) yield - @pytest.fixture() + @pytest.fixture(scope="class") def delete_staging_bucket(self, shared_state: Dict[str, Any]): """Delete the staging bucket and all it's contents""" @@ -90,7 +90,7 @@ def delete_staging_bucket(self, shared_state: Dict[str, Any]): bucket = shared_state["bucket"] bucket.delete(force=True) - @pytest.fixture(autouse=True) + @pytest.fixture(scope="class", autouse=True) def teardown(self, shared_state: Dict[str, Any]): """Delete every Vertex AI resource created during test""" @@ -104,8 +104,10 @@ def teardown(self, shared_state: Dict[str, Any]): for resource in shared_state["resources"]: try: - if isinstance(resource, aiplatform.Endpoint): - resource.delete(force=True) # Undeploy model then delete endpoint + if isinstance(resource, (aiplatform.Endpoint, aiplatform.Featurestore)): + # For endpoint, undeploy model then delete endpoint + # For featurestore, force delete its entity_types and features with the featurestore + resource.delete(force=True) else: resource.delete() except exceptions.GoogleAPIError as e: diff --git a/tests/system/aiplatform/test_featurestore.py b/tests/system/aiplatform/test_featurestore.py index 6107f826ec..65850f7d67 100644 --- a/tests/system/aiplatform/test_featurestore.py +++ b/tests/system/aiplatform/test_featurestore.py @@ -15,24 +15,235 @@ # limitations under the License. 
# +import logging + from google.cloud import aiplatform from tests.system.aiplatform import e2e_base +_TEST_USERS_ENTITY_TYPE_GCS_SRC = ( + "gs://cloud-samples-data-us-central1/vertex-ai/feature-store/datasets/users.avro" +) +_TEST_MOVIES_ENTITY_TYPE_GCS_SRC = ( + "gs://cloud-samples-data-us-central1/vertex-ai/feature-store/datasets/movies.avro" +) -class TestFeaturestore(e2e_base.TestEndToEnd): +_TEST_FEATURESTORE_ID = "movie_prediction" +_TEST_USER_ENTITY_TYPE_ID = "users" +_TEST_MOVIE_ENTITY_TYPE_ID = "movies" + +_TEST_USER_AGE_FEATURE_ID = "age" +_TEST_USER_GENDER_FEATURE_ID = "gender" +_TEST_USER_LIKED_GENRES_FEATURE_ID = "liked_genres" - _temp_prefix = "temp-vertex-sdk-e2e-feature-store-test" +_TEST_MOVIE_TITLE_FEATURE_ID = "title" +_TEST_MOVIE_GENRES_FEATURE_ID = "genres" +_TEST_MOVIE_AVERAGE_RATING_FEATURE_ID = "average_rating" + + +class TestFeaturestore(e2e_base.TestEndToEnd): - def test_create_and_get_featurestore(self, shared_state): + _temp_prefix = "temp_vertex_sdk_e2e_featurestore_test" + def test_create_get_list_featurestore(self, shared_state): aiplatform.init( project=e2e_base._PROJECT, location=e2e_base._LOCATION, ) - shared_state["resources"] = [] + base_list_featurestores = len(aiplatform.Featurestore.list()) + shared_state["base_list_searched_features"] = len(aiplatform.Feature.search()) + + featurestore_id = self._make_display_name(key=_TEST_FEATURESTORE_ID).replace( + "-", "_" + )[:60] + featurestore = aiplatform.Featurestore.create(featurestore_id=featurestore_id) + + shared_state["resources"] = [featurestore] + shared_state["featurestore"] = featurestore + shared_state["featurestore_name"] = featurestore.resource_name + + get_featurestore = aiplatform.Featurestore( + featurestore_name=featurestore.resource_name + ) + assert featurestore.resource_name == get_featurestore.resource_name list_featurestores = aiplatform.Featurestore.list() - assert len(list_featurestores) >= 0 + assert (len(list_featurestores) - base_list_featurestores) == 1 + + def test_create_get_list_entity_types(self, shared_state): + + assert shared_state["featurestore"] + assert shared_state["featurestore_name"] + + featurestore = shared_state["featurestore"] + featurestore_name = shared_state["featurestore_name"] + + aiplatform.init( + project=e2e_base._PROJECT, location=e2e_base._LOCATION, + ) + + # Users + user_entity_type = featurestore.create_entity_type( + entity_type_id=_TEST_USER_ENTITY_TYPE_ID + ) + shared_state["user_entity_type"] = user_entity_type + shared_state["user_entity_type_name"] = user_entity_type.resource_name + + get_user_entity_type = featurestore.get_entity_type( + entity_type_id=_TEST_USER_ENTITY_TYPE_ID + ) + assert user_entity_type.resource_name == get_user_entity_type.resource_name + + # Movies + movie_entity_type = aiplatform.EntityType.create( + entity_type_id=_TEST_MOVIE_ENTITY_TYPE_ID, + featurestore_name=featurestore_name, + ) + shared_state["movie_entity_type"] = movie_entity_type + shared_state["movie_entity_type_name"] = movie_entity_type.resource_name + + get_movie_entity_type = aiplatform.EntityType( + entity_type_name=movie_entity_type.resource_name + ) + assert movie_entity_type.resource_name == get_movie_entity_type.resource_name + + list_entity_types = aiplatform.EntityType.list( + featurestore_name=featurestore_name + ) + assert len(list_entity_types) == 2 + + def test_create_get_list_features(self, shared_state): + + assert shared_state["user_entity_type"] + assert shared_state["user_entity_type_name"] + user_entity_type = 
shared_state["user_entity_type"] + user_entity_type_name = shared_state["user_entity_type_name"] + + aiplatform.init( + project=e2e_base._PROJECT, location=e2e_base._LOCATION, + ) + + list_user_features = user_entity_type.list_features() + assert len(list_user_features) == 0 + + # User Features + user_age_feature = user_entity_type.create_feature( + feature_id=_TEST_USER_AGE_FEATURE_ID, value_type="INT64" + ) + + get_user_age_feature = user_entity_type.get_feature( + feature_id=_TEST_USER_AGE_FEATURE_ID + ) + assert user_age_feature.resource_name == get_user_age_feature.resource_name + + user_gender_feature = aiplatform.Feature.create( + feature_id=_TEST_USER_GENDER_FEATURE_ID, + value_type="STRING", + entity_type_name=user_entity_type_name, + ) + + get_user_gender_feature = aiplatform.Feature( + feature_name=user_gender_feature.resource_name + ) + assert ( + user_gender_feature.resource_name == get_user_gender_feature.resource_name + ) + + user_liked_genres_feature = user_entity_type.create_feature( + feature_id=_TEST_USER_LIKED_GENRES_FEATURE_ID, value_type="STRING_ARRAY", + ) + + get_user_liked_genres_feature = aiplatform.Feature( + feature_name=user_liked_genres_feature.resource_name + ) + assert ( + user_liked_genres_feature.resource_name + == get_user_liked_genres_feature.resource_name + ) + + list_user_features = user_entity_type.list_features() + assert len(list_user_features) == 3 + + def test_ingest_feature_values(self, shared_state, caplog): + + assert shared_state["user_entity_type"] + user_entity_type = shared_state["user_entity_type"] + + caplog.set_level(logging.INFO) + + aiplatform.init( + project=e2e_base._PROJECT, location=e2e_base._LOCATION, + ) + + user_entity_type.ingest_from_gcs( + feature_ids=[ + _TEST_USER_AGE_FEATURE_ID, + _TEST_USER_GENDER_FEATURE_ID, + _TEST_USER_LIKED_GENRES_FEATURE_ID, + ], + feature_time="update_time", + gcs_source_uris=_TEST_USERS_ENTITY_TYPE_GCS_SRC, + gcs_source_type="avro", + entity_id_field="user_id", + worker_count=2, + ) + + assert "EntityType feature values imported." in caplog.text + + caplog.clear() + + def test_batch_create_features_and_ingest_feature_values( + self, shared_state, caplog + ): + + assert shared_state["movie_entity_type"] + movie_entity_type = shared_state["movie_entity_type"] + + caplog.set_level(logging.INFO) + + aiplatform.init( + project=e2e_base._PROJECT, location=e2e_base._LOCATION, + ) + + movie_feature_configs = { + _TEST_MOVIE_TITLE_FEATURE_ID: {"value_type": "STRING"}, + _TEST_MOVIE_GENRES_FEATURE_ID: {"value_type": "STRING"}, + _TEST_MOVIE_AVERAGE_RATING_FEATURE_ID: {"value_type": "DOUBLE"}, + } + + list_movie_features = movie_entity_type.list_features() + assert len(list_movie_features) == 0 + + movie_entity_type.batch_create_features(feature_configs=movie_feature_configs) + + movie_entity_type.ingest_from_gcs( + feature_ids=[ + _TEST_MOVIE_TITLE_FEATURE_ID, + _TEST_MOVIE_GENRES_FEATURE_ID, + _TEST_MOVIE_AVERAGE_RATING_FEATURE_ID, + ], + feature_time="update_time", + gcs_source_uris=_TEST_MOVIES_ENTITY_TYPE_GCS_SRC, + gcs_source_type="avro", + entity_id_field="movie_id", + worker_count=2, + ) + + list_movie_features = movie_entity_type.list_features() + assert len(list_movie_features) == 3 + + assert "EntityType feature values imported." 
in caplog.text + + caplog.clear() + + def test_search_features(self, shared_state): + + assert shared_state["base_list_searched_features"] is not None + + aiplatform.init( + project=e2e_base._PROJECT, location=e2e_base._LOCATION, + ) list_searched_features = aiplatform.Feature.search() - assert len(list_searched_features) >= 0 + assert ( + len(list_searched_features) - shared_state["base_list_searched_features"] + ) == 6 diff --git a/tests/system/aiplatform/test_tensorboard.py b/tests/system/aiplatform/test_tensorboard.py index 9ec8179ca5..5c3d3f003a 100644 --- a/tests/system/aiplatform/test_tensorboard.py +++ b/tests/system/aiplatform/test_tensorboard.py @@ -42,3 +42,44 @@ def test_create_and_get_tensorboard(self, shared_state): list_tb = aiplatform.Tensorboard.list() assert len(list_tb) > 0 + + tb_experiment = aiplatform.TensorboardExperiment.create( + tensorboard_experiment_id="vertex-sdk-e2e-test-experiment", + tensorboard_name=tb.resource_name, + display_name=self._make_display_name("tensorboard_experiment"), + description="Vertex SDK Integration test.", + labels={"test": "labels"}, + ) + + shared_state["resources"].append(tb_experiment) + + get_tb_experiment = aiplatform.TensorboardExperiment( + tb_experiment.resource_name + ) + + assert tb_experiment.resource_name == get_tb_experiment.resource_name + + list_tb_experiment = aiplatform.TensorboardExperiment.list( + tensorboard_name=tb.resource_name + ) + + assert len(list_tb_experiment) > 0 + + tb_run = aiplatform.TensorboardRun.create( + tensorboard_run_id="test-run", + tensorboard_experiment_name=tb_experiment.resource_name, + description="Vertex SDK Integration test run", + labels={"test": "labels"}, + ) + + shared_state["resources"].append(tb_run) + + get_tb_run = aiplatform.TensorboardRun(tb_run.resource_name) + + assert tb_run.resource_name == get_tb_run.resource_name + + list_tb_run = aiplatform.TensorboardRun.list( + tensorboard_experiment_name=tb_experiment.resource_name + ) + + assert len(list_tb_run) > 0 diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 4ec527e31e..7f9856a22b 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -15,6 +15,7 @@ # limitations under the License. 
# +import copy import pytest from unittest import mock @@ -56,8 +57,10 @@ _TEST_DISPLAY_NAME = "test-display-name" _TEST_DISPLAY_NAME_2 = "test-display-name-2" +_TEST_DISPLAY_NAME_3 = "test-display-name-3" _TEST_ID = "1028944691210842416" _TEST_ID_2 = "4366591682456584192" +_TEST_ID_3 = "5820582938582924817" _TEST_DESCRIPTION = "test-description" _TEST_ENDPOINT_NAME = ( @@ -80,6 +83,24 @@ _TEST_DEPLOYED_MODELS = [ gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), + gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3), +] + +_TEST_TRAFFIC_SPLIT = {_TEST_ID: 0, _TEST_ID_2: 100, _TEST_ID_3: 0} + +_TEST_LONG_TRAFFIC_SPLIT = { + "m1": 40, + "m2": 10, + "m3": 30, + "m4": 0, + "m5": 5, + "m6": 8, + "m7": 7, +} +_TEST_LONG_TRAFFIC_SPLIT_SORTED_IDS = ["m4", "m5", "m7", "m6", "m2", "m3", "m1"] +_TEST_LONG_DEPLOYED_MODELS = [ + gca_endpoint.DeployedModel(id=id, display_name=f"{id}_display_name") + for id in _TEST_LONG_TRAFFIC_SPLIT.keys() ] _TEST_MACHINE_TYPE = "n1-standard-32" @@ -200,6 +221,21 @@ def get_endpoint_with_models_mock(): display_name=_TEST_DISPLAY_NAME, name=_TEST_ENDPOINT_NAME, deployed_models=_TEST_DEPLOYED_MODELS, + traffic_split=_TEST_TRAFFIC_SPLIT, + ) + yield get_endpoint_mock + + +@pytest.fixture +def get_endpoint_with_many_models_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "get_endpoint" + ) as get_endpoint_mock: + get_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + deployed_models=_TEST_LONG_DEPLOYED_MODELS, + traffic_split=_TEST_LONG_TRAFFIC_SPLIT, ) yield get_endpoint_mock @@ -378,7 +414,6 @@ def test_constructor(self, create_endpoint_client_mock): client_class=utils.EndpointClientWithOverride, credentials=initializer.global_config.credentials, location_override=_TEST_LOCATION, - prediction_client=False, ), mock.call( client_class=utils.PredictionClientWithOverride, @@ -464,7 +499,6 @@ def test_constructor_with_custom_credentials(self, create_endpoint_client_mock): client_class=utils.EndpointClientWithOverride, credentials=creds, location_override=_TEST_LOCATION, - prediction_client=False, ), mock.call( client_class=utils.PredictionClientWithOverride, @@ -992,16 +1026,18 @@ def test_undeploy_with_traffic_split(self, undeploy_model_mock, sync): @pytest.mark.usefixtures("get_endpoint_mock") @pytest.mark.parametrize("sync", [True, False]) def test_undeploy_raise_error_traffic_split_total(self, sync): - with pytest.raises(ValueError): + with pytest.raises(ValueError) as e: test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) test_endpoint.undeploy( deployed_model_id="model1", traffic_split={"model2": 99}, sync=sync ) + assert e.match("Sum of all traffic within traffic split needs to be 100.") + @pytest.mark.usefixtures("get_endpoint_mock") @pytest.mark.parametrize("sync", [True, False]) def test_undeploy_raise_error_undeployed_model_traffic(self, sync): - with pytest.raises(ValueError): + with pytest.raises(ValueError) as e: test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) test_endpoint.undeploy( deployed_model_id="model1", @@ -1009,6 +1045,65 @@ def test_undeploy_raise_error_undeployed_model_traffic(self, sync): sync=sync, ) + assert e.match("Model being undeployed should have 0 traffic.") + + @pytest.mark.usefixtures("get_endpoint_with_models_mock") + @pytest.mark.parametrize("sync", [True, False]) + def 
test_undeploy_raises_error_on_zero_leftover_traffic(self, sync): + """ + Attempting to undeploy model with 100% traffic on an Endpoint with + multiple models deployed without an updated traffic_split should + raise an informative error. + """ + + traffic_remaining = _TEST_TRAFFIC_SPLIT[_TEST_ID_2] + + assert traffic_remaining == 100 # Confirm this model has all traffic + assert sum(_TEST_TRAFFIC_SPLIT.values()) == 100 # Mock traffic sums to 100% + + with pytest.raises(ValueError) as e: + test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) + test_endpoint.undeploy( + deployed_model_id=_TEST_ID_2, sync=sync, + ) + + assert e.match( + f"Undeploying deployed model '{_TEST_ID_2}' would leave the remaining " + f"traffic split at 0%." + ) + + @pytest.mark.usefixtures("get_endpoint_with_models_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_undeploy_zero_traffic_model_without_new_traffic_split( + self, undeploy_model_mock, sync + ): + """ + Attempting to undeploy model with zero traffic without providing + a new traffic split should not raise any errors. + """ + + traffic_remaining = _TEST_TRAFFIC_SPLIT[_TEST_ID_3] + + assert not traffic_remaining # Confirm there is zero traffic + + test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) + test_endpoint.undeploy( + deployed_model_id=_TEST_ID_3, sync=sync, + ) + + if not sync: + test_endpoint.wait() + + expected_new_traffic_split = copy.deepcopy(_TEST_TRAFFIC_SPLIT) + expected_new_traffic_split.pop(_TEST_ID_3) + + undeploy_model_mock.assert_called_once_with( + endpoint=test_endpoint.resource_name, + deployed_model_id=_TEST_ID_3, + traffic_split=expected_new_traffic_split, + metadata=(), + ) + def test_predict(self, get_endpoint_mock, predict_client_predict_mock): test_endpoint = models.Endpoint(_TEST_ID) @@ -1059,10 +1154,15 @@ def test_list_models(self, get_endpoint_with_models_mock): assert my_models == _TEST_DEPLOYED_MODELS - @pytest.mark.usefixtures("get_endpoint_with_models_mock") + @pytest.mark.usefixtures("get_endpoint_with_many_models_mock") @pytest.mark.parametrize("sync", [True, False]) def test_undeploy_all(self, sdk_private_undeploy_mock, sync): + # Ensure mock traffic split deployed model IDs are same as expected IDs + assert set(_TEST_LONG_TRAFFIC_SPLIT_SORTED_IDS) == set( + _TEST_LONG_TRAFFIC_SPLIT.keys() + ) + ept = aiplatform.Endpoint(_TEST_ID) ept.undeploy_all(sync=sync) @@ -1070,12 +1170,12 @@ def test_undeploy_all(self, sdk_private_undeploy_mock, sync): ept.wait() # undeploy_all() results in an undeploy() call for each deployed_model + # Models are undeployed in ascending order of traffic percentage sdk_private_undeploy_mock.assert_has_calls( [ - mock.call(deployed_model_id=deployed_model.id, sync=sync) - for deployed_model in _TEST_DEPLOYED_MODELS + mock.call(deployed_model_id=deployed_model_id, sync=sync) + for deployed_model_id in _TEST_LONG_TRAFFIC_SPLIT_SORTED_IDS ], - any_order=True, ) @pytest.mark.usefixtures("list_endpoints_mock") diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py new file mode 100644 index 0000000000..718f62b022 --- /dev/null +++ b/tests/unit/aiplatform/test_explain_lit.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import pytest +import tensorflow as tf +import pandas as pd + +from lit_nlp.api import types as lit_types +from lit_nlp import notebook +from unittest import mock +from google.cloud.aiplatform.explain.lit import ( + create_lit_dataset, + create_lit_model, + open_lit, + set_up_and_open_lit, +) + + +@pytest.fixture +def widget_render_mock(): + with mock.patch.object(notebook.LitWidget, "render") as render_mock: + yield render_mock + + +@pytest.fixture +def set_up_sequential(tmpdir): + # Set up a sequential model + seq_model = tf.keras.models.Sequential() + seq_model.add(tf.keras.layers.Dense(32, activation="relu", input_shape=(2,))) + seq_model.add(tf.keras.layers.Dense(32, activation="relu")) + seq_model.add(tf.keras.layers.Dense(1, activation="sigmoid")) + saved_model_path = str(tmpdir.mkdir("tmp")) + tf.saved_model.save(seq_model, saved_model_path) + feature_types = collections.OrderedDict( + [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())] + ) + label_types = collections.OrderedDict([("label", lit_types.RegressionScore())]) + yield feature_types, label_types, saved_model_path + + +@pytest.fixture +def set_up_pandas_dataframe_and_columns(): + dataframe = pd.DataFrame.from_dict( + {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]} + ) + columns = collections.OrderedDict( + [ + ("feature_1", lit_types.Scalar()), + ("feature_2", lit_types.Scalar()), + ("label", lit_types.RegressionScore()), + ] + ) + yield dataframe, columns + + +def test_create_lit_dataset_from_pandas_returns_dataset( + set_up_pandas_dataframe_and_columns, +): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + lit_dataset = create_lit_dataset(pd_dataset, lit_columns) + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0}, + ] + + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples + + +def test_create_lit_model_from_tensorflow_returns_model(set_up_sequential): + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + {"feature_1": 3.0, "feature_2": 4.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 2 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 + + +def test_open_lit( + set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock +): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + lit_dataset = create_lit_dataset(pd_dataset, lit_columns) + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + + open_lit({"model": lit_model}, {"dataset": lit_dataset}) + widget_render_mock.assert_called_once() + + +def test_set_up_and_open_lit( + set_up_sequential, 
set_up_pandas_dataframe_and_columns, widget_render_mock +): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + feature_types, label_types, saved_model_path = set_up_sequential + lit_dataset, lit_model = set_up_and_open_lit( + pd_dataset, lit_columns, saved_model_path, feature_types, label_types + ) + + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0}, + ] + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + {"feature_1": 3.0, "feature_2": 4.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 2 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 + + widget_render_mock.assert_called_once() diff --git a/tests/unit/aiplatform/test_featurestores.py b/tests/unit/aiplatform/test_featurestores.py index 4cede4ba09..f76e6ecf22 100644 --- a/tests/unit/aiplatform/test_featurestores.py +++ b/tests/unit/aiplatform/test_featurestores.py @@ -16,6 +16,7 @@ # import pytest +import datetime from unittest import mock from importlib import reload @@ -27,18 +28,19 @@ from google.cloud import aiplatform from google.cloud.aiplatform import base from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import featurestore_utils - from google.cloud.aiplatform_v1.services.featurestore_service import ( client as featurestore_service_client, ) - from google.cloud.aiplatform_v1.types import ( - featurestore as gca_featurestore, + encryption_spec as gca_encryption_spec, entity_type as gca_entity_type, feature as gca_feature, - encryption_spec as gca_encryption_spec, + featurestore as gca_featurestore, + featurestore_service as gca_featurestore_service, + io as gca_io, ) # project @@ -66,6 +68,9 @@ _TEST_FEATURE_ID = "feature_id" _TEST_FEATURE_NAME = f"{_TEST_ENTITY_TYPE_NAME}/features/{_TEST_FEATURE_ID}" _TEST_FEATURE_INVALID = f"{_TEST_ENTITY_TYPE_NAME}/feature/{_TEST_FEATURE_ID}" +_TEST_FEATURE_VALUE_TYPE = "INT64" +_TEST_FEATURE_VALUE_TYPE_ENUM = 9 +_TEST_FEATURE_ID_INVALID = "1feature_id" # misc _TEST_DESCRIPTION = "my description" @@ -118,6 +123,38 @@ gca_feature.Feature(name=_TEST_FEATURE_NAME,), ] +_TEST_FEATURE_CONFIGS = { + "my_feature_id_1": {"value_type": _TEST_FEATURE_VALUE_TYPE}, +} + +_TEST_IMPORTING_FEATURE_IDS = ["my_feature_id_1"] + +_TEST_IMPORTING_FEATURE_SOURCE_FIELDS = { + "my_feature_id_1": "my_feature_id_1_source_field", +} + +_TEST_FEATURE_TIME_FIELD = "feature_time_field" +_TEST_FEATURE_TIME = datetime.datetime.now() + +_TEST_BQ_SOURCE_URI = "bq://project.dataset.table_name" +_TEST_GCS_AVRO_SOURCE_URIS = [ + "gs://my_bucket/my_file_1.avro", +] +_TEST_GCS_CSV_SOURCE_URIS = [ + "gs://my_bucket/my_file_1.csv", +] +_TEST_GCS_SOURCE_TYPE_CSV = "csv" +_TEST_GCS_SOURCE_TYPE_AVRO = "avro" +_TEST_GCS_SOURCE_TYPE_INVALID = "json" + +_TEST_BQ_SOURCE = gca_io.BigQuerySource(input_uri=_TEST_BQ_SOURCE_URI) +_TEST_AVRO_SOURCE = gca_io.AvroSource( + gcs_source=gca_io.GcsSource(uris=_TEST_GCS_AVRO_SOURCE_URIS) +) +_TEST_CSV_SOURCE = gca_io.CsvSource( + gcs_source=gca_io.GcsSource(uris=_TEST_GCS_CSV_SOURCE_URIS) +) + # All Featurestore Mocks @pytest.fixture @@ -173,6 +210,23 @@ def search_features_mock(): yield search_features_mock +@pytest.fixture +def 
create_featurestore_mock(): + with patch.object( + featurestore_service_client.FeaturestoreServiceClient, "create_featurestore" + ) as create_featurestore_mock: + create_featurestore_lro_mock = mock.Mock(operation.Operation) + create_featurestore_lro_mock.result.return_value = gca_featurestore.Featurestore( + name=_TEST_FEATURESTORE_NAME, + online_serving_config=gca_featurestore.Featurestore.OnlineServingConfig( + fixed_node_count=_TEST_ONLINE_SERVING_CONFIG + ), + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + create_featurestore_mock.return_value = create_featurestore_lro_mock + yield create_featurestore_mock + + # ALL EntityType Mocks @pytest.fixture def get_entity_type_mock(): @@ -214,6 +268,29 @@ def delete_entity_type_mock(): yield delete_entity_type_mock +@pytest.fixture +def create_entity_type_mock(): + with patch.object( + featurestore_service_client.FeaturestoreServiceClient, "create_entity_type" + ) as create_entity_type_mock: + create_entity_type_lro_mock = mock.Mock(operation.Operation) + create_entity_type_lro_mock.result.return_value = gca_entity_type.EntityType( + name=_TEST_ENTITY_TYPE_NAME + ) + create_entity_type_mock.return_value = create_entity_type_lro_mock + yield create_entity_type_mock + + +@pytest.fixture +def import_feature_values_mock(): + with patch.object( + featurestore_service_client.FeaturestoreServiceClient, "import_feature_values" + ) as import_feature_values_mock: + import_feature_values_lro_mock = mock.Mock(operation.Operation) + import_feature_values_mock.return_value = import_feature_values_lro_mock + yield import_feature_values_mock + + # ALL Feature Mocks @pytest.fixture def get_feature_mock(): @@ -253,96 +330,157 @@ def delete_feature_mock(): yield delete_feature_mock +@pytest.fixture +def create_feature_mock(): + with patch.object( + featurestore_service_client.FeaturestoreServiceClient, "create_feature" + ) as create_feature_mock: + create_feature_lro_mock = mock.Mock(operation.Operation) + create_feature_lro_mock.result.return_value = gca_feature.Feature( + name=_TEST_FEATURE_NAME, value_type=_TEST_FEATURE_VALUE_TYPE_ENUM, + ) + create_feature_mock.return_value = create_feature_lro_mock + yield create_feature_mock + + +@pytest.fixture +def batch_create_features_mock(): + with patch.object( + featurestore_service_client.FeaturestoreServiceClient, "batch_create_features" + ) as batch_create_features_mock: + batch_create_features_lro_mock = mock.Mock(operation.Operation) + batch_create_features_mock.return_value = batch_create_features_lro_mock + yield batch_create_features_mock + + class TestFeaturestoreUtils: @pytest.mark.parametrize( - "resource_id, expected", - [ - ("resource_id", True), - ("resource_id12345", True), - ("12345resource_id", False), - ("_resource_id", True), - ("resource_id/1234", False), - ("_resource_id/1234", False), - ("resource-id-1234", False), - ("123456", False), - ("c" * 61, False), - ("_123456", True), - ], + "resource_id", ["resource_id", "resource_id12345", "_resource_id", "_123456"], ) - def test_validate_resource_id(self, resource_id: str, expected: bool): - assert expected == featurestore_utils.validate_id(resource_id) + def test_validate_resource_id(self, resource_id: str): + featurestore_utils.validate_id(resource_id) @pytest.mark.parametrize( - "feature_name, featurestore_id, entity_type_id", + "resource_id", [ - (_TEST_FEATURE_NAME, None, None,), - (_TEST_FEATURE_ID, _TEST_FEATURESTORE_ID, _TEST_ENTITY_TYPE_ID,), + "12345resource_id", + "resource_id/1234", + "_resource_id/1234", + "resource-id-1234", + "123456", 
+ "c" * 61, ], ) - def test_validate_and_get_feature_resource_ids( - self, feature_name: str, featurestore_id: str, entity_type_id: str, - ): - assert ( - _TEST_FEATURESTORE_ID, - _TEST_ENTITY_TYPE_ID, - _TEST_FEATURE_ID, - ) == featurestore_utils.validate_and_get_feature_resource_ids( - feature_name=feature_name, - featurestore_id=featurestore_id, - entity_type_id=entity_type_id, - ) + def test_validate_invalid_resource_id(self, resource_id: str): + with pytest.raises(ValueError): + featurestore_utils.validate_id(resource_id) @pytest.mark.parametrize( - "feature_name, featurestore_id, entity_type_id", + "feature_id", ["resource_id", "resource_id12345", "_resource_id", "_123456"], + ) + def test_validate_feature_id(self, feature_id: str): + assert featurestore_utils.validate_feature_id(feature_id=feature_id) is None + + @pytest.mark.parametrize( + "feature_id", [ - (_TEST_FEATURE_INVALID, None, None,), - (_TEST_FEATURE_ID, None, _TEST_ENTITY_TYPE_ID,), - (_TEST_FEATURE_ID, None, None,), - (_TEST_FEATURE_ID, _TEST_FEATURESTORE_NAME, None,), + "12345resource_id", + "resource_id/1234", + "_resource_id/1234", + "resource-id-1234", + "123456", + "c" * 61, + "entity_id", + "Entity_ID", + "feature_timestamp", + "Feature_Timestamp", + "arrival_timestamp", + "Arrival_Timestamp", ], ) - def test_validate_and_get_feature_resource_ids_with_raise( - self, feature_name: str, featurestore_id: str, entity_type_id: str, - ): + def test_validate_feature_id_with_raise(self, feature_id: str): with pytest.raises(ValueError): - featurestore_utils.validate_and_get_feature_resource_ids( - feature_name=feature_name, - featurestore_id=featurestore_id, - entity_type_id=entity_type_id, - ) + featurestore_utils.validate_feature_id(feature_id=feature_id) @pytest.mark.parametrize( - "entity_type_name, featurestore_id", + "value_type", [ - (_TEST_ENTITY_TYPE_NAME, None,), - (_TEST_ENTITY_TYPE_ID, _TEST_FEATURESTORE_ID,), + "BOOL", + "BOOL_ARRAY", + "DOUBLE", + "DOUBLE_ARRAY", + "INT64", + "INT64_ARRAY", + "STRING", + "STRING_ARRAY", + "BYTES", ], ) - def test_validate_and_get_entity_type_resource_ids( - self, entity_type_name: str, featurestore_id: str - ): - assert ( - _TEST_FEATURESTORE_ID, - _TEST_ENTITY_TYPE_ID, - ) == featurestore_utils.validate_and_get_entity_type_resource_ids( - entity_type_name=entity_type_name, featurestore_id=featurestore_id - ) + def test_validate_value_type(self, value_type: str): + assert featurestore_utils.validate_value_type(value_type=value_type) is None @pytest.mark.parametrize( - "entity_type_name, featurestore_id", + "value_type", [ - (_TEST_ENTITY_TYPE_INVALID, None,), - (_TEST_ENTITY_TYPE_ID, None,), - (_TEST_ENTITY_TYPE_ID, _TEST_FEATURESTORE_NAME,), + "INT", + "INT_array", + "STR", + "double", + "bool", + "array", + "INT32", + "VALUE_TYPE_UNSPECIFIED", ], ) - def test_validate_and_get_entity_type_resource_ids_with_raise( - self, entity_type_name: str, featurestore_id: str, + def test_validate_value_type_with_raise(self, value_type: str): + with pytest.raises(ValueError): + featurestore_utils.validate_value_type(value_type=value_type) + + +class Test_FeatureConfig: + def test_feature_config_return_create_feature_request(self): + + featureConfig = featurestore_utils._FeatureConfig( + feature_id=_TEST_FEATURE_ID, + value_type=_TEST_FEATURE_VALUE_TYPE, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) + + gapic_feature = gca_feature.Feature( + description=_TEST_DESCRIPTION, + value_type=_TEST_FEATURE_VALUE_TYPE_ENUM, + labels=_TEST_LABELS, + ) + + expected_request = 
gca_featurestore_service.CreateFeatureRequest( + feature=gapic_feature, feature_id=_TEST_FEATURE_ID, + ) + + assert featureConfig.get_create_feature_request() == expected_request + + def test_feature_config_create_feature_request_raises_invalid_feature_id(self): + featureConfig = featurestore_utils._FeatureConfig( + feature_id=_TEST_FEATURE_ID_INVALID, + value_type=_TEST_FEATURE_VALUE_TYPE, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) + with pytest.raises(ValueError): + featureConfig.get_create_feature_request() + + @pytest.mark.parametrize("value_type", ["INT", "VALUE_TYPE_UNSPECIFIED"]) + def test_feature_config_create_feature_request_raises_invalid_value_type( + self, value_type ): + featureConfig = featurestore_utils._FeatureConfig( + feature_id=_TEST_FEATURE_ID, + value_type=value_type, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) with pytest.raises(ValueError): - featurestore_utils.validate_and_get_entity_type_resource_ids( - entity_type_name=entity_type_name, featurestore_id=featurestore_id - ) + featureConfig.get_create_feature_request() class TestFeaturestore: @@ -438,21 +576,31 @@ def test_list_featurestores(self, list_featurestores_mock): for my_featurestore in my_featurestore_list: assert type(my_featurestore) == aiplatform.Featurestore - @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "force, sync", + [ + (None, True), + (True, True), + (False, True), + (None, False), + (True, False), + (False, False), + ], + ) @pytest.mark.usefixtures("get_featurestore_mock") - def test_delete_featurestore(self, delete_featurestore_mock, sync): + def test_delete_featurestore(self, delete_featurestore_mock, force, sync): aiplatform.init(project=_TEST_PROJECT) my_featurestore = aiplatform.Featurestore( featurestore_name=_TEST_FEATURESTORE_ID ) - my_featurestore.delete(sync=sync) + my_featurestore.delete(sync=sync, force=force) if not sync: my_featurestore.wait() delete_featurestore_mock.assert_called_once_with( - name=my_featurestore.resource_name + name=my_featurestore.resource_name, force=force, ) @pytest.mark.usefixtures("get_featurestore_mock") @@ -471,16 +619,28 @@ def test_list_entity_types(self, list_entity_types_mock): for my_entity_type in my_entity_type_list: assert type(my_entity_type) == aiplatform.EntityType - @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "force, sync", + [ + (None, True), + (True, True), + (False, True), + (None, False), + (True, False), + (False, False), + ], + ) @pytest.mark.usefixtures("get_featurestore_mock", "get_entity_type_mock") - def test_delete_entity_types(self, delete_entity_type_mock, sync): + def test_delete_entity_types(self, delete_entity_type_mock, force, sync): aiplatform.init(project=_TEST_PROJECT) my_featurestore = aiplatform.Featurestore( featurestore_name=_TEST_FEATURESTORE_ID ) my_featurestore.delete_entity_types( - entity_type_ids=[_TEST_ENTITY_TYPE_ID, _TEST_ENTITY_TYPE_ID], sync=sync + entity_type_ids=[_TEST_ENTITY_TYPE_ID, _TEST_ENTITY_TYPE_ID], + sync=sync, + force=force, ) if not sync: @@ -488,12 +648,69 @@ def test_delete_entity_types(self, delete_entity_type_mock, sync): delete_entity_type_mock.assert_has_calls( calls=[ - mock.call(name=_TEST_ENTITY_TYPE_NAME), - mock.call(name=_TEST_ENTITY_TYPE_NAME), + mock.call(name=_TEST_ENTITY_TYPE_NAME, force=force), + mock.call(name=_TEST_ENTITY_TYPE_NAME, force=force), ], any_order=True, ) + @pytest.mark.usefixtures("get_featurestore_mock", "get_entity_type_mock") + @pytest.mark.parametrize("sync", 
[True, False]) + def test_create_entity_type(self, create_entity_type_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_featurestore = aiplatform.Featurestore( + featurestore_name=_TEST_FEATURESTORE_NAME + ) + my_entity_type = my_featurestore.create_entity_type( + entity_type_id=_TEST_ENTITY_TYPE_ID, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + sync=sync, + ) + + if not sync: + my_entity_type.wait() + + expected_entity_type = gca_entity_type.EntityType( + labels=_TEST_LABELS, description=_TEST_DESCRIPTION, + ) + create_entity_type_mock.assert_called_once_with( + parent=_TEST_FEATURESTORE_NAME, + entity_type=expected_entity_type, + entity_type_id=_TEST_ENTITY_TYPE_ID, + metadata=_TEST_REQUEST_METADATA, + ) + + @pytest.mark.usefixtures("get_featurestore_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_featurestore(self, create_featurestore_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_featurestore = aiplatform.Featurestore.create( + featurestore_id=_TEST_FEATURESTORE_ID, + online_store_fixed_node_count=_TEST_ONLINE_SERVING_CONFIG, + labels=_TEST_LABELS, + encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + ) + + if not sync: + my_featurestore.wait() + + expected_featurestore = gca_featurestore.Featurestore( + labels=_TEST_LABELS, + online_serving_config=gca_featurestore.Featurestore.OnlineServingConfig( + fixed_node_count=_TEST_ONLINE_SERVING_CONFIG + ), + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + create_featurestore_mock.assert_called_once_with( + parent=_TEST_PARENT, + featurestore=expected_featurestore, + featurestore_id=_TEST_FEATURESTORE_ID, + metadata=_TEST_REQUEST_METADATA, + ) + class TestEntityType: def setup_method(self): @@ -615,6 +832,228 @@ def test_delete_features(self, delete_feature_mock, sync): any_order=True, ) + @pytest.mark.usefixtures("get_entity_type_mock", "get_feature_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_feature(self, create_feature_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + my_feature = my_entity_type.create_feature( + feature_id=_TEST_FEATURE_ID, + value_type=_TEST_FEATURE_VALUE_TYPE, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) + + if not sync: + my_feature.wait() + + expected_feature = gca_feature.Feature( + value_type=_TEST_FEATURE_VALUE_TYPE_ENUM, + labels=_TEST_LABELS, + description=_TEST_DESCRIPTION, + ) + expected_request = gca_featurestore_service.CreateFeatureRequest( + parent=_TEST_ENTITY_TYPE_NAME, + feature=expected_feature, + feature_id=_TEST_FEATURE_ID, + ) + + create_feature_mock.assert_called_once_with( + request=expected_request, metadata=_TEST_REQUEST_METADATA, + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_entity_type(self, create_entity_type_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType.create( + entity_type_id=_TEST_ENTITY_TYPE_ID, + featurestore_name=_TEST_FEATURESTORE_NAME, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) + + if not sync: + my_entity_type.wait() + + expected_entity_type = gca_entity_type.EntityType( + description=_TEST_DESCRIPTION, labels=_TEST_LABELS, + ) + create_entity_type_mock.assert_called_once_with( + parent=_TEST_FEATURESTORE_NAME, + entity_type=expected_entity_type, + entity_type_id=_TEST_ENTITY_TYPE_ID, + metadata=_TEST_REQUEST_METADATA, + ) + + 
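For reference, a minimal sketch of the end-to-end creation flow these Featurestore, EntityType, and Feature tests exercise, assuming aiplatform.init has been called with a valid project and location; the resource IDs and GCS path below are purely illustrative, not part of the change itself:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")  # assumed project/region

# Create a featurestore, an entity type within it, and features on that entity type.
fs = aiplatform.Featurestore.create(featurestore_id="movie_prediction")
users = fs.create_entity_type(entity_type_id="users")
users.create_feature(feature_id="age", value_type="INT64")
users.batch_create_features(
    feature_configs={
        "gender": {"value_type": "STRING"},
        "liked_genres": {"value_type": "STRING_ARRAY"},
    }
)

# Ingest feature values from an Avro file in GCS (the bucket path is hypothetical).
users.ingest_from_gcs(
    feature_ids=["age", "gender", "liked_genres"],
    feature_time="update_time",
    gcs_source_uris="gs://my-bucket/users.avro",
    gcs_source_type="avro",
    entity_id_field="user_id",
)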
@pytest.mark.usefixtures("get_entity_type_mock") + def test_validate_and_get_create_feature_requests(self): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + create_feature_requests = my_entity_type._validate_and_get_create_feature_requests( + feature_configs=_TEST_FEATURE_CONFIGS + ) + + expected_requests = [ + gca_featurestore_service.CreateFeatureRequest( + feature=gca_feature.Feature(value_type=_TEST_FEATURE_VALUE_TYPE_ENUM), + feature_id="my_feature_id_1", + ), + ] + assert create_feature_requests == expected_requests + + @pytest.mark.usefixtures("get_entity_type_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_batch_create_features(self, batch_create_features_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + my_entity_type.batch_create_features(feature_configs=_TEST_FEATURE_CONFIGS) + + if not sync: + my_entity_type.wait() + + expected_requests = [ + gca_featurestore_service.CreateFeatureRequest( + feature=gca_feature.Feature(value_type=_TEST_FEATURE_VALUE_TYPE_ENUM), + feature_id="my_feature_id_1", + ), + ] + + batch_create_features_mock.assert_called_once_with( + parent=my_entity_type.resource_name, + requests=expected_requests, + metadata=_TEST_REQUEST_METADATA, + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + def test_validate_and_get_import_feature_values_request_with_source_fields(self): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + true_import_feature_values_request = gca_featurestore_service.ImportFeatureValuesRequest( + bigquery_source=_TEST_BQ_SOURCE, + feature_time_field=_TEST_FEATURE_TIME_FIELD, + entity_type=_TEST_ENTITY_TYPE_NAME, + feature_specs=[ + gca_featurestore_service.ImportFeatureValuesRequest.FeatureSpec( + id="my_feature_id_1", source_field="my_feature_id_1_source_field" + ), + ], + ) + assert ( + true_import_feature_values_request + == my_entity_type._validate_and_get_import_feature_values_request( + feature_ids=_TEST_IMPORTING_FEATURE_IDS, + feature_time=_TEST_FEATURE_TIME_FIELD, + data_source=_TEST_BQ_SOURCE, + feature_source_fields=_TEST_IMPORTING_FEATURE_SOURCE_FIELDS, + ) + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + def test_validate_and_get_import_feature_values_request_without_source_fields(self): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + + true_import_feature_values_request = gca_featurestore_service.ImportFeatureValuesRequest( + entity_type=_TEST_ENTITY_TYPE_NAME, + feature_specs=[ + gca_featurestore_service.ImportFeatureValuesRequest.FeatureSpec( + id="my_feature_id_1" + ), + ], + csv_source=_TEST_CSV_SOURCE, + feature_time=utils.get_timestamp_proto(_TEST_FEATURE_TIME), + ) + assert ( + true_import_feature_values_request + == my_entity_type._validate_and_get_import_feature_values_request( + feature_ids=_TEST_IMPORTING_FEATURE_IDS, + feature_time=_TEST_FEATURE_TIME, + data_source=_TEST_CSV_SOURCE, + ) + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_ingest_from_bq(self, import_feature_values_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + my_entity_type.ingest_from_bq( + feature_ids=_TEST_IMPORTING_FEATURE_IDS, + 
feature_time=_TEST_FEATURE_TIME_FIELD, + bq_source_uri=_TEST_BQ_SOURCE_URI, + feature_source_fields=_TEST_IMPORTING_FEATURE_SOURCE_FIELDS, + sync=sync, + ) + + if not sync: + my_entity_type.wait() + + true_import_feature_values_request = gca_featurestore_service.ImportFeatureValuesRequest( + entity_type=_TEST_ENTITY_TYPE_NAME, + feature_specs=[ + gca_featurestore_service.ImportFeatureValuesRequest.FeatureSpec( + id="my_feature_id_1", source_field="my_feature_id_1_source_field" + ), + ], + bigquery_source=_TEST_BQ_SOURCE, + feature_time_field=_TEST_FEATURE_TIME_FIELD, + ) + import_feature_values_mock.assert_called_once_with( + request=true_import_feature_values_request, metadata=_TEST_REQUEST_METADATA, + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_ingest_from_gcs(self, import_feature_values_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + my_entity_type.ingest_from_gcs( + feature_ids=_TEST_IMPORTING_FEATURE_IDS, + feature_time=_TEST_FEATURE_TIME, + gcs_source_uris=_TEST_GCS_AVRO_SOURCE_URIS, + gcs_source_type=_TEST_GCS_SOURCE_TYPE_AVRO, + sync=sync, + ) + + if not sync: + my_entity_type.wait() + + true_import_feature_values_request = gca_featurestore_service.ImportFeatureValuesRequest( + entity_type=_TEST_ENTITY_TYPE_NAME, + feature_specs=[ + gca_featurestore_service.ImportFeatureValuesRequest.FeatureSpec( + id="my_feature_id_1" + ), + ], + avro_source=_TEST_AVRO_SOURCE, + feature_time=utils.get_timestamp_proto(_TEST_FEATURE_TIME), + ) + import_feature_values_mock.assert_called_once_with( + request=true_import_feature_values_request, metadata=_TEST_REQUEST_METADATA, + ) + + @pytest.mark.usefixtures("get_entity_type_mock") + def test_ingest_from_gcs_with_invalid_gcs_source_type(self): + aiplatform.init(project=_TEST_PROJECT) + + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + with pytest.raises(ValueError): + my_entity_type.ingest_from_gcs( + feature_ids=_TEST_IMPORTING_FEATURE_IDS, + feature_time=_TEST_FEATURE_TIME_FIELD, + gcs_source_uris=_TEST_GCS_CSV_SOURCE_URIS, + gcs_source_type=_TEST_GCS_SOURCE_TYPE_INVALID, + ) + class TestFeature: def setup_method(self): @@ -644,6 +1083,22 @@ def test_init_feature( name=_TEST_FEATURE_NAME, retry=base._DEFAULT_RETRY ) + def test_init_feature_raises_with_only_featurestore_id(self): + aiplatform.init(project=_TEST_PROJECT) + + with pytest.raises(ValueError): + aiplatform.Feature( + feature_name=_TEST_FEATURE_NAME, featurestore_id=_TEST_FEATURESTORE_ID, + ) + + def test_init_feature_raises_with_only_entity_type_id(self): + aiplatform.init(project=_TEST_PROJECT) + + with pytest.raises(ValueError): + aiplatform.Feature( + feature_name=_TEST_FEATURE_NAME, entity_type_id=_TEST_ENTITY_TYPE_ID, + ) + @pytest.mark.usefixtures("get_feature_mock") def test_get_featurestore(self, get_featurestore_mock): aiplatform.init(project=_TEST_PROJECT) @@ -717,3 +1172,34 @@ def test_search_features(self, search_features_mock): assert len(my_feature_list) == len(_TEST_FEATURE_LIST) for my_feature in my_feature_list: assert type(my_feature) == aiplatform.Feature + + @pytest.mark.usefixtures("get_feature_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_feature(self, create_feature_mock, sync): + aiplatform.init(project=_TEST_PROJECT) + + my_feature = aiplatform.Feature.create( + feature_id=_TEST_FEATURE_ID, + value_type=_TEST_FEATURE_VALUE_TYPE, + 
entity_type_name=_TEST_ENTITY_TYPE_ID, + featurestore_id=_TEST_FEATURESTORE_ID, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, + ) + + if not sync: + my_feature.wait() + + expected_feature = gca_feature.Feature( + value_type=_TEST_FEATURE_VALUE_TYPE_ENUM, + labels=_TEST_LABELS, + description=_TEST_DESCRIPTION, + ) + create_feature_mock.assert_called_once_with( + request=gca_featurestore_service.CreateFeatureRequest( + parent=_TEST_ENTITY_TYPE_NAME, + feature=expected_feature, + feature_id=_TEST_FEATURE_ID, + ), + metadata=_TEST_REQUEST_METADATA, + ) diff --git a/tests/unit/aiplatform/test_jobs.py b/tests/unit/aiplatform/test_jobs.py index 9292ed27c3..311c418b13 100644 --- a/tests/unit/aiplatform/test_jobs.py +++ b/tests/unit/aiplatform/test_jobs.py @@ -142,11 +142,11 @@ {"sampled_shapley_attribution": {"path_count": 10}} ) -_TEST_JOB_GET_METHOD_NAME = "get_fake_job" -_TEST_JOB_LIST_METHOD_NAME = "list_fake_job" -_TEST_JOB_CANCEL_METHOD_NAME = "cancel_fake_job" -_TEST_JOB_DELETE_METHOD_NAME = "delete_fake_job" -_TEST_JOB_RESOURCE_NAME = f"{_TEST_PARENT}/fakeJobs/{_TEST_ID}" +_TEST_JOB_GET_METHOD_NAME = "get_custom_job" +_TEST_JOB_LIST_METHOD_NAME = "list_custom_job" +_TEST_JOB_CANCEL_METHOD_NAME = "cancel_custom_job" +_TEST_JOB_DELETE_METHOD_NAME = "delete_custom_job" +_TEST_JOB_RESOURCE_NAME = f"{_TEST_PARENT}/customJobs/{_TEST_ID}" # TODO(b/171333554): Move reusable test fixtures to conftest.py file @@ -170,12 +170,14 @@ def fake_job_cancel_mock(): class TestJob: class FakeJob(jobs._Job): - _job_type = "fake-job" - _resource_noun = "fakeJobs" + _job_type = "custom-job" + _resource_noun = "customJobs" _getter_method = _TEST_JOB_GET_METHOD_NAME _list_method = _TEST_JOB_LIST_METHOD_NAME _cancel_method = _TEST_JOB_CANCEL_METHOD_NAME _delete_method = _TEST_JOB_DELETE_METHOD_NAME + _parse_resource_name_method = "parse_custom_job_path" + _format_resource_name_method = "custom_job_path" resource_name = _TEST_JOB_RESOURCE_NAME def setup_method(self): diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 177cacfb55..bf87f3593d 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -454,7 +454,6 @@ def test_constructor_creates_client(self, create_client_mock): client_class=utils.ModelClientWithOverride, credentials=initializer.global_config.credentials, location_override=_TEST_LOCATION, - prediction_client=False, ) def test_constructor_create_client_with_custom_location(self, create_client_mock): @@ -468,7 +467,6 @@ def test_constructor_create_client_with_custom_location(self, create_client_mock client_class=utils.ModelClientWithOverride, credentials=initializer.global_config.credentials, location_override=_TEST_LOCATION_2, - prediction_client=False, ) def test_constructor_creates_client_with_custom_credentials( @@ -480,7 +478,6 @@ def test_constructor_creates_client_with_custom_credentials( client_class=utils.ModelClientWithOverride, credentials=creds, location_override=_TEST_LOCATION, - prediction_client=False, ) def test_constructor_gets_model(self, get_model_mock): diff --git a/tests/unit/aiplatform/test_tensorboard.py b/tests/unit/aiplatform/test_tensorboard.py index 38ea935950..1a1d20b97a 100644 --- a/tests/unit/aiplatform/test_tensorboard.py +++ b/tests/unit/aiplatform/test_tensorboard.py @@ -37,9 +37,11 @@ ) from google.cloud.aiplatform_v1.types import ( + encryption_spec as gca_encryption_spec, tensorboard as gca_tensorboard, + tensorboard_experiment as gca_tensorboard_experiment, + tensorboard_run as 
gca_tensorboard_run, tensorboard_service as gca_tensorboard_service, - encryption_spec as gca_encryption_spec, ) from google.protobuf import field_mask_pb2 @@ -66,6 +68,16 @@ ) _TEST_INVALID_NAME = f"prj/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/{_TEST_ID}" +_TEST_TENSORBOARD_EXPERIMENT_ID = "test-experiment" +_TEST_TENSORBOARD_EXPERIMENT_NAME = ( + f"{_TEST_NAME}/experiments/{_TEST_TENSORBOARD_EXPERIMENT_ID}" +) + +_TEST_TENSORBOARD_RUN_ID = "test-run" +_TEST_TENSORBOARD_RUN_NAME = ( + f"{_TEST_TENSORBOARD_EXPERIMENT_NAME}/runs/{_TEST_TENSORBOARD_RUN_ID}" +) + # request_metadata _TEST_REQUEST_METADATA = () @@ -132,6 +144,108 @@ def delete_tensorboard_mock(): yield delete_tensorboard_mock +@pytest.fixture +def get_tensorboard_experiment_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, + "get_tensorboard_experiment", + ) as get_tensorboard_experiment__mock: + get_tensorboard_experiment__mock.return_value = gca_tensorboard_experiment.TensorboardExperiment( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, display_name=_TEST_DISPLAY_NAME, + ) + yield get_tensorboard_experiment__mock + + +@pytest.fixture +def create_tensorboard_experiment_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, + "create_tensorboard_experiment", + ) as create_tensorboard_experiment_mock: + create_tensorboard_experiment_mock.return_value = gca_tensorboard_experiment.TensorboardExperiment( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, display_name=_TEST_DISPLAY_NAME, + ) + yield create_tensorboard_experiment_mock + + +@pytest.fixture +def delete_tensorboard_experiment_mock(): + with mock.patch.object( + tensorboard_service_client.TensorboardServiceClient, + "delete_tensorboard_experiment", + ) as delete_tensorboard_experiment_mock: + delete_tensorboard_lro_experiment_mock = mock.Mock(operation.Operation) + delete_tensorboard_lro_experiment_mock.result.return_value = gca_tensorboard_service.DeleteTensorboardExperimentRequest( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, + ) + delete_tensorboard_experiment_mock.return_value = ( + delete_tensorboard_lro_experiment_mock + ) + yield delete_tensorboard_experiment_mock + + +@pytest.fixture +def list_tensorboard_experiment_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, + "list_tensorboard_experiments", + ) as list_tensorboard_experiment_mock: + list_tensorboard_experiment_mock.return_value = [ + gca_tensorboard_experiment.TensorboardExperiment( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, display_name=_TEST_DISPLAY_NAME, + ) + ] + yield list_tensorboard_experiment_mock + + +@pytest.fixture +def get_tensorboard_run_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, "get_tensorboard_run", + ) as get_tensorboard_run_mock: + get_tensorboard_run_mock.return_value = gca_tensorboard_run.TensorboardRun( + name=_TEST_TENSORBOARD_RUN_NAME, display_name=_TEST_DISPLAY_NAME, + ) + yield get_tensorboard_run_mock + + +@pytest.fixture +def create_tensorboard_run_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, "create_tensorboard_run", + ) as create_tensorboard_run_mock: + create_tensorboard_run_mock.return_value = gca_tensorboard_run.TensorboardRun( + name=_TEST_TENSORBOARD_RUN_NAME, display_name=_TEST_DISPLAY_NAME, + ) + yield create_tensorboard_run_mock + + +@pytest.fixture +def delete_tensorboard_run_mock(): + with mock.patch.object( + tensorboard_service_client.TensorboardServiceClient, "delete_tensorboard_run", + ) as 
delete_tensorboard_run_mock: + delete_tensorboard_lro_run_mock = mock.Mock(operation.Operation) + delete_tensorboard_lro_run_mock.result.return_value = gca_tensorboard_service.DeleteTensorboardRunRequest( + name=_TEST_TENSORBOARD_RUN_NAME, + ) + delete_tensorboard_run_mock.return_value = delete_tensorboard_lro_run_mock + yield delete_tensorboard_run_mock + + +@pytest.fixture +def list_tensorboard_run_mock(): + with patch.object( + tensorboard_service_client.TensorboardServiceClient, "list_tensorboard_runs", + ) as list_tensorboard_run_mock: + list_tensorboard_run_mock.return_value = [ + gca_tensorboard_run.TensorboardRun( + name=_TEST_TENSORBOARD_RUN_NAME, display_name=_TEST_DISPLAY_NAME, + ) + ] + yield list_tensorboard_run_mock + + class TestTensorboard: def setup_method(self): reload(initializer) @@ -300,3 +414,192 @@ def test_update_tensorboard_encryption_spec(self, update_tensorboard_mock): tensorboard=expected_tensorboard, metadata=_TEST_REQUEST_METADATA, ) + + +class TestTensorboardExperiment: + def setup_method(self): + reload(initializer) + reload(aiplatform) + + def teardown_method(self): + initializer.global_pool.shutdown(wait=True) + + def test_init_tensorboard_experiment(self, get_tensorboard_experiment_mock): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardExperiment( + tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_NAME + ) + get_tensorboard_experiment_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_tensorboard_experiment_with_tensorboard( + self, get_tensorboard_experiment_mock + ): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardExperiment( + tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_ID, + tensorboard_id=_TEST_ID, + ) + get_tensorboard_experiment_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_tensorboard_experiment_with_id_only_with_project_and_location( + self, get_tensorboard_experiment_mock + ): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardExperiment( + tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_ID, + tensorboard_id=_TEST_ID, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + get_tensorboard_experiment_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY + ) + + def test_create_tensorboard_experiment( + self, create_tensorboard_experiment_mock, get_tensorboard_experiment_mock + ): + + aiplatform.init(project=_TEST_PROJECT,) + + tensorboard.TensorboardExperiment.create( + tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID, + tensorboard_name=_TEST_NAME, + display_name=_TEST_DISPLAY_NAME, + ) + + expected_tensorboard_experiment = gca_tensorboard_experiment.TensorboardExperiment( + display_name=_TEST_DISPLAY_NAME, + ) + + create_tensorboard_experiment_mock.assert_called_once_with( + parent=_TEST_NAME, + tensorboard_experiment=expected_tensorboard_experiment, + tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID, + metadata=_TEST_REQUEST_METADATA, + ) + + get_tensorboard_experiment_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY + ) + + @pytest.mark.usefixtures("get_tensorboard_experiment_mock") + def test_delete_tensorboard_experiement(self, delete_tensorboard_experiment_mock): + aiplatform.init(project=_TEST_PROJECT) + + my_tensorboard_experiment = tensorboard.TensorboardExperiment( + 
tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_NAME + ) + + my_tensorboard_experiment.delete() + + delete_tensorboard_experiment_mock.assert_called_once_with( + name=my_tensorboard_experiment.resource_name + ) + + def test_list_tensorboard_experiments(self, list_tensorboard_experiment_mock): + aiplatform.init(project=_TEST_PROJECT) + + tensorboard.TensorboardExperiment.list(tensorboard_name=_TEST_NAME) + + list_tensorboard_experiment_mock.assert_called_once_with( + request={"parent": _TEST_NAME, "filter": None} + ) + + +class TestTensorboardRun: + def setup_method(self): + reload(initializer) + reload(aiplatform) + + def teardown_method(self): + initializer.global_pool.shutdown(wait=True) + + def test_init_tensorboard_run(self, get_tensorboard_run_mock): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardRun(tensorboard_run_name=_TEST_TENSORBOARD_RUN_NAME) + get_tensorboard_run_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_RUN_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_tensorboard_run_with_tensorboard_and_experiment( + self, get_tensorboard_run_mock + ): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardRun( + tensorboard_run_name=_TEST_TENSORBOARD_RUN_ID, + tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID, + tensorboard_id=_TEST_ID, + ) + get_tensorboard_run_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_RUN_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_tensorboard_run_with_id_only_with_project_and_location( + self, get_tensorboard_run_mock + ): + aiplatform.init(project=_TEST_PROJECT) + tensorboard.TensorboardRun( + tensorboard_run_name=_TEST_TENSORBOARD_RUN_ID, + tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID, + tensorboard_id=_TEST_ID, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + get_tensorboard_run_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_RUN_NAME, retry=base._DEFAULT_RETRY + ) + + def test_create_tensorboard_run( + self, create_tensorboard_run_mock, get_tensorboard_run_mock + ): + + aiplatform.init(project=_TEST_PROJECT,) + + tensorboard.TensorboardRun.create( + tensorboard_run_id=_TEST_TENSORBOARD_RUN_ID, + tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_NAME, + ) + + expected_tensorboard_run = gca_tensorboard_run.TensorboardRun( + display_name=_TEST_TENSORBOARD_RUN_ID, + ) + + create_tensorboard_run_mock.assert_called_once_with( + parent=_TEST_TENSORBOARD_EXPERIMENT_NAME, + tensorboard_run=expected_tensorboard_run, + tensorboard_run_id=_TEST_TENSORBOARD_RUN_ID, + metadata=_TEST_REQUEST_METADATA, + ) + + get_tensorboard_run_mock.assert_called_once_with( + name=_TEST_TENSORBOARD_RUN_NAME, retry=base._DEFAULT_RETRY + ) + + @pytest.mark.usefixtures("get_tensorboard_run_mock") + def test_delete_tensorboard_run(self, delete_tensorboard_run_mock): + aiplatform.init(project=_TEST_PROJECT) + + my_tensorboard_run = tensorboard.TensorboardRun( + tensorboard_run_name=_TEST_TENSORBOARD_RUN_NAME + ) + + my_tensorboard_run.delete() + + delete_tensorboard_run_mock.assert_called_once_with( + name=my_tensorboard_run.resource_name + ) + + def test_list_tensorboard_runs(self, list_tensorboard_run_mock): + aiplatform.init(project=_TEST_PROJECT) + + tensorboard.TensorboardRun.list( + tensorboard_experiment_name=_TEST_TENSORBOARD_EXPERIMENT_NAME + ) + + list_tensorboard_run_mock.assert_called_once_with( + request={"parent": _TEST_TENSORBOARD_EXPERIMENT_NAME, "filter": None} + ) diff --git a/tests/unit/aiplatform/test_utils.py b/tests/unit/aiplatform/test_utils.py index 
928b01a889..d4840609b1 100644 --- a/tests/unit/aiplatform/test_utils.py +++ b/tests/unit/aiplatform/test_utils.py @@ -17,10 +17,11 @@ import pytest -from uuid import uuid4 -from random import choice -from random import randint -from string import ascii_letters +from typing import Callable, Dict, Optional +import datetime +from decimal import Decimal + +from google.protobuf import timestamp_pb2 from google.api_core import client_options from google.api_core import gapic_v1 @@ -40,97 +41,6 @@ model_service_client_default = model_service_client_v1 -@pytest.mark.parametrize( - "resource_name, expected", - [ - ("projects/123456/locations/us-central1/datasets/987654", True), - ("projects/857392/locations/us-central1/trainingPipelines/347292", True), - ("projects/acme-co-proj-1/locations/us-central1/datasets/123456", True), - ("projects/acme-co-proj-1/locations/us-central1/datasets/abcdef", True), - ("projects/acme-co-proj-1/locations/us-central1/datasets/abc-def", True), - ("project/123456/locations/us-central1/datasets/987654", False), - ("project//locations//datasets/987654", False), - ("locations/europe-west4/datasets/987654", False), - ("987654", False), - ], -) -def test_extract_fields_from_resource_name(resource_name: str, expected: bool): - # Given a resource name and expected validity, test extract_fields_from_resource_name() - assert expected == bool(utils.extract_fields_from_resource_name(resource_name)) - - -@pytest.fixture -def generated_resource_fields(): - generated_fields = utils.Fields( - project=str(uuid4()), - location=str(uuid4()), - resource="".join(choice(ascii_letters) for i in range(10)), # 10 random letters - id=str(randint(0, 100000)), - ) - - yield generated_fields - - -@pytest.fixture -def generated_resource_name(generated_resource_fields: utils.Fields): - name = ( - f"projects/{generated_resource_fields.project}/" - f"locations/{generated_resource_fields.location}" - f"/{generated_resource_fields.resource}/{generated_resource_fields.id}" - ) - - yield name - - -def test_extract_fields_from_resource_name_with_extracted_fields( - generated_resource_name: str, generated_resource_fields: utils.Fields -): - """Verify fields extracted from resource name match the original fields""" - - assert ( - utils.extract_fields_from_resource_name(resource_name=generated_resource_name) - == generated_resource_fields - ) - - -@pytest.mark.parametrize( - "resource_name, resource_noun, expected", - [ - # Expects pattern "projects/.../locations/.../datasets/..." - ("projects/123456/locations/us-central1/datasets/987654", "datasets", True), - # Expects pattern "projects/.../locations/.../batchPredictionJobs/..." - ( - "projects/857392/locations/us-central1/trainingPipelines/347292", - "batchPredictionJobs", - False, - ), - # Expects pattern "projects/.../locations/.../metadataStores/.../contexts/..." - ( - "projects/857392/locations/us-central1/metadataStores/default/contexts/123", - "metadataStores/default/contexts", - True, - ), - # Expects pattern "projects/.../locations/.../tensorboards/.../experiments/.../runs/.../timeSeries/..." 
- ( - "projects/857392/locations/us-central1/tensorboards/123/experiments/456/runs/789/timeSeries/1", - "tensorboards/123/experiments/456/runs/789/timeSeries", - True, - ), - ], -) -def test_extract_fields_from_resource_name_with_resource_noun( - resource_name: str, resource_noun: str, expected: bool -): - assert ( - bool( - utils.extract_fields_from_resource_name( - resource_name=resource_name, resource_noun=resource_noun - ) - ) - == expected - ) - - def test_invalid_region_raises_with_invalid_region(): with pytest.raises(ValueError): aiplatform.utils.validate_region(region="us-west4") @@ -141,42 +51,67 @@ def test_invalid_region_does_not_raise_with_valid_region(): @pytest.mark.parametrize( - "resource_noun, project, location, full_name", + "resource_noun, project, parse_resource_name_method, format_resource_name_method, parent_resource_name_fields, location, full_name", [ ( "datasets", "123456", + aiplatform.TabularDataset._parse_resource_name, + aiplatform.TabularDataset._format_resource_name, + None, "us-central1", "projects/123456/locations/us-central1/datasets/987654", ), ( "trainingPipelines", "857392", + aiplatform.CustomTrainingJob._parse_resource_name, + aiplatform.CustomTrainingJob._format_resource_name, + None, "us-west20", "projects/857392/locations/us-central1/trainingPipelines/347292", ), ( - "metadataStores/default/contexts", + "contexts", "123456", + aiplatform.metadata._Context._parse_resource_name, + aiplatform.metadata._Context._format_resource_name, + {aiplatform.metadata._MetadataStore._resource_noun: "default"}, "europe-west4", "projects/857392/locations/us-central1/metadataStores/default/contexts/123", ), ( - "tensorboards/123/experiments/456/runs/789/timeSeries", + "timeSeries", "857392", + aiplatform.gapic.TensorboardServiceClient.parse_tensorboard_time_series_path, + aiplatform.gapic.TensorboardServiceClient.tensorboard_time_series_path, + { + aiplatform.Tensorboard._resource_noun: "123", + "experiments": "456", + "runs": "789", + }, "us-central1", "projects/857392/locations/us-central1/tensorboards/123/experiments/456/runs/789/timeSeries/1", ), ], ) def test_full_resource_name_with_full_name( - resource_noun: str, project: str, location: str, full_name: str, + resource_noun: str, + project: str, + parse_resource_name_method: Callable[[str], Dict[str, str]], + format_resource_name_method: Callable[..., str], + parent_resource_name_fields: Optional[Dict[str, str]], + location: str, + full_name: str, ): # should ignore issues with other arguments as resource_name is full_name assert ( aiplatform.utils.full_resource_name( resource_name=full_name, resource_noun=resource_noun, + parse_resource_name_method=parse_resource_name_method, + format_resource_name_method=format_resource_name_method, + parent_resource_name_fields=parent_resource_name_fields, project=project, location=location, ) @@ -185,11 +120,14 @@ def test_full_resource_name_with_full_name( @pytest.mark.parametrize( - "partial_name, resource_noun, project, location, full_name", + "partial_name, resource_noun, parse_resource_name_method, format_resource_name_method, parent_resource_name_fields, project, location, full_name", [ ( "987654", "datasets", + aiplatform.TabularDataset._parse_resource_name, + aiplatform.TabularDataset._format_resource_name, + None, "123456", "us-central1", "projects/123456/locations/us-central1/datasets/987654", @@ -197,20 +135,33 @@ def test_full_resource_name_with_full_name( ( "347292", "trainingPipelines", + aiplatform.CustomTrainingJob._parse_resource_name, + 
aiplatform.CustomTrainingJob._format_resource_name, + None, "857392", "us-central1", "projects/857392/locations/us-central1/trainingPipelines/347292", ), ( "123", - "metadataStores/default/contexts", + "contexts", + aiplatform.metadata._Context._parse_resource_name, + aiplatform.metadata._Context._format_resource_name, + {aiplatform.metadata._MetadataStore._resource_noun: "default"}, "857392", "us-central1", "projects/857392/locations/us-central1/metadataStores/default/contexts/123", ), ( "1", - "tensorboards/123/experiments/456/runs/789/timeSeries", + "timeSeries", + aiplatform.gapic.TensorboardServiceClient.parse_tensorboard_time_series_path, + aiplatform.gapic.TensorboardServiceClient.tensorboard_time_series_path, + { + aiplatform.Tensorboard._resource_noun: "123", + "experiments": "456", + "runs": "789", + }, "857392", "us-central1", "projects/857392/locations/us-central1/tensorboards/123/experiments/456/runs/789/timeSeries/1", @@ -218,12 +169,22 @@ def test_full_resource_name_with_full_name( ], ) def test_full_resource_name_with_partial_name( - partial_name: str, resource_noun: str, project: str, location: str, full_name: str, + partial_name: str, + resource_noun: str, + parse_resource_name_method: Callable[[str], Dict[str, str]], + format_resource_name_method: Callable[..., str], + parent_resource_name_fields: Optional[Dict[str, str]], + project: str, + location: str, + full_name: str, ): assert ( aiplatform.utils.full_resource_name( resource_name=partial_name, resource_noun=resource_noun, + parse_resource_name_method=parse_resource_name_method, + format_resource_name_method=format_resource_name_method, + parent_resource_name_fields=parent_resource_name_fields, project=project, location=location, ) @@ -242,6 +203,8 @@ def test_full_resource_name_raises_value_error( aiplatform.utils.full_resource_name( resource_name=partial_name, resource_noun=resource_noun, + parse_resource_name_method=aiplatform.CustomTrainingJob._parse_resource_name, + format_resource_name_method=aiplatform.CustomTrainingJob._format_resource_name, project=project, location=location, ) @@ -360,6 +323,60 @@ def test_client_w_override_select_version(): ) +@pytest.mark.parametrize( + "year,month,day,hour,minute,second,microsecond,expected_seconds,expected_nanos", + [ + ( + 2021, + 12, + 23, + 23, + 59, + 59, + 999999, + 1640303999, + int(str(Decimal(1640303999.999999)).split(".")[1][:9]), + ), + ( + 2013, + 1, + 1, + 1, + 1, + 1, + 199999, + 1357002061, + int(str(Decimal(1357002061.199999)).split(".")[1][:9]), + ), + ], +) +def test_get_timestamp_proto( + year, + month, + day, + hour, + minute, + second, + microsecond, + expected_seconds, + expected_nanos, +): + time = datetime.datetime( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + tzinfo=datetime.timezone.utc, + ) + true_timestamp_proto = timestamp_pb2.Timestamp( + seconds=expected_seconds, nanos=expected_nanos + ) + assert true_timestamp_proto == utils.get_timestamp_proto(time) + + class TestPipelineUtils: SAMPLE_JOB_SPEC = { "pipelineSpec": {