diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index af599353..0ddb512d 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -4,6 +4,8 @@ branchProtectionRules:
 # Identifies the protection rule pattern. Name of the branch to be protected.
 # Defaults to `master`
 - pattern: master
+  requiresCodeOwnerReviews: true
+  requiresStrictStatusChecks: true
   requiredStatusCheckContexts:
   - 'Kokoro'
   - 'cla/google'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d328dac..e3e0fbdd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,13 @@
 
 [1]: https://pypi.org/project/google-cloud-speech/#history
 
+## [2.7.0](https://www.github.com/googleapis/python-speech/compare/v2.6.0...v2.7.0) (2021-08-10)
+
+
+### Features
+
+* add total_billed_time response field ([#224](https://www.github.com/googleapis/python-speech/issues/224)) ([07b5203](https://www.github.com/googleapis/python-speech/commit/07b5203a15a186aab537442a4f4a4071aab3046f))
+
 ## [2.6.0](https://www.github.com/googleapis/python-speech/compare/v2.5.1...v2.6.0) (2021-07-28)
diff --git a/google/cloud/speech_v1/services/speech/async_client.py b/google/cloud/speech_v1/services/speech/async_client.py
index 2c06a667..83f54612 100644
--- a/google/cloud/speech_v1/services/speech/async_client.py
+++ b/google/cloud/speech_v1/services/speech/async_client.py
@@ -38,6 +38,7 @@
 from google.api_core import operation  # type: ignore
 from google.api_core import operation_async  # type: ignore
 from google.cloud.speech_v1.types import cloud_speech
+from google.protobuf import duration_pb2  # type: ignore
 from google.rpc import status_pb2  # type: ignore
 from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO
 from .transports.grpc_asyncio import SpeechGrpcAsyncIOTransport
@@ -379,7 +380,7 @@ def streaming_recognize(
         single_utterance is set to false, then no messages are streamed
         back to the client.
 
-        Here's an example of a series of ten
+        Here's an example of a series of
         StreamingRecognizeResponses that might be returned while
         processing audio:
diff --git a/google/cloud/speech_v1/services/speech/client.py b/google/cloud/speech_v1/services/speech/client.py
index 8932e4c3..fe9a005f 100644
--- a/google/cloud/speech_v1/services/speech/client.py
+++ b/google/cloud/speech_v1/services/speech/client.py
@@ -43,6 +43,7 @@
 from google.api_core import operation  # type: ignore
 from google.api_core import operation_async  # type: ignore
 from google.cloud.speech_v1.types import cloud_speech
+from google.protobuf import duration_pb2  # type: ignore
 from google.rpc import status_pb2  # type: ignore
 from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO
 from .transports.grpc import SpeechGrpcTransport
@@ -553,7 +554,7 @@ def streaming_recognize(
         single_utterance is set to false, then no messages are streamed
         back to the client.
 
-        Here's an example of a series of ten
+        Here's an example of a series of
         StreamingRecognizeResponses that might be returned while
         processing audio:
diff --git a/google/cloud/speech_v1/types/cloud_speech.py b/google/cloud/speech_v1/types/cloud_speech.py
index f1420b46..51cbdd63 100644
--- a/google/cloud/speech_v1/types/cloud_speech.py
+++ b/google/cloud/speech_v1/types/cloud_speech.py
@@ -138,6 +138,17 @@ class StreamingRecognitionConfig(proto.Message):
             ``END_OF_SINGLE_UTTERANCE`` event and cease recognition. It
             will return no more than one ``StreamingRecognitionResult``
             with the ``is_final`` flag set to ``true``.
+
+            The ``single_utterance`` field can only be used with
+            specified models, otherwise an error is thrown. The
+            ``model`` field in [``RecognitionConfig``][] must be set to:
+
+            -  ``command_and_search``
+            -  ``phone_call`` AND additional field
+               ``useEnhanced``\ =\ ``true``
+            -  The ``model`` field is left undefined. In this case the
+               API auto-selects a model based on any other parameters
+               that you set in ``RecognitionConfig``.
         interim_results (bool):
             If ``true``, interim results (tentative
             hypotheses) may be returned as they become
             available (these interim results are
@@ -214,7 +225,7 @@ class RecognitionConfig(proto.Message):
             [SpeechContext][google.cloud.speech.v1.SpeechContext]. A
             means to provide context to assist the speech recognition.
             For more information, see `speech
-            adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__.
+            adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__.
         enable_word_time_offsets (bool):
             If ``true``, the top result includes a list of words and the
             start and end time offsets (timestamps) for those words. If
@@ -226,11 +237,7 @@ class RecognitionConfig(proto.Message):
             available in select languages. Setting this
             for requests in other languages has no effect
             at all. The default 'false' value does not add
-            punctuation to result hypotheses. Note: This is
-            currently offered as an experimental service,
-            complimentary to all users. In the future this
-            may be exclusively available as a premium
-            feature.
+            punctuation to result hypotheses.
         diarization_config (google.cloud.speech_v1.types.SpeakerDiarizationConfig):
             Config to enable speaker diarization and set
             additional parameters to make diarization better
@@ -270,7 +277,7 @@ class RecognitionConfig(proto.Message):
             video
 
-            Best for audio that originated from from video or includes multiple
+            Best for audio that originated from video or includes multiple
             speakers. Ideally the audio is recorded at a 16khz or greater
             sampling rate. This is a premium model that costs more than the
             standard rate.
@@ -306,7 +313,7 @@ class AudioEncoding(proto.Enum):
     The accuracy of the speech recognition can be reduced if lossy
     codecs are used to capture or transmit audio, particularly if
     background noise is present. Lossy codecs include ``MULAW``,
-    ``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``, and
+    ``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``,
     ``MP3``.
 
     The ``FLAC`` and ``WAV`` audio file formats include a header that
@@ -370,7 +377,7 @@ class SpeakerDiarizationConfig(proto.Message):
         automatically determine the correct number of
         speakers. If not set, the default value is 6.
     speaker_tag (int):
-        Unused.
+        Output only. Unused.
     """
 
     enable_speaker_diarization = proto.Field(proto.BOOL, number=1,)
@@ -531,11 +538,17 @@ class RecognizeResponse(proto.Message):
         results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]):
             Sequential list of transcription results
             corresponding to sequential portions of audio.
+        total_billed_time (google.protobuf.duration_pb2.Duration):
+            When available, billed audio seconds for the
+            corresponding request.
     """
 
     results = proto.RepeatedField(
         proto.MESSAGE, number=2, message="SpeechRecognitionResult",
     )
+    total_billed_time = proto.Field(
+        proto.MESSAGE, number=3, message=duration_pb2.Duration,
+    )
 
 
 class LongRunningRecognizeResponse(proto.Message):
@@ -550,11 +563,17 @@ class LongRunningRecognizeResponse(proto.Message):
         results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]):
             Sequential list of transcription results
             corresponding to sequential portions of audio.
+        total_billed_time (google.protobuf.duration_pb2.Duration):
+            When available, billed audio seconds for the
+            corresponding request.
     """
 
     results = proto.RepeatedField(
         proto.MESSAGE, number=2, message="SpeechRecognitionResult",
     )
+    total_billed_time = proto.Field(
+        proto.MESSAGE, number=3, message=duration_pb2.Duration,
+    )
 
 
 class LongRunningRecognizeMetadata(proto.Message):
@@ -572,6 +591,10 @@ class LongRunningRecognizeMetadata(proto.Message):
             Time when the request was received.
         last_update_time (google.protobuf.timestamp_pb2.Timestamp):
             Time of the most recent processing update.
+        uri (str):
+            Output only. The URI of the audio file being
+            transcribed. Empty if the audio was sent as byte
+            content.
     """
 
     progress_percent = proto.Field(proto.INT32, number=1,)
@@ -579,6 +602,7 @@ class LongRunningRecognizeMetadata(proto.Message):
     last_update_time = proto.Field(
         proto.MESSAGE, number=3, message=timestamp_pb2.Timestamp,
     )
+    uri = proto.Field(proto.STRING, number=4,)
 
 
 class StreamingRecognizeResponse(proto.Message):
@@ -588,9 +612,8 @@ class StreamingRecognizeResponse(proto.Message):
     client. If there is no recognizable audio, and ``single_utterance``
     is set to false, then no messages are streamed back to the client.
 
-    Here's an example of a series of ten
-    ``StreamingRecognizeResponse``\ s that might be returned while
-    processing audio:
+    Here's an example of a series of ``StreamingRecognizeResponse``\ s
+    that might be returned while processing audio:
 
     1. results { alternatives { transcript: "tube" } stability: 0.01 }
@@ -648,6 +671,10 @@ class StreamingRecognizeResponse(proto.Message):
             ``is_final=false`` results (the interim results).
         speech_event_type (google.cloud.speech_v1.types.StreamingRecognizeResponse.SpeechEventType):
             Indicates the type of speech event.
+        total_billed_time (google.protobuf.duration_pb2.Duration):
+            When available, billed audio seconds for the
+            stream. Set only if this is the last response in
+            the stream.
     """
 
     class SpeechEventType(proto.Enum):
@@ -660,6 +687,9 @@ class SpeechEventType(proto.Enum):
         proto.MESSAGE, number=2, message="StreamingRecognitionResult",
     )
     speech_event_type = proto.Field(proto.ENUM, number=4, enum=SpeechEventType,)
+    total_billed_time = proto.Field(
+        proto.MESSAGE, number=5, message=duration_pb2.Duration,
+    )
 
 
 class StreamingRecognitionResult(proto.Message):
@@ -784,12 +814,12 @@ class WordInfo(proto.Message):
             The word corresponding to this set of
             information.
         speaker_tag (int):
-            A distinct integer value is assigned for every speaker
-            within the audio. This field specifies which one of those
-            speakers was detected to have spoken this word. Value ranges
-            from '1' to diarization_speaker_count. speaker_tag is set if
-            enable_speaker_diarization = 'true' and only in the top
-            alternative.
+            Output only. A distinct integer value is assigned for every
+            speaker within the audio. This field specifies which one of
+            those speakers was detected to have spoken this word. Value
+            ranges from '1' to diarization_speaker_count. speaker_tag is
+            set if enable_speaker_diarization = 'true' and only in the
+            top alternative.
""" start_time = proto.Field(proto.MESSAGE, number=1, message=duration_pb2.Duration,) diff --git a/samples/microphone/requirements.txt b/samples/microphone/requirements.txt index c298ce23..bb462e2e 100644 --- a/samples/microphone/requirements.txt +++ b/samples/microphone/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-speech==2.5.1 +google-cloud-speech==2.6.0 pyaudio==0.2.11 six==1.16.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 600643d8..c8b635e3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-speech==2.5.1 +google-cloud-speech==2.6.0 google-cloud-storage==1.41.1 diff --git a/setup.py b/setup.py index 57b64cc7..5ff31b9a 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-speech" description = "Google Cloud Speech API client library" -version = "2.6.0" +version = "2.7.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' diff --git a/tests/unit/gapic/speech_v1/test_speech.py b/tests/unit/gapic/speech_v1/test_speech.py index f07d10cd..23e6806d 100644 --- a/tests/unit/gapic/speech_v1/test_speech.py +++ b/tests/unit/gapic/speech_v1/test_speech.py @@ -41,6 +41,7 @@ from google.cloud.speech_v1.types import cloud_speech from google.longrunning import operations_pb2 from google.oauth2 import service_account +from google.protobuf import duration_pb2 # type: ignore from google.rpc import status_pb2 # type: ignore import google.auth