>>> import json
>>> # Basic transcription without timestamps
>>> _ = session.sql("CREATE OR REPLACE TEMP STAGE mystage ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE')").collect()
>>> _ = session.file.put("tests/resources/audio.ogg", "@mystage", auto_compress=False)
>>> from snowflake.snowpark.functions import col, to_file
>>> df = session.create_dataframe([["@mystage/audio.ogg"]], schema=["audio_path"]) # staged file path
>>> result_df = df.ai.transcribe(
... input_column=to_file(col("audio_path")),
... output_column="transcript",
... )
>>> result_df.columns
['AUDIO_PATH', 'TRANSCRIPT']
>>> result = json.loads(result_df.collect()[0]["TRANSCRIPT"])
>>> result['audio_duration'] > 120
True
>>> "glad to see things are going well" in result['text'].lower()
True
>>> # Transcription with word-level timestamps
>>> result_df = df.ai.transcribe(
... input_column=to_file(col("audio_path")),
... output_column="transcript",
... timestamp_granularity='word',
... )
>>> result = json.loads(result_df.collect()[0]["TRANSCRIPT"])
>>> len(result["segments"]) > 0
True
>>> result["segments"][0]["text"].lower()
'the'
>>> 'start' in result["segments"][0] and 'end' in result["segments"][0]
True
>>> # Transcription with speaker diarization (requires a multi-speaker audio file)
>>> _ = session.file.put("tests/resources/conversation.ogg", "@mystage", auto_compress=False)
>>> df = session.create_dataframe([["@mystage/conversation.ogg"]], schema=["audio_path"])
>>> result_df = df.ai.transcribe(
... input_column=to_file(col("audio_path")),
... output_column="transcript",
... timestamp_granularity='speaker',
... )
>>> result = json.loads(result_df.collect()[0]["TRANSCRIPT"])
>>> result["audio_duration"] > 100 and len(result["segments"]) > 0
True
>>> result["segments"][0]["speaker_label"]
'SPEAKER_00'
>>> 'jenny' in result["segments"][0]["text"].lower()
True
>>> 'start' in result["segments"][0] and 'end' in result["segments"][0]
True