import asyncio
import signal
from pydantic_settings import BaseSettings, SettingsConfigDict
from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.logging import configure_pretty_logging
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
)
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
configure_pretty_logging()
class Settings(BaseSettings):
"""
Settings for the streaming conversation quickstart.
These parameters can be configured with environment variables.
"""
openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
deepgram_api_key: str = "ENTER_YOUR_DEEPGRAM_API_KEY_HERE"
azure_speech_region: str = "eastus"
# This means a .env file can be used to overload these settings
# ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
extra="ignore"
)
settings = Settings()
async def main():
(
microphone_input,
speaker_output,
) = create_streaming_microphone_input_and_speaker_output(
use_default_devices=False,
use_blocking_speaker_output=True, # this moves the playback to a separate thread, set to False to use the main thread
)
conversation = StreamingConversation(
output_device=speaker_output,
transcriber=DeepgramTranscriber(
DeepgramTranscriberConfig.from_input_device(
microphone_input,
endpointing_config=PunctuationEndpointingConfig(),
api_key=settings.deepgram_api_key,
),
),
agent=ChatGPTAgent(
ChatGPTAgentConfig(
openai_api_key=settings.openai_api_key,
initial_message=BaseMessage(text="What up"),
prompt_preamble="""The AI is having a pleasant conversation about life""",
)
),
synthesizer=AzureSynthesizer(
AzureSynthesizerConfig.from_output_device(speaker_output),
azure_speech_key=settings.azure_speech_key,
azure_speech_region=settings.azure_speech_region,
),
)
await conversation.start()
print("Conversation started, press Ctrl+C to end")
signal.signal(signal.SIGINT, lambda _0, _1: asyncio.create_task(conversation.terminate()))
while conversation.is_active():
chunk = await microphone_input.get_audio()
conversation.receive_audio(chunk)
if __name__ == "__main__":
asyncio.run(main())