You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1112 lines
28 KiB
1112 lines
28 KiB
{
|
|
"Categories": [
|
|
"SPIESettings",
|
|
"Project Setup",
|
|
"Avatar Core",
|
|
"STT Settings",
|
|
"STT",
|
|
"AI",
|
|
"TTS"
|
|
],
|
|
"Variables": [
|
|
{
|
|
"UseAvatarWithSafetyVest":
|
|
{
|
|
"type": "boolean",
|
|
"default": true,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"LogoText":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Showing in background logo",
|
|
"default": "One SPIE. \r\nJust ask me.",
|
|
"hotreload": true,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"NoSpeechWarning":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "What to say, when the Transcription was empty",
|
|
"default": "Ich kann dich nicht hören: Drücke und halte den Knopf vor dir, um zu sprechen!",
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"NoMicWarning":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "What to say if the microphone does not seem to work",
|
|
"default": "Das Mikrofon scheint nicht zu funktionieren - am besten suchst du dir Hilfe!",
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"InnovationDayIntro":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Intro speech for the Innovation Day mode",
|
|
"default": "Hallo und willkommen auf der One SPIE ! Ich bin ein virtueller Avatar mit dem du dich über unsere Hausmesse unterhalten kannst. By the way, you can talk in any language with me!",
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"AppTimeoutTimeS":
|
|
{
|
|
"type": "float",
|
|
"default": 20,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"SpieRecordButtonScale":
|
|
{
|
|
"type": "float",
|
|
"default": 1,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"SpieLogoScale":
|
|
{
|
|
"type": "float",
|
|
"default": 1,
|
|
"hotreload": true,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"SpieLogoOffset":
|
|
{
|
|
"type": "vector3",
|
|
"default":
|
|
{
|
|
"X": 0,
|
|
"Y": 0,
|
|
"Z": 0
|
|
},
|
|
"hotreload": true,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"MicrofonMaxInputLevel":
|
|
{
|
|
"type": "float",
|
|
"default": 0.02,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"MicrofonInputInterpSpeed":
|
|
{
|
|
"type": "float",
|
|
"default": 10,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"UseDebugDialogueWidget":
|
|
{
|
|
"type": "boolean",
|
|
"default": false,
|
|
"category": "SPIESettings"
|
|
}
|
|
},
|
|
{
|
|
"BaseProjectSettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"AvatarInstance":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Name of this Instance of the avatar application. Can be used for separation of logs or future purposes"
|
|
},
|
|
"InitialMode":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Which mode to load on startup"
|
|
},
|
|
"UseLogging":
|
|
{
|
|
"type": "boolean"
|
|
},
|
|
"ButtonHintSpeech":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Should the Avatar say something when QnA Mode starts"
|
|
},
|
|
"HideUI":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Hides the UI"
|
|
},
|
|
"HideDialogueBoxAtStart":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "If activated, the DialogueBox will hide after the first Button press to initialize the conversation. Can be show again by pressing \"H\""
|
|
},
|
|
"ConstrainAspectRatio":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "If the camera should contrain to a vertical aspect ration. Can be used to enable a horizontal screen"
|
|
},
|
|
"AvatarVolume":
|
|
{
|
|
"type": "float"
|
|
},
|
|
"VFXVolume":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "Sound volume in general."
|
|
},
|
|
"LoadAnimationTestmapOnStart":
|
|
{
|
|
"type": "boolean"
|
|
},
|
|
"ConsoleCommands":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "Console commands to always run",
|
|
"itemsType": "string"
|
|
},
|
|
"CurrentLocation":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Where is the user located? To help the AI with its answers."
|
|
},
|
|
"AvatarResetTimerAnimation":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "For how many seconds of the end of the reset timer length should we show the circle animation?"
|
|
},
|
|
"AvatarResetTimerLength":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long to wait for a reset after avatar stopped talking. A value of 0 deactivates the timer"
|
|
},
|
|
"UIReactionDistancePercentages":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "3 Values From near to far: distances which indicate the alpha percentage of vector from player to current camera for UI reactions like clicking on buttons",
|
|
"itemsType": "float"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"AvatarInstance": "Default Avatar",
|
|
"InitialMode": 0,
|
|
"UseLogging": true,
|
|
"ButtonHintSpeech": "Du kannst den Button drücken und halten, um mit mir zu sprechen.",
|
|
"HideUI": false,
|
|
"HideDialogueBoxAtStart": false,
|
|
"ConstrainAspectRatio": false,
|
|
"AvatarVolume": 1,
|
|
"VFXVolume": 1,
|
|
"LoadAnimationTestmapOnStart": false,
|
|
"ConsoleCommands": [],
|
|
"CurrentLocation": "",
|
|
"AvatarResetTimerAnimation": 15,
|
|
"AvatarResetTimerLength": 60,
|
|
"UIReactionDistancePercentages": [ 0.25, 0.5, 0.75 ]
|
|
},
|
|
"category": "Project Setup"
|
|
}
|
|
},
|
|
{
|
|
"AvatarCoreSettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"DebugSTT":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "Deactivate or Debug STT Module",
|
|
"enum": [
|
|
"Normal",
|
|
"DebugModule",
|
|
"DebugNoModule"
|
|
],
|
|
"enumTypeName": "EAvatarCoreDebugModules"
|
|
},
|
|
"DebugAI":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "Deactivate or Debug AI Module",
|
|
"enum": [
|
|
"Normal",
|
|
"DebugModule",
|
|
"DebugNoModule"
|
|
],
|
|
"enumTypeName": "EAvatarCoreDebugModules"
|
|
},
|
|
"DebugTTS":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "Deactivate or Debug TTS Module",
|
|
"enum": [
|
|
"Normal",
|
|
"DebugModule",
|
|
"DebugNoModule"
|
|
],
|
|
"enumTypeName": "EAvatarCoreDebugModules"
|
|
},
|
|
"LookAtEnabled":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "IsLookAtEnabled"
|
|
},
|
|
"LipSyncModel":
|
|
{
|
|
"type": "enum",
|
|
"enum": [
|
|
"Original (Highest Quality)",
|
|
"Semi-Optimized (Balanced)",
|
|
"Highly Optimized (Fastest)"
|
|
],
|
|
"enumTypeName": "ERealisticMetaHumanLipSyncModelType"
|
|
},
|
|
"DebugAvatar":
|
|
{
|
|
"type": "boolean"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"DebugSTT": "Normal",
|
|
"DebugAI": "Normal",
|
|
"DebugTTS": "Normal",
|
|
"LookAtEnabled": true,
|
|
"LipSyncModel": "Original (Highest Quality)",
|
|
"DebugAvatar": false
|
|
},
|
|
"category": "Avatar Core"
|
|
}
|
|
},
|
|
{
|
|
"STTAzureSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settigns to configure the Mircrosoft Azure STT module",
|
|
"fields":
|
|
{
|
|
"AzureAPIKey":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"AzureRegion":
|
|
{
|
|
"type": "string"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"AzureAPIKey": "0Gc0wvOF2tTCr4APvZkDaieKCGeBR9c5EbC5utXgVWZUqu4IAEf6NQ721iHNu74SwfDoYSBGJHLmCbcXVDP+F4HKnwsfHr7WYi9Gv+CjJ7/UrOygkqlrP05hbHBrJiPLWDv4Gw==",
|
|
"AzureRegion": "germanywestcentral"
|
|
},
|
|
"category": "STT"
|
|
}
|
|
},
|
|
{
|
|
"STTWhisperSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settigns to configure the OpenAI Whisper STT module",
|
|
"fields":
|
|
{
|
|
"OpenAI_API_Key":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"WhisperURL":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"Model":
|
|
{
|
|
"type": "enum",
|
|
"enum": [
|
|
"Whisper-1",
|
|
"4o Transcribe Mini",
|
|
"4o Transcribe"
|
|
],
|
|
"enumTypeName": "EOpenAITranscriptionModel"
|
|
},
|
|
"MinDuration":
|
|
{
|
|
"type": "float"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"OpenAI_API_Key": "UjzfgavJ45lCu+oB2vVAsKNbPT+k3XCv7t69Og6j0LmwxhD3OK5WDBxUvgKnuDrz3xuNHg==",
|
|
"WhisperURL": "api.openai.com/v1/audio/transcriptions",
|
|
"Model": "4o Transcribe",
|
|
"MinDuration": 0.75
|
|
},
|
|
"category": "STT"
|
|
}
|
|
},
|
|
{
|
|
"STTParakeetSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settigns to configure the nVidia parakeet STT module",
|
|
"fields":
|
|
{
|
|
"PythonPath":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"PretrainedModel":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"Host":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"Port":
|
|
{
|
|
"type": "integer"
|
|
},
|
|
"KeepAliveRule":
|
|
{
|
|
"type": "enum",
|
|
"enum": [
|
|
"Only keep Module open in Editor Mode",
|
|
"Never keep Module alive.",
|
|
"Keep Module alive in Editor and Shipping mode."
|
|
],
|
|
"enumTypeName": "EKeepAliveRule"
|
|
},
|
|
"Device":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"UpdateIntervalSec":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How often (seconds) the Python server produces intermediate transcription updates"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"PythonPath": "python",
|
|
"PretrainedModel": "nvidia/parakeet-tdt-0.6b-v3",
|
|
"Host": "127.0.0.1",
|
|
"Port": 40200,
|
|
"KeepAliveRule": "Only keep Module open in Editor Mode",
|
|
"Device": "cuda:0",
|
|
"UpdateIntervalSec": 0.5
|
|
},
|
|
"category": "STT"
|
|
}
|
|
},
|
|
{
|
|
"STTSettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"STTTranscriptionType":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "Which Service to use, can be overridden by �I settings to stream directly to OpenAI",
|
|
"enum": [
|
|
"Mircosoft Azure Congnitive Speech Services",
|
|
"OpenAI Transcription",
|
|
"nvidia NeMo Parakeet (local transcription)"
|
|
],
|
|
"enumTypeName": "ESTTTranscriptionType"
|
|
},
|
|
"bUsePTT":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Use the push to talk button or allow freespeech"
|
|
},
|
|
"bCanInterrupt":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Can the suer interrupt the avatar"
|
|
},
|
|
"bSTTDebugAudioSave":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Save the final recording from microphone to Saved/STT folder"
|
|
},
|
|
"WebRTCSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settings of the WebRTC Channel.",
|
|
"fields":
|
|
{
|
|
"pipeline_multi_channel_capture":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables capturing audio with more than one input channel.\nWhen false, WebRTC downmixes input to mono before processing."
|
|
},
|
|
"pipeline_multi_channel_render":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables rendering audio with multiple output channels.\nWhen false, WebRTC outputs mono audio."
|
|
},
|
|
"pipeline_maximum_internal_processing_rate":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Upper limit for WebRTC�s internal audio processing sample rate in Hz.\nWebRTC will resample input audio to this rate for all DSP stages.\nCommon values are 16000, 32000, or 48000."
|
|
},
|
|
"echo_canceller":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables WebRTC Acoustic Echo Cancellation (AEC).\nRemoves far end audio that is played through speakers from the microphone signal."
|
|
},
|
|
"pre_amplifier":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables a fixed pre amplification stage before other processing.\nThis boosts quiet microphone signals but can amplify noise if misused."
|
|
},
|
|
"high_pass_filter":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables a high pass filter that removes low frequency noise.\nUseful for eliminating rumble or microphone DC offset."
|
|
},
|
|
"noise_suppression":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables WebRTC noise suppression.\nReduces stationary background noise like fans or ambient hum."
|
|
},
|
|
"noise_suppression_level":
|
|
{
|
|
"type": "integer"
|
|
},
|
|
"transient_suppression":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables transient noise suppression.\nTargets short non speech sounds like keyboard clicks or taps."
|
|
},
|
|
"gain_controller1":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables legacy automatic gain control (AGC1).\nAdjusts microphone gain slowly over time.\nUsually disabled in favor of gain_controller2."
|
|
},
|
|
"gain_controller2":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables modern automatic gain control (AGC2).\nProvides faster and more stable loudness normalization."
|
|
},
|
|
"voice_detection":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables voice activity detection.\nWebRTC estimates whether the current audio frame contains speech.\nOnly available when WEBRTC_5414 or newer is used which it is by default in 5.6."
|
|
},
|
|
"residual_echo_detector":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables residual echo detection.\nDetects remaining echo after echo cancellation and can influence other DSP stages.\nOnly available when WEBRTC_5414 or newer is used which it is by default in 5.6."
|
|
},
|
|
"level_estimation":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Enables internal audio level estimation.\nWebRTC computes signal loudness metrics for diagnostics or downstream logic.\nOnly available when WEBRTC_5414 or newer is used which it is by default in 5.6."
|
|
}
|
|
}
|
|
},
|
|
"VADSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settings of the VAD module that determine if user is currently speaking.",
|
|
"fields":
|
|
{
|
|
"VAD_Mode":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "How aggressive is the VAD Module (quality, normal, aggressive, very aggresive)"
|
|
},
|
|
"VAD_SpeechWhileBlocked":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long to record to consider it, user wants to interrupt avatar"
|
|
},
|
|
"VAD_MinSpeechTime":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long to speak until it is considert speaking?"
|
|
},
|
|
"VAD_MinSpeechAmplitude":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Much background noise? Try to raise the limit of -dB to even consider speech?"
|
|
}
|
|
}
|
|
},
|
|
"SpeexDSPSettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Settings of the SpeexDSP Module",
|
|
"fields":
|
|
{
|
|
"StateNames":
|
|
{
|
|
"type": "array",
|
|
"itemsType": "enum",
|
|
"itemsEnum": [
|
|
"preprocessor denoiser state",
|
|
"preprocessor Automatic Gain Control state",
|
|
"preprocessor Voice Activity Detection state",
|
|
"preprocessor dereverb state",
|
|
"preprocessor dereverb level",
|
|
"preprocessor dereverb decay",
|
|
"probability required for the VAD to go from silence to voice",
|
|
"probability required for the VAD to stay in the voice state (integer percent)",
|
|
"maximum attenuation of the noise in dB (negative number)",
|
|
"attenuation of the residual echo in dB (negative number)",
|
|
"maximum attenuation of the residual echo in dB when near end is active (negative number)",
|
|
"the corresponding echo canceller state so that residual echo suppression can be performed (NULL for no residual echo suppression)",
|
|
"maximal gain increase in dB/second (int32)",
|
|
"maximal gain decrease in dB/second (int32)",
|
|
"maximal gain in dB (int32)",
|
|
"preprocessor Automatic Gain Control level (int32)"
|
|
],
|
|
"itemsEnumTypeName": "ESpeexDSPState"
|
|
},
|
|
"SettingEntries":
|
|
{
|
|
"type": "array",
|
|
"itemsType": "struct",
|
|
"itemsFields":
|
|
{
|
|
"TypeName":
|
|
{
|
|
"type": "enum",
|
|
"enum": [
|
|
"preprocessor denoiser state",
|
|
"preprocessor Automatic Gain Control state",
|
|
"preprocessor Voice Activity Detection state",
|
|
"preprocessor dereverb state",
|
|
"preprocessor dereverb level",
|
|
"preprocessor dereverb decay",
|
|
"probability required for the VAD to go from silence to voice",
|
|
"probability required for the VAD to stay in the voice state (integer percent)",
|
|
"maximum attenuation of the noise in dB (negative number)",
|
|
"attenuation of the residual echo in dB (negative number)",
|
|
"maximum attenuation of the residual echo in dB when near end is active (negative number)",
|
|
"the corresponding echo canceller state so that residual echo suppression can be performed (NULL for no residual echo suppression)",
|
|
"maximal gain increase in dB/second (int32)",
|
|
"maximal gain decrease in dB/second (int32)",
|
|
"maximal gain in dB (int32)",
|
|
"preprocessor Automatic Gain Control level (int32)"
|
|
],
|
|
"enumTypeName": "ESpeexDSPState"
|
|
},
|
|
"Value":
|
|
{
|
|
"type": "integer"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"STTLanguages":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "All languages the STT module should understand simultaneously.",
|
|
"itemsType": "enum",
|
|
"itemsEnum": [
|
|
"Unset",
|
|
"English",
|
|
"French",
|
|
"German",
|
|
"Spanish",
|
|
"Portuguese",
|
|
"Chinese",
|
|
"Japanese",
|
|
"Hindi",
|
|
"Italian",
|
|
"Korean",
|
|
"Dutch",
|
|
"Polish",
|
|
"Russian",
|
|
"Swedish",
|
|
"Turkish",
|
|
"Filipino",
|
|
"Bulgarian",
|
|
"Romanian",
|
|
"Arabic",
|
|
"Czech",
|
|
"Greek",
|
|
"Finnish",
|
|
"Croatian",
|
|
"Malay",
|
|
"Slovak",
|
|
"Danish",
|
|
"Tamil",
|
|
"Ukrainian",
|
|
"Hungarian",
|
|
"Norwegian",
|
|
"Vietnamese",
|
|
"Bengali",
|
|
"Thai",
|
|
"Hebrew",
|
|
"Georgian",
|
|
"Indonesian",
|
|
"Telugu",
|
|
"Gujarati",
|
|
"Kannada",
|
|
"Malayalam",
|
|
"Marathi",
|
|
"Punjabi"
|
|
],
|
|
"itemsEnumTypeName": "ELanguage"
|
|
},
|
|
"STTReplacements":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "Transcriptions to always change to another word.",
|
|
"itemsType": "struct",
|
|
"itemsFields":
|
|
{
|
|
"TranscribedWords":
|
|
{
|
|
"type": "array",
|
|
"itemsType": "string"
|
|
},
|
|
"ReplacementWord":
|
|
{
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"STTSpecialWords":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "Special words that the transcription service needs to know (e.g. b.ReX or Bruce-B).",
|
|
"itemsType": "string"
|
|
},
|
|
"PTTPostRollTime":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long to keep transcribing when user released button. Users like to finish early ;)"
|
|
},
|
|
"FreespeechPostRollTime":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long to keep transcribing when user stopped talking.)"
|
|
},
|
|
"MaxTalkingTime":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How long can the user press the button?)"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"STTTranscriptionType": "Mircosoft Azure Congnitive Speech Services",
|
|
"bUsePTT": true,
|
|
"bCanInterrupt": true,
|
|
"bSTTDebugAudioSave": false,
|
|
"WebRTCSettings":
|
|
{
|
|
"pipeline_multi_channel_capture": false,
|
|
"pipeline_multi_channel_render": false,
|
|
"pipeline_maximum_internal_processing_rate": 48000,
|
|
"echo_canceller": true,
|
|
"pre_amplifier": false,
|
|
"high_pass_filter": true,
|
|
"noise_suppression": true,
|
|
"noise_suppression_level": 3,
|
|
"transient_suppression": true,
|
|
"gain_controller1": false,
|
|
"gain_controller2": false,
|
|
"voice_detection": false,
|
|
"residual_echo_detector": true,
|
|
"level_estimation": false
|
|
},
|
|
"VADSettings":
|
|
{
|
|
"VAD_Mode": 2,
|
|
"VAD_SpeechWhileBlocked": 0.5,
|
|
"VAD_MinSpeechTime": 0.25,
|
|
"VAD_MinSpeechAmplitude": -55
|
|
},
|
|
"SpeexDSPSettings":
|
|
{
|
|
"StateNames": [],
|
|
"SettingEntries": [
|
|
{
|
|
"TypeName": "preprocessor Automatic Gain Control state",
|
|
"Value": 0
|
|
},
|
|
{
|
|
"TypeName": "preprocessor dereverb state",
|
|
"Value": 10
|
|
},
|
|
{
|
|
"TypeName": "maximum attenuation of the noise in dB (negative number)",
|
|
"Value": -10
|
|
},
|
|
{
|
|
"TypeName": "attenuation of the residual echo in dB (negative number)",
|
|
"Value": -40
|
|
},
|
|
{
|
|
"TypeName": "maximum attenuation of the residual echo in dB when near end is active (negative number)",
|
|
"Value": -15
|
|
}
|
|
]
|
|
},
|
|
"STTLanguages": [
|
|
"German",
|
|
"English"
|
|
],
|
|
"STTReplacements": [
|
|
{
|
|
"TranscribedWords": [
|
|
"spie",
|
|
"schpie",
|
|
"spieh",
|
|
"spiel"
|
|
],
|
|
"ReplacementWord": "SPIE"
|
|
}
|
|
],
|
|
"STTSpecialWords": [
|
|
"SPIE",
|
|
"Innovation Day",
|
|
"Keynote",
|
|
"Clarissa Hack",
|
|
"Jérôme Morisson"
|
|
],
|
|
"PTTPostRollTime": 0.40000000596046448,
|
|
"FreespeechPostRollTime": 0.5,
|
|
"MaxTalkingTime": 120
|
|
},
|
|
"category": "STT"
|
|
}
|
|
},
|
|
{
|
|
"AzureOpenAI_API":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Encrypted Azure OpenAI API Key",
|
|
"default": "UjzfgavJ45lCu+oB2vVAsKNbPT+k3XCv7t69Og6j0LmwxhD3OK5WDBxUvgKnuDrz3xuNHg==",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"DebugOpenAI_API":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "OpenAI API key for translation testing",
|
|
"default": "UWOrSTUJn8TJJmJWPR10q2NTUJgX4JGvzMzr5/j9tjwcx02VEWRMuW0plF8vulntD2ipUwjmodUX0lD7XM5rG6Po7Ayam/fqLG5QyEaofbQN2j6zyZx/0recl7nFTTBH18La5YQDZm+BERpxMvdE5wmx50Vcl1z5PgeF1L1IvjAgh9nNRpjAaDzTAD9iHhVmtfxjCEHvcl9L8xc5ip6eKzWMXHlTv76DASeiwcBp1BFVTXPz",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"OpenAISettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"BaseAISettings":
|
|
{
|
|
"type": "struct",
|
|
"tooltip": "Base URL - Change this to the correct Azure API URL",
|
|
"fields":
|
|
{
|
|
"APIKey":
|
|
{
|
|
"type": "string"
|
|
},
|
|
"ModelID":
|
|
{
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"BaseURL":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Base URL - Change this to the correct Azure API URL"
|
|
},
|
|
"IsAzureOpenAI":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Do we use the Microsoft Azure OpenAI API"
|
|
},
|
|
"Voice":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "OpenAI RealtimeAPI Voice",
|
|
"enum": [
|
|
"UNDEFINED",
|
|
"alloy",
|
|
"ash",
|
|
"ballad",
|
|
"cedar",
|
|
"coral",
|
|
"echo",
|
|
"marin",
|
|
"sage",
|
|
"shimmer",
|
|
"verse"
|
|
],
|
|
"enumTypeName": "EOpenAIRealtimeVoice"
|
|
},
|
|
"InputAudioStreaming":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Shall we forward all audio chunks directly to OpenAI - Does not work well, if we do not forward silence as well"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"BaseAISettings":
|
|
{
|
|
"APIKey": "UjzfgavJ45lCu+oB2vVAsKNbPT+k3XCv7t69Og6j0LmwxhD3OK5WDBxUvgKnuDrz3xuNHg==",
|
|
"ModelID": "gpt-realtime"
|
|
},
|
|
"BaseURL": "aiwa-ai-new.openai.azure.com/openai",
|
|
"IsAzureOpenAI": true,
|
|
"Voice": "UNDEFINED",
|
|
"InputAudioStreaming": false
|
|
},
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"GlobalAISettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"bUseMCPServer":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Boot up a FastMCP Server on Startup (On close keep open in Editor Mode, kill in shipping)"
|
|
},
|
|
"AIModelAudioOutput":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "Does the AI model generate Audio Chunks that can be forwarded to the TTS Manager?"
|
|
},
|
|
"MaxTokens":
|
|
{
|
|
"type": "integer"
|
|
},
|
|
"Temperature":
|
|
{
|
|
"type": "float"
|
|
},
|
|
"RequestTimeout":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Does the AI model generate Audio Chunks that can be forwarded to the TTS Manager?"
|
|
},
|
|
"DelayAnswerSeconds":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "How many seconds before the delayedanswer event is triggered"
|
|
},
|
|
"MaxMessages":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "How many messages to keep in context (-1 for infinite)"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"bUseMCPServer": false,
|
|
"AIModelAudioOutput": true,
|
|
"MaxTokens": 1500,
|
|
"Temperature": 0.80000001192092896,
|
|
"RequestTimeout": 10,
|
|
"DelayAnswerSeconds": 1.5,
|
|
"MaxMessages": -1
|
|
},
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"AzureAISearch_API":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Azure AI Search API Key",
|
|
"default": "9ksww+/kHZPYMfUFTWSi1QgaDjXaB7vxdGKPgmfGhPEx321nfpdbkUnThXT4BWrkKMAR0MF2zJT17fJSday8yBiftv0=",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"AzureAISearch_URL":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Azure AIWA URL",
|
|
"default": "aiwa-ai-search.search.windows.net",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"AzureAISearch_Index":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "Azure AI Search Indexname",
|
|
"default": "innovationday",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"DatabaseDescription":
|
|
{
|
|
"type": "string",
|
|
"tooltip": "For all questions related to SPIE, the Innovation Day, projects, technologies, or company initiatives, retrieve information from the internal database. Use the database as the primary source of information.",
|
|
"default": "For all questions related to SPIE, the Innovation Day, keynotes, projects, technologies, or company initiatives, retrieve information from the internal database. Use the database as the primary source of information.",
|
|
"category": "AI"
|
|
}
|
|
},
|
|
{
|
|
"GlobalTTSSettings":
|
|
{
|
|
"type": "struct",
|
|
"fields":
|
|
{
|
|
"UseCacheSystem":
|
|
{
|
|
"type": "boolean",
|
|
"tooltip": "If true, the audio will be cached"
|
|
},
|
|
"WordReplacements":
|
|
{
|
|
"type": "array",
|
|
"tooltip": "A map of words to replace in the text to fix pronauncation use separator \"|\". Left of it = word to preplace; right of it word to replace with.",
|
|
"itemsType": "string"
|
|
},
|
|
"AudioNumChannels":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Number of Audio Channels - Only tested with 1 but who needs more anyway?"
|
|
},
|
|
"AudioSampleRate":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Samplerate of the audio to be produced"
|
|
},
|
|
"ResampleToSampleRate":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Resample the produced audio ot this samplerate if neccessary (-1 to deactivate)"
|
|
},
|
|
"ChunkLength":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "Chunk length in seconds"
|
|
},
|
|
"StreamAmplitudeMultiplier":
|
|
{
|
|
"type": "float",
|
|
"tooltip": "Multiplier to make stream audio less loud"
|
|
},
|
|
"MaxConcurrentGenerations":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Max concurrent generations possible"
|
|
},
|
|
"bCommaSplitRule":
|
|
{
|
|
"type": "enum",
|
|
"tooltip": "How to split the text (if input text streaming is not activated by the audio generation mode)",
|
|
"enum": [
|
|
"Do not split sentences on comma",
|
|
"Split on comma on first occurence",
|
|
"Split on every comma.",
|
|
"Split on every comma, but add fill words."
|
|
],
|
|
"enumTypeName": "ECommaSplitRule"
|
|
},
|
|
"MaxCharacterForSplit":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Min amount to text for splitting rule to be applied"
|
|
},
|
|
"MaxCharacterForGeneration":
|
|
{
|
|
"type": "integer",
|
|
"tooltip": "Max allowed characters for text generation"
|
|
},
|
|
"Language":
|
|
{
|
|
"type": "enum",
|
|
"enum": [
|
|
"Unset",
|
|
"English",
|
|
"French",
|
|
"German",
|
|
"Spanish",
|
|
"Portuguese",
|
|
"Chinese",
|
|
"Japanese",
|
|
"Hindi",
|
|
"Italian",
|
|
"Korean",
|
|
"Dutch",
|
|
"Polish",
|
|
"Russian",
|
|
"Swedish",
|
|
"Turkish",
|
|
"Filipino",
|
|
"Bulgarian",
|
|
"Romanian",
|
|
"Arabic",
|
|
"Czech",
|
|
"Greek",
|
|
"Finnish",
|
|
"Croatian",
|
|
"Malay",
|
|
"Slovak",
|
|
"Danish",
|
|
"Tamil",
|
|
"Ukrainian",
|
|
"Hungarian",
|
|
"Norwegian",
|
|
"Vietnamese",
|
|
"Bengali",
|
|
"Thai",
|
|
"Hebrew",
|
|
"Georgian",
|
|
"Indonesian",
|
|
"Telugu",
|
|
"Gujarati",
|
|
"Kannada",
|
|
"Malayalam",
|
|
"Marathi",
|
|
"Punjabi"
|
|
],
|
|
"enumTypeName": "ELanguage"
|
|
}
|
|
},
|
|
"default":
|
|
{
|
|
"UseCacheSystem": true,
|
|
"WordReplacements": [],
|
|
"AudioNumChannels": 1,
|
|
"AudioSampleRate": 22050,
|
|
"ResampleToSampleRate": -1,
|
|
"ChunkLength": 0.0099999997764825821,
|
|
"StreamAmplitudeMultiplier": 0.25,
|
|
"MaxConcurrentGenerations": 3,
|
|
"bCommaSplitRule": "Split on comma on first occurence",
|
|
"MaxCharacterForSplit": 20,
|
|
"MaxCharacterForGeneration": 0,
|
|
"Language": "German"
|
|
},
|
|
"category": "TTS"
|
|
}
|
|
}
|
|
]
|
|
}
|