-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathelevenlabs.go
107 lines (94 loc) · 2.79 KB
/
elevenlabs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package elevenlabs
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"sort"
	"strings"
	"time"
)
// customTransforms maps substrings found in text to the replacement that is
// sent to the speech engine instead (for example "C#" becomes "C sharp" and
// ".json" becomes "dot J S O N").
var customTransforms = map[string]string{
	"C#":       "C sharp",
	"areEqual": "are equal",
	".NET":     "dot net",
	"C++":      "C plus plus",
	"_":        " underscore ",
	".sh":      "dot S H",
	".js":      "dot J S",
	".ts":      "dot T S",
	".css":     "dot C S S",
	".html":    "dot H T M L",
	".json":    "dot J S O N",
	".yaml":    "dot yaml",
	".yml":     "dot yaml",
	".xml":     "dot X M L",
	".md":      "dot M D",
	".txt":     "dot T X T",
	".log":     "dot L O G",
	".csv":     "dot C S V",
	".go":      "dot go",
}

// applyCustomTransforms returns text with every occurrence of a
// customTransforms key replaced by its mapped value.
//
// The replacement is a single left-to-right pass. When several keys match at
// the same position the longest one wins, so ".json" is never partially
// consumed by ".js". Replaced text is not scanned again, so the output of one
// rule can never be picked up by another rule.
func applyCustomTransforms(text string) string {
	// Map iteration order is random in Go, so fix an explicit order first:
	// longest key first, ties broken lexicographically.
	keys := make([]string, 0, len(customTransforms))
	for key := range customTransforms {
		keys = append(keys, key)
	}
	sort.Slice(keys, func(i, j int) bool {
		if len(keys[i]) != len(keys[j]) {
			return len(keys[i]) > len(keys[j])
		}
		return keys[i] < keys[j]
	})

	// strings.NewReplacer compares old strings in argument order, so listing
	// longer keys first gives longest-match semantics at each position.
	// The map is read on every call so that its current contents are honoured.
	pairs := make([]string, 0, 2*len(keys))
	for _, key := range keys {
		pairs = append(pairs, key, customTransforms[key])
	}
	return strings.NewReplacer(pairs...).Replace(text)
}
// GetAudioArrayBufferElevenLabs sends textToSpeak to the ElevenLabs
// text-to-speech API and returns the response body, requested as audio/mpeg,
// as a byte slice.
//
// textToSpeak is passed through applyCustomTransforms before it is sent.
// ttsApiKey is sent in the xi-api-key header. ttsVoiceId selects the voice;
// when it is empty a built-in default voice ID is used. The voice ID is
// inserted into the URL path verbatim, so callers must pass a trusted,
// path-safe value.
//
// A non-nil error is returned when the payload cannot be encoded, the request
// cannot be built or sent (the client times out after 30 seconds), the server
// answers with a status other than 200 OK, or the audio body cannot be read.
// For a non-200 response the error contains the status code and at most
// maxErrorBodyBytes of the response body.
func GetAudioArrayBufferElevenLabs(textToSpeak, ttsApiKey, ttsVoiceId string) ([]byte, error) {
	const (
		defaultVoiceID = "iP95p4xoKVk53GoZ742B"
		// maxErrorBodyBytes caps how much of a failed response is copied into
		// the returned error, so the error size stays bounded no matter what
		// the server sends.
		maxErrorBodyBytes = 8 << 10
	)

	textToSpeak = applyCustomTransforms(textToSpeak)

	// The field names and JSON tags mirror the request body sent to the API.
	payload := struct {
		Text          string `json:"text"`
		ModelID       string `json:"model_id"`
		VoiceSettings struct {
			Stability       float64 `json:"stability"`
			SimilarityBoost float64 `json:"similarity_boost"`
		} `json:"voice_settings"`
	}{
		Text:    textToSpeak,
		ModelID: "eleven_turbo_v2",
	}
	payload.VoiceSettings.Stability = 0.5
	payload.VoiceSettings.SimilarityBoost = 0.95

	reqBody, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("error marshalling payload: %w", err)
	}

	if ttsVoiceId == "" {
		ttsVoiceId = defaultVoiceID
	}
	endpoint := fmt.Sprintf("https://api.elevenlabs.io/v1/text-to-speech/%s", ttsVoiceId)

	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(reqBody))
	if err != nil {
		return nil, fmt.Errorf("error creating request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("xi-api-key", ttsApiKey)
	req.Header.Set("Accept", "audio/mpeg")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error making POST request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Read a bounded prefix of the body for diagnostics and report a
		// failed read instead of silently dropping it.
		errBody, readErr := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
		if readErr != nil {
			return nil, fmt.Errorf("HTTP error! Status: %d, Body: %s (error reading body: %w)", resp.StatusCode, errBody, readErr)
		}
		return nil, fmt.Errorf("HTTP error! Status: %d, Body: %s", resp.StatusCode, errBody)
	}

	audioData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading response body: %w", err)
	}
	return audioData, nil
}