# Source: Genie/Tutorial/English/API Server Tutorial.py (GitHub repository ljgit428/Genie, master branch)

"""
Start the API Server using genie.start_server(host=SERVER_HOST, port=SERVER_PORT, workers=1).

Quick Reference for Genie TTS Server API

1. Load Character Model
Endpoint: POST /load_character
Function: Load a character model into the server.
Request Parameters (JSON):
    - character_name (string): Unique name of the character.
    - onnx_model_dir (string): Path to the model folder on the server.

2. Set Reference Audio
Endpoint: POST /set_reference_audio
Function: Set the audio required for voice cloning for the loaded character.
Request Parameters (JSON):
    - character_name (string): Name of the character to set.
    - audio_path (string): Path to the reference audio file on the server.
    - audio_text (string): Text corresponding to the reference audio.

3. Text-to-Speech (TTS)
Endpoint: POST /tts
Function: Generate speech and return it as an audio/wav stream.
Request Parameters (JSON):
    - character_name (string): Character name to use.
    - text (string): Text to convert to speech.
    - split_sentence (boolean, optional): Whether to auto-split sentences, default is false.
    - save_path (string, optional): Full path to save audio on the server.

4. Unload Character Model
Endpoint: POST /unload_character
Function: Remove a character from server memory to free resources.
Request Parameters (JSON):
    - character_name (string): Character name to unload.

5. Stop All TTS Tasks
Endpoint: POST /stop
Function: Immediately stop all ongoing speech synthesis tasks.
Request Parameters: None.

6. Clear Reference Audio Cache
Endpoint: POST /clear_reference_audio_cache
Function: Clear the loaded reference audio cache on the server.
Request Parameters: None.
"""

import os

# (Optional) We recommend manually specifying the Hubert path for Genie.
# Download from Huggingface: https://huggingface.co/High-Logic/Genie
# Note: If this line is not set, Genie will automatically download the model from Huggingface.
# The value below is a placeholder: replace it with the real path to the
# downloaded file, or delete the line to fall back to the automatic download.
# NOTE(review): this is assigned before `import genie_tts` further down —
# presumably the variable is read at import time; confirm before reordering.
os.environ['HUBERT_MODEL_PATH'] = r"C:\path\to\chinese-hubert-base.onnx"

# (Optional) We recommend manually specifying the dictionary path for pyopenjtalk.
# Download from Huggingface: https://huggingface.co/High-Logic/Genie
# Note: If this line is not set, pyopenjtalk will automatically download the dictionary.
# The value below is a placeholder as well: replace it with the real dictionary
# directory, or delete the line to fall back to the automatic download.
os.environ['OPEN_JTALK_DICT_DIR'] = r"C:\path\to\open_jtalk_dic_utf_8-1.11"

import time
import requests
import pyaudio
import multiprocessing

import genie_tts as genie

# --- Configuration ---
# Server address: used both to start the server (run_server) and to build the
# URL the client sends its requests to.
SERVER_HOST = "127.0.0.1"
SERVER_PORT = 8000
BASE_URL = f"http://{SERVER_HOST}:{SERVER_PORT}"

# Playback format handed to PyAudio when the client opens its output stream:
# sample width in bytes, number of channels, and sample rate in Hz.
# NOTE(review): these presumably have to match the audio produced by the
# /tts endpoint — confirm against the server before changing them.
BYTES_PER_SAMPLE = 2
CHANNELS = 1
SAMPLE_RATE = 32000


def run_server():
    """Start the Genie TTS API server on SERVER_HOST:SERVER_PORT with one worker.

    Used as the target of the server process created in the ``__main__`` block.
    """
    server_options = {
        "host": SERVER_HOST,
        "port": SERVER_PORT,
        "workers": 1,
    }
    genie.start_server(**server_options)


def _post_setup_request(endpoint, payload, success_label, failure_label):
    """POST ``payload`` as JSON to ``BASE_URL + endpoint`` and print the outcome.

    Args:
        endpoint: Path of the API endpoint, including the leading slash.
        payload: JSON-serialisable request body.
        success_label: Text printed before the server message on success.
        failure_label: Text printed before the error on failure.

    Returns:
        True if the request completed with a successful HTTP status,
        False if the request failed or the server returned an error status.
    """
    try:
        response = requests.post(f"{BASE_URL}{endpoint}", json=payload)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"[Client] {failure_label}: {e}")
        return False

    # The request itself succeeded at this point. A body that is not JSON, or
    # that has no "message" field, must neither crash the client nor be
    # reported as a failed request, so fall back to the raw response text.
    try:
        message = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        message = response.text
    print(f"[Client] {success_label}: {message}")
    return True


def main_client():
    """Run the tutorial client against the server at ``BASE_URL``.

    Performs three steps in order:

    1. POST ``/load_character`` to load the character model.
    2. POST ``/set_reference_audio`` to set the reference audio for that character.
    3. POST ``/tts`` with ``stream=True`` and write the received audio chunks
       to a PyAudio output stream as they arrive.

    Outcomes are reported with ``print``. If step 1 or step 2 fails, the
    function returns early without attempting the following steps.
    Returns None.
    """
    # Defined once so the three requests cannot drift apart when the
    # placeholder is replaced.
    character_name = "<CHARACTER_NAME>"  # Replace with your character name

    # 1. Load Character
    print("\n[Client] Step 1: Sending load character request...")
    load_payload = {
        "character_name": character_name,
        "onnx_model_dir": r"<PATH_TO_CHARACTER_ONNX_MODEL_DIR>"  # Replace with the folder containing the ONNX model
    }
    if not _post_setup_request("/load_character", load_payload,
                               "Character loaded successfully",
                               "Failed to load character"):
        return

    # 2. Set Reference Audio
    print("\n[Client] Step 2: Sending set reference audio request...")
    ref_audio_payload = {
        "character_name": character_name,
        "audio_path": r"<PATH_TO_REFERENCE_AUDIO>",  # Replace with path to your reference audio file
        "audio_text": "<REFERENCE_AUDIO_TEXT>"  # Replace with the text corresponding to the reference audio
    }
    if not _post_setup_request("/set_reference_audio", ref_audio_payload,
                               "Reference audio set successfully",
                               "Failed to set reference audio"):
        return

    # 3. Request TTS and play audio stream
    print("\n[Client] Step 3: Requesting TTS and preparing audio stream...")
    tts_payload = {
        "character_name": character_name,
        "text": "<TEXT_TO_SYNTHESIZE>",  # Replace with the text you want to synthesize
        "split_sentence": True
    }

    # Size in bytes of one audio frame (one sample for every channel).
    frame_size = BYTES_PER_SAMPLE * CHANNELS
    # Bytes received but not yet played because they do not fill a whole frame.
    pending = b""

    p = pyaudio.PyAudio()
    stream = None

    try:
        with requests.post(f"{BASE_URL}/tts", json=tts_payload, stream=True) as response:
            response.raise_for_status()
            print("[Client] Connected to audio stream, starting playback...")

            # Iterate over received audio chunks
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                if stream is None:
                    # Open the output device lazily, once audio actually arrives.
                    stream = p.open(format=p.get_format_from_width(BYTES_PER_SAMPLE),
                                    channels=CHANNELS,
                                    rate=SAMPLE_RATE,
                                    output=True)
                # Only hand whole frames to PyAudio: it derives the frame
                # count from the buffer length, so a chunk ending mid-sample
                # would lose a byte and misalign all the audio after it.
                pending += chunk
                usable = len(pending) - len(pending) % frame_size
                if usable:
                    stream.write(pending[:usable])
                    pending = pending[usable:]

            print("[Client] Audio stream finished.")

    except requests.exceptions.RequestException as e:
        print(f"[Client] TTS request failed: {e}")
    except Exception as e:
        # Top-level boundary of the demo: report any playback problem
        # instead of crashing, so the caller can still shut the server down.
        print(f"[Client] Error during playback: {e}")
    finally:
        # Always release the audio device, even if streaming failed midway.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


if __name__ == "__main__":
    # Local import: only the readiness probe below needs it.
    import socket

    # Create and start server process
    server_process = multiprocessing.Process(target=run_server)
    server_process.start()

    try:
        # Wait until the server accepts TCP connections instead of sleeping
        # for a fixed time: startup can take longer than a few seconds, and a
        # fixed sleep makes the first client request fail with
        # "connection refused". Stop waiting early if the server process dies.
        deadline = time.monotonic() + 60.0
        server_ready = False
        while server_process.is_alive() and time.monotonic() < deadline:
            try:
                with socket.create_connection((SERVER_HOST, SERVER_PORT), timeout=1.0):
                    server_ready = True
                break
            except OSError:
                # Not listening yet; retry shortly.
                time.sleep(0.5)

        # Run client logic
        if server_ready:
            main_client()
        else:
            print("\n[Main] Server did not start accepting connections; skipping client run.")
    finally:
        print("\n[Main] Test completed, shutting down server...")
        server_process.terminate()
        server_process.join()
        print("[Main] Server closed.")