使用index-tts模型文本转语音

# https://github.com/IAMJOYBO/index-tts

# Shallow clone: fetch only the most recent commit of the index-tts repository.
git clone --depth 1 https://github.com/index-tts/index-tts.git

# Enter the container: start a throwaway interactive bash session in the image.
#   --entrypoint bash   run bash in place of the image's default entrypoint
#   --rm                remove the container once the shell exits
#   --volume            bind-mount the host directory at /app/index-tts
#   --env HF_ENDPOINT   set HF_ENDPOINT to the hf-mirror.com URL in the container
docker run \
  --interactive --tty --rm \
  --entrypoint bash \
  --name indexttsbash \
  --volume /home/jcleng/work/mywork/index-tts:/app/index-tts \
  --env HF_ENDPOINT="https://hf-mirror.com" \
  registry.cn-hangzhou.aliyuncs.com/joybo/index-tts:latest

# Download the model: fetch the IndexTeam/Index-TTS repository from Hugging Face
# into ./checkpoints (relative to the current working directory).
huggingface-cli download \
  IndexTeam/Index-TTS \
  --local-dir ./checkpoints

# Run the index-tts container detached in the background.
# Host networking is required, and /dev/dri is passed through so the GPUs are
# visible inside the container. Intel GPUs are not supported; with those it
# falls back to the CPU.
docker run \
  --detach --interactive --tty \
  --network host \
  --device /dev/dri \
  --name indextts \
  --volume /home/jcleng/work/mywork/index-tts:/app/index-tts \
  --env HF_ENDPOINT="https://hf-mirror.com" \
  registry.cn-hangzhou.aliyuncs.com/joybo/index-tts:latest

# 访问web
http://127.0.0.1:7860/

# checkpoints 目录结构;需要把 bpe.model 再复制一份到 checkpoints 子目录中
└❯ tree -L 2 .
.
├── bigvgan_discriminator.pth
├── bigvgan_generator.pth
├── bpe.model
├── checkpoints
│   └── bpe.model
├── config.yaml
├── dvae.pth
├── gpt.pth
├── README.md
└── unigram_12000.vocab

2 directories, 9 files
使用fish-speech模型文本转语音

# The fish-speech image already bundles the model.
# Run it detached in the background with host networking.
docker run \
  --detach --interactive --tty \
  --network host \
  --name fishspeech \
  swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/fishaudio/fish-speech:v1.5.0

# 访问web
http://127.0.0.1:7860/