Spaces:
Running
Running
Fedir Zadniprovskyi
committed on
Commit
•
8fc9285
1
Parent(s):
4005fd1
feat: reorganize docker files
Browse files- .github/workflows/docker-build-and-push.yaml +7 -4
- Dockerfile.cuda → Dockerfile +17 -11
- Dockerfile.cpu +0 -22
- README.md +17 -9
- Taskfile.yaml +0 -12
- compose.cpu.yaml +17 -0
- compose.cuda-cdi.yaml +24 -0
- compose.cuda.yaml +22 -0
- observability-compose.yaml → compose.observability.yaml +0 -0
- compose.yaml +3 -37
.github/workflows/docker-build-and-push.yaml
CHANGED
@@ -13,11 +13,12 @@ jobs:
|
|
13 |
runs-on: ubuntu-latest
|
14 |
strategy:
|
15 |
matrix:
|
16 |
-
|
|
|
17 |
include:
|
18 |
-
-
|
19 |
tag-suffix: -cuda
|
20 |
-
-
|
21 |
tag-suffix: -cpu
|
22 |
steps:
|
23 |
- uses: actions/checkout@v4
|
@@ -45,7 +46,9 @@ jobs:
|
|
45 |
uses: docker/build-push-action@v6
|
46 |
with:
|
47 |
context: .
|
48 |
-
file:
|
|
|
|
|
49 |
push: true
|
50 |
platforms: linux/amd64,linux/arm64
|
51 |
tags: ${{ steps.meta.outputs.tags }}
|
|
|
13 |
runs-on: ubuntu-latest
|
14 |
strategy:
|
15 |
matrix:
|
16 |
+
# https://hub.docker.com/r/nvidia/cuda/tags
|
17 |
+
base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
|
18 |
include:
|
19 |
+
- base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
|
20 |
tag-suffix: -cuda
|
21 |
+
- base-image: ubuntu:24.04
|
22 |
tag-suffix: -cpu
|
23 |
steps:
|
24 |
- uses: actions/checkout@v4
|
|
|
46 |
uses: docker/build-push-action@v6
|
47 |
with:
|
48 |
context: .
|
49 |
+
file: Dockerfile
|
50 |
+
build-args: |
|
51 |
+
BASE_IMAGE=${{ matrix.base-image }}
|
52 |
push: true
|
53 |
platforms: linux/amd64,linux/arm64
|
54 |
tags: ${{ steps.meta.outputs.tags }}
|
Dockerfile.cuda → Dockerfile
RENAMED
@@ -1,22 +1,28 @@
|
|
1 |
-
|
|
|
2 |
LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
|
3 |
# `ffmpeg` is installed because without it `gradio` won't work with mp3 (possibly others as well) files
|
4 |
# hadolint ignore=DL3008
|
5 |
RUN apt-get update && \
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
11 |
# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
|
|
|
12 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
COPY ./src ./pyproject.toml ./uv.lock ./
|
17 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
18 |
-
|
19 |
ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
|
20 |
ENV UVICORN_HOST=0.0.0.0
|
21 |
ENV UVICORN_PORT=8000
|
|
|
22 |
CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
|
|
|
1 |
+
ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
|
2 |
+
FROM ${BASE_IMAGE}
|
3 |
LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
|
4 |
# `ffmpeg` is installed because without it `gradio` won't work with mp3 (possibly others as well) files
|
5 |
# hadolint ignore=DL3008
|
6 |
RUN apt-get update && \
|
7 |
+
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
|
8 |
+
apt-get clean && \
|
9 |
+
rm -rf /var/lib/apt/lists/*
|
10 |
+
USER ubuntu
|
11 |
+
ENV HOME=/home/ubuntu \
|
12 |
+
PATH=/home/ubuntu/.local/bin:$PATH
|
13 |
+
WORKDIR $HOME/faster-whisper-server
|
14 |
+
COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
|
15 |
# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
|
16 |
+
# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
|
17 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
18 |
+
--mount=type=bind,source=uv.lock,target=uv.lock \
|
19 |
+
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
20 |
+
uv sync --frozen --compile-bytecode --no-install-project
|
21 |
+
COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
|
22 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
23 |
+
uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
|
24 |
ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
|
25 |
ENV UVICORN_HOST=0.0.0.0
|
26 |
ENV UVICORN_PORT=8000
|
27 |
+
EXPOSE 8000
|
28 |
CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
|
Dockerfile.cpu
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
FROM ubuntu:24.04
|
2 |
-
LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
|
3 |
-
# `ffmpeg` is installed because without it `gradio` won't work with mp3 (possibly others as well) files
|
4 |
-
# hadolint ignore=DL3008
|
5 |
-
RUN apt-get update && \
|
6 |
-
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
|
7 |
-
apt-get clean && \
|
8 |
-
rm -rf /var/lib/apt/lists/*
|
9 |
-
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
|
10 |
-
WORKDIR /root/faster-whisper-server
|
11 |
-
# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
|
12 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
13 |
-
--mount=type=bind,source=uv.lock,target=uv.lock \
|
14 |
-
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
15 |
-
uv sync --frozen --no-install-project
|
16 |
-
COPY ./src ./pyproject.toml ./uv.lock ./
|
17 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
18 |
-
uv sync --frozen --extra ui
|
19 |
-
ENV WHISPER__MODEL=Systran/faster-whisper-small
|
20 |
-
ENV UVICORN_HOST=0.0.0.0
|
21 |
-
ENV UVICORN_PORT=8000
|
22 |
-
CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -25,18 +25,26 @@ See [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio)
|
|
25 |
|
26 |
![image](https://github.com/fedirz/faster-whisper-server/assets/76551385/6d215c52-ded5-41d2-89a5-03a6fd113aa0)
|
27 |
|
28 |
-
Using Docker
|
|
|
|
|
29 |
```bash
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
33 |
```
|
34 |
-
|
|
|
35 |
```bash
|
36 |
-
|
37 |
-
docker
|
38 |
-
#
|
39 |
-
docker
|
40 |
```
|
41 |
|
42 |
Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
|
|
|
25 |
|
26 |
![image](https://github.com/fedirz/faster-whisper-server/assets/76551385/6d215c52-ded5-41d2-89a5-03a6fd113aa0)
|
27 |
|
28 |
+
Using Docker Compose (Recommended)
|
29 |
+
NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
|
30 |
+
|
31 |
```bash
|
32 |
+
curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
|
33 |
+
|
34 |
+
# for GPU support
|
35 |
+
curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
|
36 |
+
docker compose --file compose.cuda.yaml up --detach
|
37 |
+
# for CPU only (use this if you don't have a GPU, as the image is much smaller)
|
38 |
+
curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
|
39 |
+
docker compose --file compose.cpu.yaml up --detach
|
40 |
```
|
41 |
+
|
42 |
+
Using Docker
|
43 |
```bash
|
44 |
+
# for GPU support
|
45 |
+
docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
|
46 |
+
# for CPU only (use this if you don't have a GPU, as the image is much smaller)
|
47 |
+
docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
|
48 |
```
|
49 |
|
50 |
Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
|
Taskfile.yaml
CHANGED
@@ -11,19 +11,7 @@ tasks:
|
|
11 |
- pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
|
12 |
sources:
|
13 |
- src/**/*.py
|
14 |
-
build:
|
15 |
-
cmds:
|
16 |
-
- docker compose build
|
17 |
-
sources:
|
18 |
-
- Dockerfile.*
|
19 |
-
- src/**/*.py
|
20 |
create-multi-arch-builder: docker buildx create --name main --driver=docker-container
|
21 |
-
docker-build:
|
22 |
-
cmds:
|
23 |
-
- docker compose build --builder main {{.CLI_ARGS}}
|
24 |
-
sources:
|
25 |
-
- Dockerfile.*
|
26 |
-
- src/faster_whisper_server/*.py
|
27 |
cii:
|
28 |
cmds:
|
29 |
- act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
|
|
|
11 |
- pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
|
12 |
sources:
|
13 |
- src/**/*.py
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
create-multi-arch-builder: docker buildx create --name main --driver=docker-container
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
cii:
|
16 |
cmds:
|
17 |
- act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
|
compose.cpu.yaml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# include:
|
2 |
+
# - compose.observability.yaml
|
3 |
+
services:
|
4 |
+
faster-whisper-server:
|
5 |
+
extends:
|
6 |
+
file: compose.yaml
|
7 |
+
service: faster-whisper-server
|
8 |
+
image: fedirz/faster-whisper-server:latest-cpu
|
9 |
+
build:
|
10 |
+
args:
|
11 |
+
BASE_IMAGE: ubuntu:24.04
|
12 |
+
environment:
|
13 |
+
- WHISPER__MODEL=Systran/faster-whisper-small
|
14 |
+
volumes:
|
15 |
+
- hugging_face_cache:/root/.cache/huggingface
|
16 |
+
volumes:
|
17 |
+
hugging_face_cache:
|
compose.cuda-cdi.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# include:
|
2 |
+
# - compose.observability.yaml
|
3 |
+
# This file is for those who have the CDI Docker feature enabled
|
4 |
+
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
|
5 |
+
# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
|
6 |
+
services:
|
7 |
+
faster-whisper-server:
|
8 |
+
extends:
|
9 |
+
file: compose.cuda.yaml
|
10 |
+
service: faster-whisper-server
|
11 |
+
volumes:
|
12 |
+
- hugging_face_cache:/root/.cache/huggingface
|
13 |
+
deploy:
|
14 |
+
resources:
|
15 |
+
reservations:
|
16 |
+
# WARN: requires Docker Compose 2.24.2
|
17 |
+
# https://docs.docker.com/reference/compose-file/merge/#replace-value
|
18 |
+
devices: !override
|
19 |
+
- capabilities: ["gpu"]
|
20 |
+
driver: cdi
|
21 |
+
device_ids:
|
22 |
+
- nvidia.com/gpu=all
|
23 |
+
volumes:
|
24 |
+
hugging_face_cache:
|
compose.cuda.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# include:
|
2 |
+
# - compose.observability.yaml
|
3 |
+
services:
|
4 |
+
faster-whisper-server:
|
5 |
+
extends:
|
6 |
+
file: compose.yaml
|
7 |
+
service: faster-whisper-server
|
8 |
+
image: fedirz/faster-whisper-server:latest-cuda
|
9 |
+
build:
|
10 |
+
args:
|
11 |
+
BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
|
12 |
+
environment:
|
13 |
+
- WHISPER__MODEL=Systran/faster-whisper-large-v3
|
14 |
+
volumes:
|
15 |
+
- hugging_face_cache:/root/.cache/huggingface
|
16 |
+
deploy:
|
17 |
+
resources:
|
18 |
+
reservations:
|
19 |
+
devices:
|
20 |
+
- capabilities: ["gpu"]
|
21 |
+
volumes:
|
22 |
+
hugging_face_cache:
|
observability-compose.yaml → compose.observability.yaml
RENAMED
File without changes
|
compose.yaml
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
# TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
|
2 |
-
include:
|
3 |
-
- observability-compose.yaml
|
4 |
services:
|
5 |
-
faster-whisper-server
|
6 |
-
|
7 |
build:
|
8 |
-
dockerfile: Dockerfile
|
9 |
context: .
|
10 |
platforms:
|
11 |
- linux/amd64
|
@@ -13,39 +11,7 @@ services:
|
|
13 |
restart: unless-stopped
|
14 |
ports:
|
15 |
- 8000:8000
|
16 |
-
volumes:
|
17 |
-
- hugging_face_cache:/root/.cache/huggingface
|
18 |
develop:
|
19 |
watch:
|
20 |
- path: faster_whisper_server
|
21 |
action: rebuild
|
22 |
-
deploy:
|
23 |
-
resources:
|
24 |
-
reservations:
|
25 |
-
devices:
|
26 |
-
- capabilities: ["gpu"]
|
27 |
-
# If you have CDI feature enabled use the following instead
|
28 |
-
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
|
29 |
-
# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
|
30 |
-
# - driver: cdi
|
31 |
-
# device_ids:
|
32 |
-
# - nvidia.com/gpu=all
|
33 |
-
faster-whisper-server-cpu:
|
34 |
-
image: fedirz/faster-whisper-server:latest-cpu
|
35 |
-
build:
|
36 |
-
dockerfile: Dockerfile.cpu
|
37 |
-
context: .
|
38 |
-
platforms:
|
39 |
-
- linux/amd64
|
40 |
-
- linux/arm64
|
41 |
-
restart: unless-stopped
|
42 |
-
ports:
|
43 |
-
- 8000:8000
|
44 |
-
volumes:
|
45 |
-
- hugging_face_cache:/root/.cache/huggingface
|
46 |
-
develop:
|
47 |
-
watch:
|
48 |
-
- path: faster_whisper_server
|
49 |
-
action: rebuild
|
50 |
-
volumes:
|
51 |
-
hugging_face_cache:
|
|
|
1 |
# TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
|
|
|
|
|
2 |
services:
|
3 |
+
faster-whisper-server:
|
4 |
+
container_name: faster-whisper-server
|
5 |
build:
|
6 |
+
dockerfile: Dockerfile
|
7 |
context: .
|
8 |
platforms:
|
9 |
- linux/amd64
|
|
|
11 |
restart: unless-stopped
|
12 |
ports:
|
13 |
- 8000:8000
|
|
|
|
|
14 |
develop:
|
15 |
watch:
|
16 |
- path: faster_whisper_server
|
17 |
action: rebuild
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|