Fedir Zadniprovskyi committed on
Commit
8fc9285
1 Parent(s): 4005fd1

feat: reorganize docker files

Browse files
.github/workflows/docker-build-and-push.yaml CHANGED
@@ -13,11 +13,12 @@ jobs:
13
  runs-on: ubuntu-latest
14
  strategy:
15
  matrix:
16
- dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
 
17
  include:
18
- - dockerfile: Dockerfile.cuda
19
  tag-suffix: -cuda
20
- - dockerfile: Dockerfile.cpu
21
  tag-suffix: -cpu
22
  steps:
23
  - uses: actions/checkout@v4
@@ -45,7 +46,9 @@ jobs:
45
  uses: docker/build-push-action@v6
46
  with:
47
  context: .
48
- file: ${{ matrix.dockerfile }}
 
 
49
  push: true
50
  platforms: linux/amd64,linux/arm64
51
  tags: ${{ steps.meta.outputs.tags }}
 
13
  runs-on: ubuntu-latest
14
  strategy:
15
  matrix:
16
+ # https://hub.docker.com/r/nvidia/cuda/tags
17
+ base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
18
  include:
19
+ - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
20
  tag-suffix: -cuda
21
+ - base-image: ubuntu:24.04
22
  tag-suffix: -cpu
23
  steps:
24
  - uses: actions/checkout@v4
 
46
  uses: docker/build-push-action@v6
47
  with:
48
  context: .
49
+ file: Dockerfile
50
+ build-args: |
51
+ BASE_IMAGE=${{ matrix.base-image }}
52
  push: true
53
  platforms: linux/amd64,linux/arm64
54
  tags: ${{ steps.meta.outputs.tags }}
Dockerfile.cuda → Dockerfile RENAMED
@@ -1,22 +1,28 @@
1
- FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
 
2
  LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
3
  # `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
4
  # hadolint ignore=DL3008
5
  RUN apt-get update && \
6
- DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
7
- apt-get clean && \
8
- rm -rf /var/lib/apt/lists/*
9
- COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
10
- WORKDIR /root/faster-whisper-server
 
 
 
11
  # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
 
12
  RUN --mount=type=cache,target=/root/.cache/uv \
13
- --mount=type=bind,source=uv.lock,target=uv.lock \
14
- --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
15
- uv sync --frozen --no-install-project
16
- COPY ./src ./pyproject.toml ./uv.lock ./
17
  RUN --mount=type=cache,target=/root/.cache/uv \
18
- uv sync --frozen --extra ui --extra opentelemetry
19
  ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
20
  ENV UVICORN_HOST=0.0.0.0
21
  ENV UVICORN_PORT=8000
 
22
  CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
 
1
+ ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
2
+ FROM ${BASE_IMAGE}
3
  LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
4
  # `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
5
  # hadolint ignore=DL3008
6
  RUN apt-get update && \
7
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
8
+ apt-get clean && \
9
+ rm -rf /var/lib/apt/lists/*
10
+ USER ubuntu
11
+ ENV HOME=/home/ubuntu \
12
+ PATH=/home/ubuntu/.local/bin:$PATH
13
+ WORKDIR $HOME/faster-whisper-server
14
+ COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
15
  # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
16
+ # https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
17
  RUN --mount=type=cache,target=/root/.cache/uv \
18
+ --mount=type=bind,source=uv.lock,target=uv.lock \
19
+ --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
20
+ uv sync --frozen --compile-bytecode --no-install-project
21
+ COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
22
  RUN --mount=type=cache,target=/root/.cache/uv \
23
+ uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
24
  ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
25
  ENV UVICORN_HOST=0.0.0.0
26
  ENV UVICORN_PORT=8000
27
+ EXPOSE 8000
28
  CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
Dockerfile.cpu DELETED
@@ -1,22 +0,0 @@
1
- FROM ubuntu:24.04
2
- LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
3
- # `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
4
- # hadolint ignore=DL3008
5
- RUN apt-get update && \
6
- DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
7
- apt-get clean && \
8
- rm -rf /var/lib/apt/lists/*
9
- COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
10
- WORKDIR /root/faster-whisper-server
11
- # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
12
- RUN --mount=type=cache,target=/root/.cache/uv \
13
- --mount=type=bind,source=uv.lock,target=uv.lock \
14
- --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
15
- uv sync --frozen --no-install-project
16
- COPY ./src ./pyproject.toml ./uv.lock ./
17
- RUN --mount=type=cache,target=/root/.cache/uv \
18
- uv sync --frozen --extra ui
19
- ENV WHISPER__MODEL=Systran/faster-whisper-small
20
- ENV UVICORN_HOST=0.0.0.0
21
- ENV UVICORN_PORT=8000
22
- CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -25,18 +25,26 @@ See [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio)
25
 
26
  ![image](https://github.com/fedirz/faster-whisper-server/assets/76551385/6d215c52-ded5-41d2-89a5-03a6fd113aa0)
27
 
28
- Using Docker
 
 
29
  ```bash
30
- docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cuda
31
- # or
32
- docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu
 
 
 
 
 
33
  ```
34
- Using Docker Compose
 
35
  ```bash
36
- curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
37
- docker compose up --detach faster-whisper-server-cuda
38
- # or
39
- docker compose up --detach faster-whisper-server-cpu
40
  ```
41
 
42
  Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
 
25
 
26
  ![image](https://github.com/fedirz/faster-whisper-server/assets/76551385/6d215c52-ded5-41d2-89a5-03a6fd113aa0)
27
 
28
+ Using Docker Compose (Recommended)
29
+ NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
30
+
31
  ```bash
32
+ curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
33
+
34
+ # for GPU support
35
+ curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
36
+ docker compose --file compose.cuda.yaml up --detach
37
+ # for CPU only (use this if you don't have a GPU, as the image is much smaller)
38
+ curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
39
+ docker compose --file compose.cpu.yaml up --detach
40
  ```
41
+
42
+ Using Docker
43
  ```bash
44
+ # for GPU support
45
+ docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
46
+ # for CPU only (use this if you don't have a GPU, as the image is much smaller)
47
+ docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
48
  ```
49
 
50
  Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
Taskfile.yaml CHANGED
@@ -11,19 +11,7 @@ tasks:
11
  - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
12
  sources:
13
  - src/**/*.py
14
- build:
15
- cmds:
16
- - docker compose build
17
- sources:
18
- - Dockerfile.*
19
- - src/**/*.py
20
  create-multi-arch-builder: docker buildx create --name main --driver=docker-container
21
- docker-build:
22
- cmds:
23
- - docker compose build --builder main {{.CLI_ARGS}}
24
- sources:
25
- - Dockerfile.*
26
- - src/faster_whisper_server/*.py
27
  cii:
28
  cmds:
29
  - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
 
11
  - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
12
  sources:
13
  - src/**/*.py
 
 
 
 
 
 
14
  create-multi-arch-builder: docker buildx create --name main --driver=docker-container
 
 
 
 
 
 
15
  cii:
16
  cmds:
17
  - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
compose.cpu.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # include:
2
+ # - compose.observability.yaml
3
+ services:
4
+ faster-whisper-server:
5
+ extends:
6
+ file: compose.yaml
7
+ service: faster-whisper-server
8
+ image: fedirz/faster-whisper-server:latest-cpu
9
+ build:
10
+ args:
11
+ BASE_IMAGE: ubuntu:24.04
12
+ environment:
13
+ - WHISPER__MODEL=Systran/faster-whisper-small
14
+ volumes:
15
+ - hugging_face_cache:/root/.cache/huggingface
16
+ volumes:
17
+ hugging_face_cache:
compose.cuda-cdi.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # include:
2
+ # - compose.observability.yaml
3
+ # This file is for those who have the CDI Docker feature enabled
4
+ # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
5
+ # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
6
+ services:
7
+ faster-whisper-server:
8
+ extends:
9
+ file: compose.cuda.yaml
10
+ service: faster-whisper-server
11
+ volumes:
12
+ - hugging_face_cache:/root/.cache/huggingface
13
+ deploy:
14
+ resources:
15
+ reservations:
16
+ # WARN: requires Docker Compose 2.24.2
17
+ # https://docs.docker.com/reference/compose-file/merge/#replace-value
18
+ devices: !override
19
+ - capabilities: ["gpu"]
20
+ driver: cdi
21
+ device_ids:
22
+ - nvidia.com/gpu=all
23
+ volumes:
24
+ hugging_face_cache:
compose.cuda.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # include:
2
+ # - compose.observability.yaml
3
+ services:
4
+ faster-whisper-server:
5
+ extends:
6
+ file: compose.yaml
7
+ service: faster-whisper-server
8
+ image: fedirz/faster-whisper-server:latest-cuda
9
+ build:
10
+ args:
11
+ BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
12
+ environment:
13
+ - WHISPER__MODEL=Systran/faster-whisper-large-v3
14
+ volumes:
15
+ - hugging_face_cache:/root/.cache/huggingface
16
+ deploy:
17
+ resources:
18
+ reservations:
19
+ devices:
20
+ - capabilities: ["gpu"]
21
+ volumes:
22
+ hugging_face_cache:
observability-compose.yaml → compose.observability.yaml RENAMED
File without changes
compose.yaml CHANGED
@@ -1,11 +1,9 @@
1
  # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
2
- include:
3
- - observability-compose.yaml
4
  services:
5
- faster-whisper-server-cuda:
6
- image: fedirz/faster-whisper-server:latest-cuda
7
  build:
8
- dockerfile: Dockerfile.cuda
9
  context: .
10
  platforms:
11
  - linux/amd64
@@ -13,39 +11,7 @@ services:
13
  restart: unless-stopped
14
  ports:
15
  - 8000:8000
16
- volumes:
17
- - hugging_face_cache:/root/.cache/huggingface
18
  develop:
19
  watch:
20
  - path: faster_whisper_server
21
  action: rebuild
22
- deploy:
23
- resources:
24
- reservations:
25
- devices:
26
- - capabilities: ["gpu"]
27
- # If you have CDI feature enabled use the following instead
28
- # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
29
- # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
30
- # - driver: cdi
31
- # device_ids:
32
- # - nvidia.com/gpu=all
33
- faster-whisper-server-cpu:
34
- image: fedirz/faster-whisper-server:latest-cpu
35
- build:
36
- dockerfile: Dockerfile.cpu
37
- context: .
38
- platforms:
39
- - linux/amd64
40
- - linux/arm64
41
- restart: unless-stopped
42
- ports:
43
- - 8000:8000
44
- volumes:
45
- - hugging_face_cache:/root/.cache/huggingface
46
- develop:
47
- watch:
48
- - path: faster_whisper_server
49
- action: rebuild
50
- volumes:
51
- hugging_face_cache:
 
1
  # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
 
 
2
  services:
3
+ faster-whisper-server:
4
+ container_name: faster-whisper-server
5
  build:
6
+ dockerfile: Dockerfile
7
  context: .
8
  platforms:
9
  - linux/amd64
 
11
  restart: unless-stopped
12
  ports:
13
  - 8000:8000
 
 
14
  develop:
15
  watch:
16
  - path: faster_whisper_server
17
  action: rebuild