From ddfbffe8f3a08a04edfe16e2c3446c11742d8e13 Mon Sep 17 00:00:00 2001 From: Georgi Sundberg <44953283+Chugarah@users.noreply.github.com> Date: Mon, 31 Mar 2025 06:33:00 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=A6=20Multi-Format=20Export=20Support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 🚀 Added simultaneous export for all transcript formats (`txt`, `srt`, `vtt`, `tsv`, `json`) - 🗂️ Packaged outputs into a single ZIP file for easy download - 🔄 Updated workflows & Docker setup to support the new feature - 📖 Updated README with relevant changes --- Dockerfile.gpu | 3 + README.md | 3 + app/asr_models/faster_whisper_engine.py | 42 ++++-- app/asr_models/mbain_whisperx_engine.py | 18 ++- app/asr_models/openai_whisper_engine.py | 18 ++- app/utils.py | 126 ++++++++++++++++-- app/webservice.py | 48 ++++++- docker-compose-cpu.yml | 17 +++ docker-compose.yml | 35 ++++- example.env | 25 ++++ whisper-asr-webservice-main/.dockerignore | 3 + .../.github/FUNDING.yml | 4 + .../.github/workflows/docker-publish.yml | 46 +++++++ .../.github/workflows/documentation.yml | 27 ++++ whisper-asr-webservice-main/.gitignore | 44 ++++++ 15 files changed, 427 insertions(+), 32 deletions(-) create mode 100644 docker-compose-cpu.yml create mode 100644 example.env create mode 100644 whisper-asr-webservice-main/.dockerignore create mode 100644 whisper-asr-webservice-main/.github/FUNDING.yml create mode 100644 whisper-asr-webservice-main/.github/workflows/docker-publish.yml create mode 100644 whisper-asr-webservice-main/.github/workflows/documentation.yml create mode 100644 whisper-asr-webservice-main/.gitignore diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 903b7b3..e7deff6 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -15,6 +15,9 @@ RUN export DEBIAN_FRONTEND=noninteractive \ python${PYTHON_VERSION}-venv \ python3-pip \ libcudnn8 \ + libcudnn8-dev \ + # Make sure to install all required libcudnn components + libcudnn8-samples \ python3-pip \ && rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index e8cd150..7c40172 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ docker run -d -p 9000:9000 \ - Multiple ASR engines support (OpenAI Whisper, Faster Whisper, WhisperX) - Multiple output formats (text, JSON, VTT, SRT, TSV) +- Support for outputting all formats simultaneously with a single request - Word-level timestamps support - Voice activity detection (VAD) filtering - Speaker diarization (with WhisperX) @@ -90,3 +91,5 @@ After starting the service, visit `http://localhost:9000` or `http://0.0.0.0:900 ## Credits - This software uses libraries from the [FFmpeg](http://ffmpeg.org) project under the [LGPLv2.1](http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html) + + diff --git a/app/asr_models/faster_whisper_engine.py b/app/asr_models/faster_whisper_engine.py index d7c3c44..ecba45b 100644 --- a/app/asr_models/faster_whisper_engine.py +++ b/app/asr_models/faster_whisper_engine.py @@ -1,4 +1,5 @@ import time +import os from io import StringIO from threading import Thread from typing import BinaryIO, Union @@ -8,7 +9,7 @@ from app.asr_models.asr_model import ASRModel from app.config import CONFIG -from app.utils import ResultWriter, WriteJSON, WriteSRT, WriteTSV, WriteTXT, WriteVTT +from app.utils import ResultWriter, WriteJSON, WriteSRT, WriteTSV, WriteTXT, WriteVTT, WriteAll class FasterWhisperASR(ASRModel): @@ -59,10 +60,23 @@ def transcribe( text = text + segment.text result = {"language": options_dict.get("language", info.language), 
"segments": segments, "text": text} + # Store the output directory and audio path for the "all" option + self.output_dir = os.environ.get("OUTPUT_DIR", "/tmp") + self.audio_path = os.environ.get("AUDIO_FILENAME", "audio") + + # For "all" output format, create and return the zip bytes + if output == "all": + writer = WriteAll(self.output_dir) + zip_bytes = writer.create_zip_bytes(result) + # Create a generator that yields the bytes + def bytes_generator(): + yield zip_bytes + return bytes_generator() + + # For other formats, write to StringIO and return that output_file = StringIO() self.write_result(result, output_file, output) output_file.seek(0) - return output_file def language_detection(self, audio): @@ -84,13 +98,25 @@ def language_detection(self, audio): return detected_lang_code, detected_language_confidence def write_result(self, result: dict, file: BinaryIO, output: Union[str, None]): + """ + Write the transcription result to the specified output format. + + For 'all' format, this function is not directly used as the transcribe method + handles it with create_zip_bytes. + For other formats, writes directly to the provided file object. + """ + # Initialize the appropriate writer class based on the output format if output == "srt": - WriteSRT(ResultWriter).write_result(result, file=file) + writer_class = WriteSRT elif output == "vtt": - WriteVTT(ResultWriter).write_result(result, file=file) + writer_class = WriteVTT elif output == "tsv": - WriteTSV(ResultWriter).write_result(result, file=file) + writer_class = WriteTSV elif output == "json": - WriteJSON(ResultWriter).write_result(result, file=file) - else: - WriteTXT(ResultWriter).write_result(result, file=file) + writer_class = WriteJSON + else: # Default to txt + writer_class = WriteTXT + + # Create a ResultWriter instance and write to the file + writer = writer_class(self.output_dir) + writer.write_result(result, file=file) diff --git a/app/asr_models/mbain_whisperx_engine.py b/app/asr_models/mbain_whisperx_engine.py index 87494a3..d5c1799 100644 --- a/app/asr_models/mbain_whisperx_engine.py +++ b/app/asr_models/mbain_whisperx_engine.py @@ -1,4 +1,5 @@ import time +import os from io import StringIO from threading import Thread from typing import BinaryIO, Union @@ -9,6 +10,7 @@ from app.asr_models.asr_model import ASRModel from app.config import CONFIG +from app.utils import WriteAll class WhisperXASR(ASRModel): @@ -85,10 +87,24 @@ def transcribe( result = whisperx.assign_word_speakers(diarize_segments, result) result["language"] = language + # Store the output directory and audio path for the "all" option + self.output_dir = os.environ.get("OUTPUT_DIR", "/tmp") + self.audio_path = os.environ.get("AUDIO_FILENAME", "audio") + + # For "all" output format, create and return the zip bytes + if output == "all": + # Import WriteAll from app.utils if needed + writer = WriteAll(self.output_dir) + zip_bytes = writer.create_zip_bytes(result) + # Create a generator that yields the bytes + def bytes_generator(): + yield zip_bytes + return bytes_generator() + + # For other formats, write to StringIO and return that output_file = StringIO() self.write_result(result, output_file, output) output_file.seek(0) - return output_file def language_detection(self, audio): diff --git a/app/asr_models/openai_whisper_engine.py b/app/asr_models/openai_whisper_engine.py index 655d682..efd13e0 100644 --- a/app/asr_models/openai_whisper_engine.py +++ b/app/asr_models/openai_whisper_engine.py @@ -1,4 +1,5 @@ import time +import os from io import StringIO from 
threading import Thread from typing import BinaryIO, Union @@ -9,6 +10,7 @@ from app.asr_models.asr_model import ASRModel from app.config import CONFIG +from app.utils import WriteAll class OpenAIWhisperASR(ASRModel): @@ -49,10 +51,24 @@ def transcribe( with self.model_lock: result = self.model.transcribe(audio, **options_dict) + # Store the output directory and audio path for the "all" option + self.output_dir = os.environ.get("OUTPUT_DIR", "/tmp") + self.audio_path = os.environ.get("AUDIO_FILENAME", "audio") + + # For "all" output format, create and return the zip bytes + if output == "all": + # Import WriteAll from app.utils if needed + writer = WriteAll(self.output_dir) + zip_bytes = writer.create_zip_bytes(result) + # Create a generator that yields the bytes + def bytes_generator(): + yield zip_bytes + return bytes_generator() + + # For other formats, write to StringIO and return that output_file = StringIO() self.write_result(result, output_file, output) output_file.seek(0) - return output_file def language_detection(self, audio): diff --git a/app/utils.py b/app/utils.py index ddc8a99..85f1f75 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,5 +1,7 @@ import json import os +import io +import zipfile from dataclasses import asdict from typing import BinaryIO, TextIO @@ -32,7 +34,9 @@ class WriteTXT(ResultWriter): def write_result(self, result: dict, file: TextIO): for segment in result["segments"]: - print(segment.text.strip(), file=file, flush=True) + # Handle both segment as dict and as object + text = segment["text"] if isinstance(segment, dict) else segment.text + print(text.strip(), file=file, flush=True) class WriteVTT(ResultWriter): @@ -41,9 +45,19 @@ class WriteVTT(ResultWriter): def write_result(self, result: dict, file: TextIO): print("WEBVTT\n", file=file) for segment in result["segments"]: + # Handle both segment as dict and as object + if isinstance(segment, dict): + start = segment["start"] + end = segment["end"] + text = segment["text"] + else: + start = segment.start + end = segment.end + text = segment.text + print( - f"{format_timestamp(segment.start)} --> {format_timestamp(segment.end)}\n" - f"{segment.text.strip().replace('-->', '->')}\n", + f"{format_timestamp(start)} --> {format_timestamp(end)}\n" + f"{text.strip().replace('-->', '->')}\n", file=file, flush=True, ) @@ -54,12 +68,22 @@ class WriteSRT(ResultWriter): def write_result(self, result: dict, file: TextIO): for i, segment in enumerate(result["segments"], start=1): + # Handle both segment as dict and as object + if isinstance(segment, dict): + start = segment["start"] + end = segment["end"] + text = segment["text"] + else: + start = segment.start + end = segment.end + text = segment.text + # write srt lines print( f"{i}\n" - f"{format_timestamp(segment.start, always_include_hours=True, decimal_marker=',')} --> " - f"{format_timestamp(segment.end, always_include_hours=True, decimal_marker=',')}\n" - f"{segment.text.strip().replace('-->', '->')}\n", + f"{format_timestamp(start, always_include_hours=True, decimal_marker=',')} --> " + f"{format_timestamp(end, always_include_hours=True, decimal_marker=',')}\n" + f"{text.strip().replace('-->', '->')}\n", file=file, flush=True, ) @@ -80,9 +104,19 @@ class WriteTSV(ResultWriter): def write_result(self, result: dict, file: TextIO): print("start", "end", "text", sep="\t", file=file) for segment in result["segments"]: - print(round(1000 * segment.start), file=file, end="\t") - print(round(1000 * segment.end), file=file, end="\t") - 
print(segment.text.strip().replace("\t", " "), file=file, flush=True) + # Handle both segment as dict and as object + if isinstance(segment, dict): + start = segment["start"] + end = segment["end"] + text = segment["text"] + else: + start = segment.start + end = segment.end + text = segment.text + + print(round(1000 * start), file=file, end="\t") + print(round(1000 * end), file=file, end="\t") + print(text.strip().replace("\t", " "), file=file, flush=True) class WriteJSON(ResultWriter): @@ -90,10 +124,82 @@ class WriteJSON(ResultWriter): def write_result(self, result: dict, file: TextIO): if "segments" in result: - result["segments"] = [asdict(segment) for segment in result["segments"]] + # Check if segments are already dictionaries or need to be converted + if result["segments"] and not isinstance(result["segments"][0], dict): + result["segments"] = [asdict(segment) for segment in result["segments"]] json.dump(result, file) +class WriteAll: + """ + Write a transcript to multiple files in all supported formats. + """ + + def __init__(self, output_dir: str): + self.output_dir = output_dir + self.writers = { + "txt": WriteTXT(output_dir), + "vtt": WriteVTT(output_dir), + "srt": WriteSRT(output_dir), + "tsv": WriteTSV(output_dir), + "json": WriteJSON(output_dir) + } + + def __call__(self, result: dict, audio_path: str): + for format_name, writer in self.writers.items(): + try: + writer(result, audio_path) + except Exception as e: + print(f"Error in {format_name} writer: {str(e)}") + # Continue with other formats even if one fails + + def create_zip_bytes(self, result: dict): + """ + Create a zip file in memory and return its bytes. + This creates a valid zip file with all transcript formats. + """ + # Create a new in-memory zip file + buffer = io.BytesIO() + + try: + # Open the zip file for writing + with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + # Write each format to the zip file + formats = { + "txt": WriteTXT, + "vtt": WriteVTT, + "srt": WriteSRT, + "tsv": WriteTSV, + "json": WriteJSON + } + + for format_name, writer_class in formats.items(): + try: + # Create a buffer for this format's content + output = io.StringIO() + + # Write the result to the buffer + writer = writer_class(self.output_dir) + writer.write_result(result, output) + + # Get the text content and add it to the zip + content = output.getvalue().encode('utf-8') # Convert string to bytes + zip_file.writestr(f"transcript.{format_name}", content) + + except Exception as e: + print(f"Error adding {format_name} to zip: {str(e)}") + # Continue with other formats + + # Reset the buffer position and get the zip bytes + buffer.seek(0) + return buffer.read() + + except Exception as e: + print(f"Error creating zip file: {str(e)}") + # Return an empty buffer if zip creation fails + return b"" + + def load_audio(file: BinaryIO, encode=True, sr: int = CONFIG.SAMPLE_RATE): """ Open an audio file object and read as mono waveform, resampling as necessary. 
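
The new WriteAll.create_zip_bytes above builds every transcript format in memory: each writer renders into a StringIO, the text is encoded to UTF-8, and the entries are collected into a single ZIP held in a BytesIO buffer, so nothing needs to be written to the output directory. What follows is a minimal, self-contained sketch of that packaging step; the _txt and _tsv helpers are simplified stand-ins for the WriteTXT/WriteTSV writers above, and only the zipfile/BytesIO mechanics mirror the patch.

import io
import zipfile


def _txt(result: dict, out: io.StringIO) -> None:
    # Simplified stand-in for WriteTXT.write_result
    for seg in result["segments"]:
        print(seg["text"].strip(), file=out)


def _tsv(result: dict, out: io.StringIO) -> None:
    # Simplified stand-in for WriteTSV.write_result
    print("start", "end", "text", sep="\t", file=out)
    for seg in result["segments"]:
        print(round(1000 * seg["start"]), round(1000 * seg["end"]),
              seg["text"].strip().replace("\t", " "), sep="\t", file=out)


def zip_transcripts(result: dict) -> bytes:
    # Render each format into a StringIO, then pack the UTF-8 bytes into one
    # in-memory ZIP -- the same shape as WriteAll.create_zip_bytes.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for ext, write in {"txt": _txt, "tsv": _tsv}.items():
            text_buf = io.StringIO()
            write(result, text_buf)
            zf.writestr(f"transcript.{ext}", text_buf.getvalue().encode("utf-8"))
    buffer.seek(0)
    return buffer.read()


if __name__ == "__main__":
    demo = {"segments": [{"start": 0.0, "end": 1.5, "text": "hello world"}]}
    print(len(zip_transcripts(demo)), "bytes of zip data")

On the engine side, the returned bytes are wrapped in a one-shot bytes_generator so the "all" path keeps the same return shape the other formats already use, while webservice.py can pull the archive out with a single next(result) call.
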
diff --git a/app/webservice.py b/app/webservice.py index 8f4fa6a..78321c2 100644 --- a/app/webservice.py +++ b/app/webservice.py @@ -8,9 +8,10 @@ import uvicorn from fastapi import FastAPI, File, Query, UploadFile, applications from fastapi.openapi.docs import get_swagger_ui_html -from fastapi.responses import RedirectResponse, StreamingResponse +from fastapi.responses import RedirectResponse, StreamingResponse, Response, FileResponse from fastapi.staticfiles import StaticFiles from whisper import tokenizer +import tempfile from app.config import CONFIG from app.factory.asr_model_factory import ASRModelFactory @@ -86,8 +87,14 @@ async def asr( description="Max speakers in this file", include_in_schema=(True if CONFIG.ASR_ENGINE == "whisperx" else False), ), - output: Union[str, None] = Query(default="txt", enum=["txt", "vtt", "srt", "tsv", "json"]), + output: Union[str, None] = Query(default="txt", enum=["txt", "vtt", "srt", "tsv", "json", "all"]), ): + # Set environment variables for output directory and audio filename if needed for "all" output + if output == "all": + os.environ["OUTPUT_DIR"] = CONFIG.TEMP_DIR if hasattr(CONFIG, "TEMP_DIR") else "/tmp" + os.environ["AUDIO_FILENAME"] = audio_file.filename + + # Process the audio file with the ASR model result = asr_model.transcribe( load_audio(audio_file.file, encode), task, @@ -98,13 +105,40 @@ async def asr( {"diarize": diarize, "min_speakers": min_speakers, "max_speakers": max_speakers}, output, ) + + # For "all" output format (zip file) + if output == "all": + # Get the bytes from the generator + zip_bytes = next(result) + + # Create a temporary file to save the zip + with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file: + temp_file.write(zip_bytes) + temp_path = temp_file.name + + # Create a nice filename for the download + base_filename = os.path.basename(audio_file.filename) + download_filename = f"{os.path.splitext(base_filename)[0]}_transcripts.zip" + + # Use FastAPI's FileResponse to serve the file + return FileResponse( + path=temp_path, + filename=download_filename, + media_type="application/zip", + headers={"Asr-Engine": CONFIG.ASR_ENGINE} + ) + + # For other formats, continue using StreamingResponse + # Set the appropriate content type based on output format + content_type = "text/plain" + if output == "json": + content_type = "application/json" + + # Return the streaming response for text-based formats return StreamingResponse( result, - media_type="text/plain", - headers={ - "Asr-Engine": CONFIG.ASR_ENGINE, - "Content-Disposition": f'attachment; filename="{quote(audio_file.filename)}.{output}"', - }, + media_type=content_type, + headers={"Asr-Engine": CONFIG.ASR_ENGINE} ) diff --git a/docker-compose-cpu.yml b/docker-compose-cpu.yml new file mode 100644 index 0000000..5fb912b --- /dev/null +++ b/docker-compose-cpu.yml @@ -0,0 +1,17 @@ +version: "3.4" + +services: + whisper-asr-webservice: + build: + context: . + dockerfile: Dockerfile + environment: + - ASR_MODEL=base + ports: + - "9000:9000" + volumes: + - ./app:/app/app + - cache-whisper:/root/.cache + +volumes: + cache-whisper: \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2fab6cb..bc8f326 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,17 +1,42 @@ -version: "3.4" - services: whisper-asr-webservice: build: context: . 
- dockerfile: Dockerfile - environment: - - ASR_MODEL=base + dockerfile: Dockerfile.gpu + restart: unless-stopped + env_file: .env ports: - "9000:9000" + dns: + - 172.20.0.2 # CoreDNS server IP volumes: - ./app:/app/app + - ./data:/data/whisper - cache-whisper:/root/.cache + - huggingface-cache:/root/.cache/huggingface + networks: + - monitoring + - xinference-network + - core-dns_core-network + - whisper-network + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + +networks: + whisper-network: + driver: bridge + monitoring: + driver: bridge + xinference-network: + driver: bridge + core-dns_core-network: + external: true volumes: cache-whisper: + huggingface-cache: \ No newline at end of file diff --git a/example.env b/example.env new file mode 100644 index 0000000..d4026cd --- /dev/null +++ b/example.env @@ -0,0 +1,25 @@ +# Whisper ASR Webservice Environment Configuration + +# Model configuration +ASR_ENGINE=whisperx +ASR_MODEL=large-v3 + +# Performance tuning +COMPUTE_TYPE=float32 +NUM_WORKERS=4 +BATCH_SIZE=16 +BEAM_SIZE=4 + +# Whisper X Settings +SUBTITLE_MAX_LINE_WIDTH=1000 +SUBTITLE_MAX_LINE_COUNT=2 +SUBTITLE_HIGHLIGHT_WORDS=true + + +# System settings +# 16000 the optimal for Whisper to work with Audio files +# 24000k seens ti be working for now but incase of issues lower it to 16000 +SAMPLE_RATE=24000 +HF_HOME=/root/.cache/huggingface +CPU_THREADS=4 + diff --git a/whisper-asr-webservice-main/.dockerignore b/whisper-asr-webservice-main/.dockerignore new file mode 100644 index 0000000..59e2f83 --- /dev/null +++ b/whisper-asr-webservice-main/.dockerignore @@ -0,0 +1,3 @@ +.git +.venv +venv \ No newline at end of file diff --git a/whisper-asr-webservice-main/.github/FUNDING.yml b/whisper-asr-webservice-main/.github/FUNDING.yml new file mode 100644 index 0000000..b558be2 --- /dev/null +++ b/whisper-asr-webservice-main/.github/FUNDING.yml @@ -0,0 +1,4 @@ +# These are supported funding model platforms + +github: [ahmetoner] +custom: ['https://bmc.link/ahmetoner'] diff --git a/whisper-asr-webservice-main/.github/workflows/docker-publish.yml b/whisper-asr-webservice-main/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..571d2b8 --- /dev/null +++ b/whisper-asr-webservice-main/.github/workflows/docker-publish.yml @@ -0,0 +1,46 @@ +name: Publish Docker Image +on: + push: + tags: + - '*' + branches: + - debug + +env: + DOCKER_USER: ${{secrets.DOCKER_USER}} + DOCKER_PASSWORD: ${{secrets.DOCKER_PASSWORD}} + REPO_NAME: ${{secrets.REPO_NAME}} +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - docker_file: Dockerfile + platforms: linux/arm64,linux/amd64 + - docker_file: Dockerfile.gpu + tag_extension: -gpu + platforms: linux/amd64 + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USER }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Build and Publish the Docker debug image + if: github.ref == 'refs/heads/debug' + run: | + DOCKER_IMAGE_DEBUG=$DOCKER_USER/$REPO_NAME:debug${{ matrix.tag_extension }} + docker buildx build . 
--no-cache --platform=${{ matrix.platforms }} -t "${DOCKER_IMAGE_DEBUG}" -f ${{ matrix.docker_file }} --push + - name: Build and Publish the Docker image + if: github.ref != 'refs/heads/debug' + run: | + DOCKER_IMAGE_LATEST=$DOCKER_USER/$REPO_NAME:latest${{ matrix.tag_extension }} + DOCKER_IMAGE_VERSION=$DOCKER_USER/$REPO_NAME:$GITHUB_REF_NAME${{ matrix.tag_extension }} + docker buildx build . --no-cache --platform=${{ matrix.platforms }} -t "${DOCKER_IMAGE_LATEST}" -t "${DOCKER_IMAGE_VERSION}" -f ${{ matrix.docker_file }} --push diff --git a/whisper-asr-webservice-main/.github/workflows/documentation.yml b/whisper-asr-webservice-main/.github/workflows/documentation.yml new file mode 100644 index 0000000..03db5a8 --- /dev/null +++ b/whisper-asr-webservice-main/.github/workflows/documentation.yml @@ -0,0 +1,27 @@ +name: Documentation +on: + push: + tags: + - '*' + branches: + - docs +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + if: github.event.repository.fork == false + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v3 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material pymdown-extensions + - run: mkdocs gh-deploy --force diff --git a/whisper-asr-webservice-main/.gitignore b/whisper-asr-webservice-main/.gitignore new file mode 100644 index 0000000..35e5869 --- /dev/null +++ b/whisper-asr-webservice-main/.gitignore @@ -0,0 +1,44 @@ +*.pyc + +# Packages +*.egg +!/tests/**/*.egg +/*.egg-info +/dist/* +build +_build +.cache +*.so +venv + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.pytest_cache + +.DS_Store +.idea/* +.python-version +.vscode/* + +/test.py +/test_*.* + +/setup.cfg +MANIFEST.in +/setup.py +/docs/site/* +/tests/fixtures/simple_project/setup.py +/tests/fixtures/project_with_extras/setup.py +.mypy_cache + +.venv +/releases/* +pip-wheel-metadata +/poetry.toml + +poetry/core/* + +public
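
For reference, a hedged client-side sketch of the new output=all flow. It assumes the service is reachable at http://localhost:9000 on the /asr route (the handler shown in app/webservice.py; the route decorator is not part of this diff) and that the third-party requests package is installed. Parameter and form-field names follow the signature in this patch; on the server side the download name is built from the uploaded file's basename plus _transcripts.zip.

import requests  # assumed third-party dependency, not part of this repo


def download_all_transcripts(audio_path: str, out_path: str) -> str:
    # POST the audio and ask for every format at once; the service answers
    # with application/zip produced by WriteAll.create_zip_bytes.
    with open(audio_path, "rb") as f:
        resp = requests.post(
            "http://localhost:9000/asr",  # assumed host/port/route from the README
            params={"task": "transcribe", "output": "all", "encode": "true"},
            files={"audio_file": f},
            timeout=600,
        )
    resp.raise_for_status()
    with open(out_path, "wb") as out:
        out.write(resp.content)
    return out_path


if __name__ == "__main__":
    # The server would name this download sample_transcripts.zip for sample.wav;
    # here we simply pick a local filename for the saved archive.
    print(download_all_transcripts("sample.wav", "sample_transcripts.zip"))
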