BatchedInferencePipeline cannot be used with multithreading? #1333

@Crissium

Description

How do you use multiple GPUs to run batched transcription in parallel? Strangely, with the following code snippet only one GPU is busy at a time: the other sits at 0% utilization, and moments later they swap, the idle one spinning up while the busy one drops back to zero.

python transcribe.py ../../Data/Original/LibriSpeech/test-clean ../../Data/Generated/LibriSpeech/test-clean/whisper-large-v3.jsonl 0 1
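(The positional arguments are the input directory, the output JSONL path, and then one or more CUDA device indexes; here, GPUs 0 and 1.)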

transcribe.py

import json
import os
import sys
from collections.abc import Generator
from concurrent.futures import ThreadPoolExecutor

from faster_whisper import WhisperModel, BatchedInferencePipeline


BATCH_SIZE = 50


def transcribe_file(
	pipeline: BatchedInferencePipeline,
	input_filename: str
) -> Generator[tuple[str, int, int, str], None, None]:
	# Yield (filename, start_ms, end_ms, text) for each transcribed segment.
	segments, _ = pipeline.transcribe(input_filename, language='en', batch_size=BATCH_SIZE)
	for segment in segments:
		yield (
			os.path.basename(input_filename),
			int(segment.start * 1000),
			int(segment.end * 1000),
			segment.text.strip()
		)


def transcribe_directory(
	pipeline: BatchedInferencePipeline,
	root_dir: str,
	output_file: str,
	num_workers: int
) -> None:
	with ThreadPoolExecutor(max_workers=num_workers) as executor:
		# Submit one transcription task per .flac file under root_dir.
		futures = []
		for root, _, files in os.walk(root_dir):
			for file in files:
				if file.endswith('.flac'):
					input_filename = os.path.join(root, file)
					futures.append(executor.submit(transcribe_file, pipeline, input_filename))

		with open(output_file, 'w') as f:
			for future in futures:
				for segment in future.result():
					f.write(json.dumps({
						'filename': segment[0],
						'start_time': segment[1],
						'end_time': segment[2],
						'text': segment[3]
					}) + '\n')


if __name__ == '__main__':
	root_dir = sys.argv[1]
	output_file = sys.argv[2]
	device_indexes = list(map(int, sys.argv[3:]))
	model = WhisperModel(
		'large-v3',
		device='cuda',
		device_index=device_indexes,  # load the model onto every requested GPU
		num_workers=len(device_indexes)  # allow as many concurrent transcribe() calls
	)
	pipeline = BatchedInferencePipeline(model)
	transcribe_directory(
		pipeline,
		root_dir,
		output_file,
		num_workers=len(device_indexes)
	)

And I saw this (I was using GPUs 0 and 1):

[Screen recording of GPU utilization: 2025-07-28.16-09-43.mp4]

Is there something I am missing?
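One guess, though I have not verified it: transcribe_file is a generator function, so the call that the executor runs in a worker thread merely creates a generator object and returns immediately; none of the body, including pipeline.transcribe(), executes until the main thread iterates future.result(). Since faster-whisper also yields its segments lazily, essentially all decoding would then happen serially in the main thread, which would match the alternating GPU usage. Below is a sketch of an eager variant that drains the segments inside the worker thread (the name transcribe_file_eager is mine, and I have not tested this):

def transcribe_file_eager(
	pipeline: BatchedInferencePipeline,
	input_filename: str
) -> list[tuple[str, int, int, str]]:
	# Plain function, not a generator: materialize the lazy segment
	# iterator here so the decoding runs inside the worker thread.
	segments, _ = pipeline.transcribe(input_filename, language='en', batch_size=BATCH_SIZE)
	return [
		(
			os.path.basename(input_filename),
			int(segment.start * 1000),
			int(segment.end * 1000),
			segment.text.strip()
		)
		for segment in segments
	]

transcribe_directory would work unchanged with this, since future.result() then returns a fully materialized list. But even with this change I am not certain the two GPUs would actually run concurrently, hence the question in the title.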
