Skip to content

Commit 1c55334

Browse files
authored
Merge branch 'main' into tgerdes-faster-teardown
2 parents 005afd1 + 9612fbe commit 1c55334

File tree

9 files changed

+784
-497
lines changed

9 files changed

+784
-497
lines changed

src/c++/perf_analyzer/genai-perf/README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,11 +342,18 @@ Show the help message and exit.
342342

343343
## Endpoint Options:
344344

345-
##### `-m <str>`
346-
##### `--model <str>`
345+
##### `-m <list>`
346+
##### `--model <list>`
347347

348348
The name of the model(s) to benchmark. (default: `None`)
349349

350+
##### `--model-selection-strategy {round_robin, random}`
351+
352+
When multiple models are specified, this is how a specific model
353+
should be assigned to a prompt. round_robin means that the i-th prompt in the
354+
list gets assigned to the model at index i mod len(models). random means that assignment is
355+
uniformly random. (default: `round_robin`)
356+
350357
##### `--backend {tensorrtllm,vllm}`
351358

352359
When using the "triton" service-kind, this is the backend of the model. For the

src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
from requests import Response
2828

2929

30+
class ModelSelectionStrategy(Enum):
31+
ROUND_ROBIN = auto()
32+
RANDOM = auto()
33+
34+
3035
class PromptSource(Enum):
3136
SYNTHETIC = auto()
3237
DATASET = auto()
@@ -78,7 +83,8 @@ def create_llm_inputs(
7883
input_type: PromptSource,
7984
output_format: OutputFormat,
8085
dataset_name: str = "",
81-
model_name: str = "",
86+
model_name: list = [],
87+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
8288
input_filename: Optional[Path] = Path(""),
8389
starting_index: int = DEFAULT_STARTING_INDEX,
8490
length: int = DEFAULT_LENGTH,
@@ -194,6 +200,7 @@ def create_llm_inputs(
194200
output_tokens_stddev,
195201
output_tokens_deterministic,
196202
model_name,
203+
model_selection_strategy,
197204
)
198205
cls._write_json_to_file(json_in_pa_format, output_dir)
199206

@@ -354,7 +361,8 @@ def _convert_generic_json_to_output_format(
354361
output_tokens_mean: int,
355362
output_tokens_stddev: int,
356363
output_tokens_deterministic: bool,
357-
model_name: str = "",
364+
model_name: list = [],
365+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
358366
) -> Dict:
359367
if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
360368
output_json = cls._convert_generic_json_to_openai_chat_completions_format(
@@ -366,6 +374,7 @@ def _convert_generic_json_to_output_format(
366374
output_tokens_stddev,
367375
output_tokens_deterministic,
368376
model_name,
377+
model_selection_strategy,
369378
)
370379
elif output_format == OutputFormat.OPENAI_COMPLETIONS:
371380
output_json = cls._convert_generic_json_to_openai_completions_format(
@@ -377,6 +386,7 @@ def _convert_generic_json_to_output_format(
377386
output_tokens_stddev,
378387
output_tokens_deterministic,
379388
model_name,
389+
model_selection_strategy,
380390
)
381391
elif output_format == OutputFormat.VLLM:
382392
output_json = cls._convert_generic_json_to_vllm_format(
@@ -388,6 +398,7 @@ def _convert_generic_json_to_output_format(
388398
output_tokens_stddev,
389399
output_tokens_deterministic,
390400
model_name,
401+
model_selection_strategy,
391402
)
392403
elif output_format == OutputFormat.TENSORRTLLM:
393404
output_json = cls._convert_generic_json_to_trtllm_format(
@@ -399,6 +410,7 @@ def _convert_generic_json_to_output_format(
399410
output_tokens_stddev,
400411
output_tokens_deterministic,
401412
model_name,
413+
model_selection_strategy,
402414
)
403415
else:
404416
raise GenAIPerfException(
@@ -417,7 +429,8 @@ def _convert_generic_json_to_openai_chat_completions_format(
417429
output_tokens_mean: int,
418430
output_tokens_stddev: int,
419431
output_tokens_deterministic: bool,
420-
model_name: str = "",
432+
model_name: list = [],
433+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
421434
) -> Dict:
422435
# TODO (TMA-1757): Implement a way to select a role for `text_input`
423436
(
@@ -436,6 +449,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
436449
output_tokens_stddev,
437450
output_tokens_deterministic,
438451
model_name,
452+
model_selection_strategy,
439453
)
440454

441455
return pa_json
@@ -450,7 +464,8 @@ def _convert_generic_json_to_openai_completions_format(
450464
output_tokens_mean: int,
451465
output_tokens_stddev: int,
452466
output_tokens_deterministic: bool,
453-
model_name: str = "",
467+
model_name: list = [],
468+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
454469
) -> Dict:
455470
(
456471
system_role_headers,
@@ -469,6 +484,7 @@ def _convert_generic_json_to_openai_completions_format(
469484
output_tokens_stddev,
470485
output_tokens_deterministic,
471486
model_name,
487+
model_selection_strategy,
472488
)
473489

474490
return pa_json
@@ -483,7 +499,8 @@ def _convert_generic_json_to_vllm_format(
483499
output_tokens_mean: int,
484500
output_tokens_stddev: int,
485501
output_tokens_deterministic: bool,
486-
model_name: str = "",
502+
model_name: list = [],
503+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
487504
) -> Dict:
488505
(
489506
system_role_headers,
@@ -503,6 +520,7 @@ def _convert_generic_json_to_vllm_format(
503520
output_tokens_stddev,
504521
output_tokens_deterministic,
505522
model_name,
523+
model_selection_strategy,
506524
)
507525

508526
return pa_json
@@ -517,7 +535,8 @@ def _convert_generic_json_to_trtllm_format(
517535
output_tokens_mean: int,
518536
output_tokens_stddev: int,
519537
output_tokens_deterministic: bool,
520-
model_name: str = "",
538+
model_name: list = [],
539+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
521540
) -> Dict:
522541
(
523542
system_role_headers,
@@ -537,6 +556,7 @@ def _convert_generic_json_to_trtllm_format(
537556
output_tokens_stddev,
538557
output_tokens_deterministic,
539558
model_name,
559+
model_selection_strategy,
540560
)
541561

542562
return pa_json
@@ -577,6 +597,17 @@ def _determine_json_feature_roles(
577597

578598
return system_role_headers, user_role_headers, text_input_headers
579599

600+
@classmethod
601+
def _select_model_name(cls, model_name, index, model_selection_strategy):
602+
if model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN:
603+
return model_name[index % len(model_name)]
604+
elif model_selection_strategy == ModelSelectionStrategy.RANDOM:
605+
return random.choice(model_name)
606+
else:
607+
raise GenAIPerfException(
608+
f"Model selection strategy '{model_selection_strategy}' is unsupported"
609+
)
610+
580611
@classmethod
581612
def _populate_openai_chat_completions_output_json(
582613
cls,
@@ -589,11 +620,15 @@ def _populate_openai_chat_completions_output_json(
589620
output_tokens_mean: int,
590621
output_tokens_stddev: int,
591622
output_tokens_deterministic: bool,
592-
model_name: str = "",
623+
model_name: list = [],
624+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
593625
) -> Dict:
594626
pa_json = cls._create_empty_openai_pa_json()
595627

596628
for index, entry in enumerate(dataset_json["rows"]):
629+
iter_model_name = cls._select_model_name(
630+
model_name, index, model_selection_strategy
631+
)
597632
pa_json["data"].append({"payload": []})
598633
pa_json["data"][index]["payload"].append({"messages": []})
599634

@@ -613,7 +648,7 @@ def _populate_openai_chat_completions_output_json(
613648
output_tokens_mean,
614649
output_tokens_stddev,
615650
output_tokens_deterministic,
616-
model_name,
651+
iter_model_name,
617652
)
618653

619654
return pa_json
@@ -631,11 +666,15 @@ def _populate_openai_completions_output_json(
631666
output_tokens_mean: int,
632667
output_tokens_stddev: int,
633668
output_tokens_deterministic: bool,
634-
model_name: str = "",
669+
model_name: list = [],
670+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
635671
) -> Dict:
636672
pa_json = cls._create_empty_openai_pa_json()
637673

638674
for index, entry in enumerate(dataset_json["rows"]):
675+
iter_model_name = cls._select_model_name(
676+
model_name, index, model_selection_strategy
677+
)
639678
pa_json["data"].append({"payload": []})
640679
pa_json["data"][index]["payload"].append({"prompt": ""})
641680

@@ -659,7 +698,7 @@ def _populate_openai_completions_output_json(
659698
output_tokens_mean,
660699
output_tokens_stddev,
661700
output_tokens_deterministic,
662-
model_name,
701+
iter_model_name,
663702
)
664703

665704
return pa_json
@@ -677,11 +716,15 @@ def _populate_vllm_output_json(
677716
output_tokens_mean: int,
678717
output_tokens_stddev: int,
679718
output_tokens_deterministic: bool,
680-
model_name: str = "",
719+
model_name: list = [],
720+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
681721
) -> Dict:
682722
pa_json = cls._create_empty_vllm_pa_json()
683723

684724
for index, entry in enumerate(dataset_json["rows"]):
725+
iter_model_name = cls._select_model_name(
726+
model_name, index, model_selection_strategy
727+
)
685728
pa_json["data"].append({"text_input": [""]})
686729

687730
for header, content in entry.items():
@@ -706,7 +749,7 @@ def _populate_vllm_output_json(
706749
output_tokens_mean,
707750
output_tokens_stddev,
708751
output_tokens_deterministic,
709-
model_name,
752+
iter_model_name,
710753
)
711754

712755
return pa_json
@@ -724,7 +767,8 @@ def _populate_trtllm_output_json(
724767
output_tokens_mean: int,
725768
output_tokens_stddev: int,
726769
output_tokens_deterministic: bool,
727-
model_name: str = "",
770+
model_name: list = [],
771+
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
728772
) -> Dict:
729773
pa_json = cls._create_empty_trtllm_pa_json()
730774
default_max_tokens = (
@@ -733,6 +777,9 @@ def _populate_trtllm_output_json(
733777
)
734778

735779
for index, entry in enumerate(dataset_json["rows"]):
780+
iter_model_name = cls._select_model_name(
781+
model_name, index, model_selection_strategy
782+
)
736783
pa_json["data"].append({"text_input": [""]})
737784

738785
for header, content in entry.items():
@@ -760,7 +807,7 @@ def _populate_trtllm_output_json(
760807
output_tokens_mean,
761808
output_tokens_stddev,
762809
output_tokens_deterministic,
763-
model_name,
810+
iter_model_name,
764811
)
765812

766813
return pa_json

src/c++/perf_analyzer/genai-perf/genai_perf/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
6464
output_format=args.output_format,
6565
dataset_name=args.input_dataset,
6666
model_name=args.model,
67+
model_selection_strategy=args.model_selection_strategy,
6768
input_filename=input_filename,
6869
starting_index=LlmInputs.DEFAULT_STARTING_INDEX,
6970
length=args.num_prompts,

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@
3737
DEFAULT_COMPARE_DIR,
3838
OPEN_ORCA,
3939
)
40-
from genai_perf.llm_inputs.llm_inputs import LlmInputs, OutputFormat, PromptSource
40+
from genai_perf.llm_inputs.llm_inputs import (
41+
LlmInputs,
42+
ModelSelectionStrategy,
43+
OutputFormat,
44+
PromptSource,
45+
)
4146
from genai_perf.plots.plot_config_parser import PlotConfigParser
4247
from genai_perf.plots.plot_manager import PlotManager
4348
from genai_perf.tokenizer import DEFAULT_TOKENIZER
@@ -57,9 +62,23 @@ def _check_model_args(
5762
"""
5863
if not args.subcommand and not args.model:
5964
parser.error("The -m/--model option is required and cannot be empty.")
65+
args = _convert_str_to_enum_entry(
66+
args, "model_selection_strategy", ModelSelectionStrategy
67+
)
68+
_generate_formatted_model_name(args)
6069
return args
6170

6271

72+
def _generate_formatted_model_name(args: argparse.Namespace) -> None:
73+
if len(args.model) == 1:
74+
args.formatted_model_name = args.model[0]
75+
elif len(args.model) == 0:
76+
args.model = None
77+
args.formatted_model_name = None
78+
else:
79+
args.formatted_model_name = args.model[0] + "_multi"
80+
81+
6382
def _check_compare_args(
6483
parser: argparse.ArgumentParser, args: argparse.Namespace
6584
) -> argparse.Namespace:
@@ -140,15 +159,17 @@ def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
140159
"""
141160
if args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR):
142161
# Preprocess Huggingface model names that include '/' in their model name.
143-
if (args.model is not None) and ("/" in args.model):
144-
filtered_name = "_".join(args.model.split("/"))
162+
if (args.formatted_model_name is not None) and (
163+
"/" in args.formatted_model_name
164+
):
165+
filtered_name = "_".join(args.formatted_model_name.split("/"))
145166
logger.info(
146-
f"Model name '{args.model}' cannot be used to create artifact "
167+
f"Model name '{args.formatted_model_name}' cannot be used to create artifact "
147168
f"directory. Instead, '{filtered_name}' will be used."
148169
)
149170
name = [f"{filtered_name}"]
150171
else:
151-
name = [f"{args.model}"]
172+
name = [f"{args.formatted_model_name}"]
152173

153174
if args.service_kind == "openai":
154175
name += [f"{args.service_kind}-{args.endpoint_type}"]
@@ -340,9 +361,20 @@ def _add_endpoint_args(parser):
340361
endpoint_group.add_argument(
341362
"-m",
342363
"--model",
364+
nargs="+",
365+
default=[],
366+
help=f"The name of the model(s) to benchmark.",
367+
)
368+
endpoint_group.add_argument(
369+
"--model-selection-strategy",
343370
type=str,
344-
default=None,
345-
help=f"The name of the model to benchmark.",
371+
choices=utils.get_enum_names(ModelSelectionStrategy),
372+
default="round_robin",
373+
required=False,
374+
help=f"When multiple model are specified, this is how a specific model "
375+
"should be assigned to a prompt. round_robin means that ith prompt in the "
376+
"list gets assigned to i mod len(models). random means that assignment is "
377+
"uniformly random",
346378
)
347379

348380
endpoint_group.add_argument(

0 commit comments

Comments
 (0)