 from requests import Response


+class ModelSelectionStrategy(Enum):
+    ROUND_ROBIN = auto()
+    RANDOM = auto()
+
+
 class PromptSource(Enum):
     SYNTHETIC = auto()
     DATASET = auto()
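
For quick reference, here is the new enum on its own: a minimal standalone sketch. It assumes `from enum import Enum, auto` is already imported at the top of llm_inputs.py (the existing `PromptSource` enum implies it is):

```python
from enum import Enum, auto


class ModelSelectionStrategy(Enum):
    """How a model name is picked from the configured list for each request."""

    ROUND_ROBIN = auto()  # cycle through the list by request index
    RANDOM = auto()       # pick uniformly at random for each request
```
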
@@ -78,7 +83,8 @@ def create_llm_inputs(
         input_type: PromptSource,
         output_format: OutputFormat,
         dataset_name: str = "",
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
         input_filename: Optional[Path] = Path(""),
         starting_index: int = DEFAULT_STARTING_INDEX,
         length: int = DEFAULT_LENGTH,
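
`create_llm_inputs` now takes a list of model names plus a selection strategy instead of a single `model_name` string. A hypothetical call site is sketched below for illustration only: the import path and the `LlmInputs` class name are assumed from genai-perf's layout rather than shown in this diff, and the remaining parameters are assumed to keep their defaults.

```python
# Hypothetical usage sketch: the import path and class name are assumptions,
# not part of this diff.
from genai_perf.llm_inputs.llm_inputs import (
    LlmInputs,
    ModelSelectionStrategy,
    OutputFormat,
    PromptSource,
)

LlmInputs.create_llm_inputs(
    input_type=PromptSource.SYNTHETIC,
    output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS,
    model_name=["model_a", "model_b"],  # previously a single string
    model_selection_strategy=ModelSelectionStrategy.RANDOM,
)
```
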
@@ -194,6 +200,7 @@ def create_llm_inputs(
             output_tokens_stddev,
             output_tokens_deterministic,
             model_name,
+            model_selection_strategy,
         )
         cls._write_json_to_file(json_in_pa_format, output_dir)

@@ -354,7 +361,8 @@ def _convert_generic_json_to_output_format(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
             output_json = cls._convert_generic_json_to_openai_chat_completions_format(
@@ -366,6 +374,7 @@ def _convert_generic_json_to_output_format(
                 output_tokens_stddev,
                 output_tokens_deterministic,
                 model_name,
+                model_selection_strategy,
             )
         elif output_format == OutputFormat.OPENAI_COMPLETIONS:
             output_json = cls._convert_generic_json_to_openai_completions_format(
@@ -377,6 +386,7 @@ def _convert_generic_json_to_output_format(
                 output_tokens_stddev,
                 output_tokens_deterministic,
                 model_name,
+                model_selection_strategy,
             )
         elif output_format == OutputFormat.VLLM:
             output_json = cls._convert_generic_json_to_vllm_format(
@@ -388,6 +398,7 @@ def _convert_generic_json_to_output_format(
                 output_tokens_stddev,
                 output_tokens_deterministic,
                 model_name,
+                model_selection_strategy,
             )
         elif output_format == OutputFormat.TENSORRTLLM:
             output_json = cls._convert_generic_json_to_trtllm_format(
@@ -399,6 +410,7 @@ def _convert_generic_json_to_output_format(
                 output_tokens_stddev,
                 output_tokens_deterministic,
                 model_name,
+                model_selection_strategy,
             )
         else:
             raise GenAIPerfException(
@@ -417,7 +429,8 @@ def _convert_generic_json_to_openai_chat_completions_format(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         # TODO (TMA-1757): Implement a way to select a role for `text_input`
         (
@@ -436,6 +449,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
             output_tokens_stddev,
             output_tokens_deterministic,
             model_name,
+            model_selection_strategy,
         )

         return pa_json
@@ -450,7 +464,8 @@ def _convert_generic_json_to_openai_completions_format(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         (
             system_role_headers,
@@ -469,6 +484,7 @@ def _convert_generic_json_to_openai_completions_format(
             output_tokens_stddev,
             output_tokens_deterministic,
             model_name,
+            model_selection_strategy,
         )

         return pa_json
@@ -483,7 +499,8 @@ def _convert_generic_json_to_vllm_format(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         (
             system_role_headers,
@@ -503,6 +520,7 @@ def _convert_generic_json_to_vllm_format(
             output_tokens_stddev,
             output_tokens_deterministic,
             model_name,
+            model_selection_strategy,
         )

         return pa_json
@@ -517,7 +535,8 @@ def _convert_generic_json_to_trtllm_format(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         (
             system_role_headers,
@@ -537,6 +556,7 @@ def _convert_generic_json_to_trtllm_format(
             output_tokens_stddev,
             output_tokens_deterministic,
             model_name,
+            model_selection_strategy,
         )

         return pa_json
@@ -577,6 +597,17 @@ def _determine_json_feature_roles(

         return system_role_headers, user_role_headers, text_input_headers

+    @classmethod
+    def _select_model_name(cls, model_name, index, model_selection_strategy):
+        if model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN:
+            return model_name[index % len(model_name)]
+        elif model_selection_strategy == ModelSelectionStrategy.RANDOM:
+            return random.choice(model_name)
+        else:
+            raise GenAIPerfException(
+                f"Model selection strategy '{model_selection_strategy}' is unsupported"
+            )
+
     @classmethod
     def _populate_openai_chat_completions_output_json(
         cls,
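
The selection rule is small enough to exercise on its own. Below is a self-contained sketch of the same logic (the helper is renamed and `GenAIPerfException` is swapped for `ValueError` so the snippet runs outside genai-perf); note that the real helper relies on the `random` module, which must already be imported in llm_inputs.py and is not visible in these hunks.

```python
import random
from enum import Enum, auto


class ModelSelectionStrategy(Enum):
    ROUND_ROBIN = auto()
    RANDOM = auto()


def select_model_name(model_name, index, model_selection_strategy):
    # Mirrors the _select_model_name classmethod added above.
    if model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN:
        return model_name[index % len(model_name)]
    elif model_selection_strategy == ModelSelectionStrategy.RANDOM:
        return random.choice(model_name)
    else:
        raise ValueError(
            f"Model selection strategy '{model_selection_strategy}' is unsupported"
        )


models = ["model_a", "model_b", "model_c"]

# ROUND_ROBIN walks the list by request index and wraps around.
assert [
    select_model_name(models, i, ModelSelectionStrategy.ROUND_ROBIN) for i in range(4)
] == ["model_a", "model_b", "model_c", "model_a"]

# RANDOM draws independently per request; seeding only makes the demo repeatable.
random.seed(0)
print([select_model_name(models, i, ModelSelectionStrategy.RANDOM) for i in range(4)])
```
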
@@ -589,11 +620,15 @@ def _populate_openai_chat_completions_output_json(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         pa_json = cls._create_empty_openai_pa_json()

         for index, entry in enumerate(dataset_json["rows"]):
+            iter_model_name = cls._select_model_name(
+                model_name, index, model_selection_strategy
+            )
             pa_json["data"].append({"payload": []})
             pa_json["data"][index]["payload"].append({"messages": []})

@@ -613,7 +648,7 @@ def _populate_openai_chat_completions_output_json(
                 output_tokens_mean,
                 output_tokens_stddev,
                 output_tokens_deterministic,
-                model_name,
+                iter_model_name,
             )

         return pa_json
@@ -631,11 +666,15 @@ def _populate_openai_completions_output_json(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         pa_json = cls._create_empty_openai_pa_json()

         for index, entry in enumerate(dataset_json["rows"]):
+            iter_model_name = cls._select_model_name(
+                model_name, index, model_selection_strategy
+            )
             pa_json["data"].append({"payload": []})
             pa_json["data"][index]["payload"].append({"prompt": ""})

@@ -659,7 +698,7 @@ def _populate_openai_completions_output_json(
                 output_tokens_mean,
                 output_tokens_stddev,
                 output_tokens_deterministic,
-                model_name,
+                iter_model_name,
             )

         return pa_json
@@ -677,11 +716,15 @@ def _populate_vllm_output_json(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         pa_json = cls._create_empty_vllm_pa_json()

         for index, entry in enumerate(dataset_json["rows"]):
+            iter_model_name = cls._select_model_name(
+                model_name, index, model_selection_strategy
+            )
             pa_json["data"].append({"text_input": [""]})

             for header, content in entry.items():
@@ -706,7 +749,7 @@ def _populate_vllm_output_json(
                 output_tokens_mean,
                 output_tokens_stddev,
                 output_tokens_deterministic,
-                model_name,
+                iter_model_name,
             )

         return pa_json
@@ -724,7 +767,8 @@ def _populate_trtllm_output_json(
         output_tokens_mean: int,
         output_tokens_stddev: int,
         output_tokens_deterministic: bool,
-        model_name: str = "",
+        model_name: list = [],
+        model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
     ) -> Dict:
         pa_json = cls._create_empty_trtllm_pa_json()
         default_max_tokens = (
@@ -733,6 +777,9 @@ def _populate_trtllm_output_json(
         )

         for index, entry in enumerate(dataset_json["rows"]):
+            iter_model_name = cls._select_model_name(
+                model_name, index, model_selection_strategy
+            )
             pa_json["data"].append({"text_input": [""]})

             for header, content in entry.items():
@@ -760,7 +807,7 @@ def _populate_trtllm_output_json(
                 output_tokens_mean,
                 output_tokens_stddev,
                 output_tokens_deterministic,
-                model_name,
+                iter_model_name,
             )

         return pa_json
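
Each `_populate_*_output_json` method now resolves `iter_model_name` once per dataset row and hands it to its `_add_optional_tags_*` helper, so consecutive payloads can target different models. Below is a simplified per-row illustration, reusing `ModelSelectionStrategy` and `select_model_name` from the sketch above; the real payload layout comes from the `_add_optional_tags_*` helpers, which are outside this diff.

```python
def populate_rows(rows, model_name, model_selection_strategy):
    # Loosely mirrors the per-row loop in the _populate_*_output_json methods:
    # one payload per row, tagged with the model chosen for that row's index.
    pa_json = {"data": []}
    for index, row in enumerate(rows):
        iter_model_name = select_model_name(model_name, index, model_selection_strategy)
        pa_json["data"].append(
            {"payload": [{"text_input": row, "model": iter_model_name}]}
        )
    return pa_json


rows = ["What is ML?", "Summarize this text.", "Translate to French."]
result = populate_rows(rows, ["model_a", "model_b"], ModelSelectionStrategy.ROUND_ROBIN)
print([p["payload"][0]["model"] for p in result["data"]])
# -> ['model_a', 'model_b', 'model_a']
```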