
Commit 148206e

Make sharding_first by default (#11059)
* make sharding_first by default
* update run_ci.sh
* Update V100 baseline
* Update A100 baselines
* Revert "Update A100 baselines" (this reverts commit c682b27)
* Revert "Update V100 baseline" (this reverts commit 1ca17b7)
* fix topo in CI cases
* fix JSON format error
1 parent 462597f commit 148206e

3 files changed: 51 additions, 2 deletions

paddlenlp/trainer/training_args.py

Lines changed: 2 additions & 2 deletions
@@ -792,7 +792,7 @@ class TrainingArguments:
                 "Following options are supported:\n"
                 "- pp_first. the topo order is dp, pp, sharding, mp \n"
                 "- sharding_first. the topo order is dp, sharding, pp, mp \n"
-                "Default is None, for pp_first"
+                "Default is None, for sharding_first"
             )
         },
     )
@@ -2107,7 +2107,7 @@ def _post_init_parallel_degree(self):
         self.expert_tensor_parallel_degree = -1
 
         if self.hybrid_parallel_topo_order is None:
-            self.hybrid_parallel_topo_order = "pp_first"
+            self.hybrid_parallel_topo_order = "sharding_first"
         assert self.hybrid_parallel_topo_order in ["pp_first", "sharding_first"]
 
         if self.use_hybrid_parallel and self.enable_auto_parallel:
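With this change, jobs that leave hybrid_parallel_topo_order unset now resolve to sharding_first (topo order dp, sharding, pp, mp) instead of pp_first (dp, pp, sharding, mp). A job that depends on the old layout can pin it back explicitly. A minimal launch sketch; every value except the topo-order flag is borrowed from the CI cases below for illustration:

# Sketch: pin the pre-change topo order explicitly (model/paths are illustrative).
python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" \
    run_pretrain_auto.py \
    --model_name_or_path "facebook/llama-7b" \
    --tokenizer_name_or_path "facebook/llama-7b" \
    --hybrid_parallel_topo_order pp_first \
    --input_dir "./data" \
    --output_dir "./output"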

scripts/distribute/ci_case_auto.sh

Lines changed: 46 additions & 0 deletions
@@ -25,6 +25,8 @@ export llama_data_path=/llama_data
 export llm_gpt_case_path=$root_path/llm/auto_parallel/gpt-3
 export gpt_data_path=/fleetx_data
 
+DEFAULT_TOPO=pp_first
+
 unset CUDA_VISIBLE_DEVICES
 
 function is_a100() {
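DEFAULT_TOPO=pp_first pins the old order for every CI case below (each passes it via --hybrid_parallel_topo_order), so the existing V100/A100 loss baselines stay comparable while the library default flips; this is why the baseline updates were reverted in this commit. The trainer accepts only pp_first or sharding_first. A hedged sketch of a shell guard mirroring that assert; this guard is not part of ci_case_auto.sh:

# Sketch only: mirrors the assert in _post_init_parallel_degree above.
case "$DEFAULT_TOPO" in
    pp_first|sharding_first) ;;
    *) echo "unsupported hybrid_parallel_topo_order: $DEFAULT_TOPO" >&2; exit 1 ;;
esac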
@@ -256,6 +258,7 @@ function llama_dygraph_auto_bs4_bf16_SD2() {
     ./run_pretrain_auto.py \
     --model_name_or_path "meta-llama/Llama-2-7b" \
     --tokenizer_name_or_path "meta-llama/Llama-2-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "./output" \
     --weight_decay 0.01 \
@@ -358,6 +361,7 @@ function llama_dygraph_auto_bs8_fp32_DP2() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -429,6 +433,7 @@ function llama_dygraph_auto_bs8_fp32_DP2-MP2() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -511,6 +516,7 @@ function llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -584,6 +590,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -659,6 +666,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate() {
     --use_intermediate_api 1\
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -732,6 +740,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-CP2() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -806,6 +815,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-CP2_intermediate() {
     python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" --log_dir $case_log_dir run_pretrain_auto.py \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -883,6 +893,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_hybrid_pp() {
     --model_type "llama_pp" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -950,6 +961,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_hybrid_pp() {
     --model_type "llama_pp" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $auto_case_out_dir \
     --split 949,50,1 \
@@ -1031,6 +1043,7 @@ function llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2() {
     ./run_pretrain_auto.py \
     --model_name_or_path "meta-llama/Llama-2-13b" \
     --tokenizer_name_or_path "meta-llama/Llama-2-13b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "./output" \
     --split 949,50,1 \
@@ -1131,6 +1144,7 @@ function llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2-VPP3_split_bw() {
     ./run_pretrain_auto.py \
     --model_name_or_path "meta-llama/Llama-2-13b" \
     --tokenizer_name_or_path "meta-llama/Llama-2-13b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "./output" \
     --split 949,50,1 \
@@ -1245,6 +1259,7 @@ function llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP1-SP() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -1361,6 +1376,7 @@ function llama_pir_auto_fuse_ffn_attention_qkv_MP2() {
     run_pretrain_auto.py \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $auto_case_out_dir \
     --split 949,50,1 \
@@ -1523,6 +1539,7 @@ function llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP2-SP() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -1623,6 +1640,7 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -1737,6 +1755,7 @@ function llama_dy2st_auto_bs2_bf16_DP2-MP1-PP1-CINN() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -1836,6 +1855,7 @@ function llama_dpo_dy2st_auto_bs2_bf16_MP8_intermediate() {
     --log_dir $case_log_dir \
     ../run_dpo_auto.py\
     --model_name_or_path "meta-llama/Meta-Llama-3.1-8B-Instruct" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --train_dataset_path ${llama_data_path}/data_dpo/data/train.jsonl \
     --dev_dataset_path ${llama_data_path}/data_dpo/data/dev.jsonl \
     --output_dir ./checkpoints/dpo_ckpts \
@@ -1926,6 +1946,7 @@ function llama_align_dygraph_dy2st_pir_auto_grad_merge_bs2_fp32_DP1-MP1-PP1() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2033,6 +2054,7 @@ function llama_align_dy2st_fthenb_and_vpp_auto_bs2_fp32_DP1-MP1-PP4() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2156,6 +2178,7 @@ function llama_align_dygraph_dy2st_pir_auto_pp_bs2_bf16_DP1-MP1-PP4() {
     --model_type "llama" \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2248,6 +2271,7 @@ function llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1() {
     ../../run_pretrain.py \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $dy_case_out_dir \
     --split 949,50,1 \
@@ -2321,6 +2345,7 @@ function llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1() {
     run_pretrain_auto.py \
     --model_name_or_path "facebook/llama-7b" \
     --tokenizer_name_or_path "facebook/llama-7b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $auto_case_out_dir \
     --split 949,50,1 \
@@ -2403,6 +2428,7 @@ function llama_baichuan_pir_auto_fuse_ffn_attention_qkv_DP2_MP2_PP2(){
     --model_type "llama" \
     --model_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
     --tokenizer_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2475,6 +2501,7 @@ function llama_baichuan_pir_auto_fuse_ffn_attention_qkv_DP2_MP2_PP2_intermediate
     --use_intermediate_api true \
     --model_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
     --tokenizer_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2548,6 +2575,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() {
     run_pretrain_auto.py \
     --model_name_or_path gpt2-medium-en \
     --tokenizer_name_or_path gpt2-medium-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -2620,6 +2648,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() {
     run_pretrain_auto.py \
     --model_name_or_path gpt2-medium-en \
     --tokenizer_name_or_path gpt2-medium-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2692,6 +2721,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
     run_pretrain_auto.py \
     --model_name_or_path gpt2-medium-en \
     --tokenizer_name_or_path gpt2-medium-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2765,6 +2795,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
     run_pretrain_auto.py \
     --model_name_or_path gpt2-medium-en \
     --tokenizer_name_or_path gpt2-medium-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2838,6 +2869,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate() {
     run_pretrain_auto.py \
     --model_name_or_path gpt2-medium-en \
     --tokenizer_name_or_path gpt2-medium-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir $case_out_dir \
     --split 949,50,1 \
@@ -2911,6 +2943,7 @@ function llm_gpt_pir_auto_bs4_TP2(){
     run_pretrain_auto.py \
     --model_name_or_path gpt3-13B-en \
     --tokenizer_name_or_path gpt3-13B-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -2978,6 +3011,7 @@ function llm_gpt_pir_auto_bs4_TP2_PP2(){
     run_pretrain_auto.py \
     --model_name_or_path gpt3-13B-en \
     --tokenizer_name_or_path gpt3-13B-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -3041,6 +3075,7 @@ function llm_gpt_pir_auto_bs8_DP2_TP2_PP2(){
     run_pretrain_auto.py \
     --model_name_or_path gpt3-13B-en \
     --tokenizer_name_or_path gpt3-13B-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -3107,6 +3142,7 @@ function llm_gpt_pir_auto_bs8_DP2_TP2_PP2_intermediate(){
     run_pretrain_auto.py \
     --model_name_or_path gpt3-13B-en \
     --tokenizer_name_or_path gpt3-13B-en \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "$gpt_data_path/data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -3163,6 +3199,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3254,6 +3291,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2-MP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
    "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3343,6 +3381,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2-MP2-PP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3434,6 +3473,7 @@ function llm_qwen_dygraph_auto_bs1_bf16_DP2-MP2-PP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3547,6 +3587,7 @@ function llm_qwen_pir_auto_bs1_bf16_TP2(){
     run_pretrain_auto.py \
     --model_name_or_path "qwen/qwen-14b" \
     --tokenizer_name_or_path "qwen/qwen-14b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "output/$task_name/" \
     --per_device_train_batch_size 1\
@@ -3624,6 +3665,7 @@ function llm_qwen_pir_auto_bs1_bf16_TP2_PP2(){
     run_pretrain_auto.py \
     --model_name_or_path "qwen/qwen-14b" \
     --tokenizer_name_or_path "qwen/qwen-14b" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "output/$task_name/" \
     --per_device_train_batch_size 1\
@@ -3694,6 +3736,7 @@ function llama_lora_static_graph_auto_bs_2_bf16_DP2-TP2-PP1() {
     --log_dir "$case_log_dir" \
     ../run_finetune_auto.py \
     --model_name_or_path "meta-llama/Meta-Llama-3.1-8B-Instruct" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --dataset_name_or_path "./data" \
     --output_dir "$case_out_dir" \
     --enable_auto_parallel true \
@@ -3853,6 +3896,7 @@ if [ $IS_A100 -eq 1 ]; then
     --model_type "deepseekv3_auto" \
     --model_name_or_path $model_config_json \
     --tokenizer_name_or_path "deepseek-ai/DeepSeek-V3" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -3999,6 +4043,7 @@ if [ $IS_A100 -eq 1 ]; then
     --model_type "deepseekv3_auto" \
     --model_name_or_path $model_config_json \
     --tokenizer_name_or_path "deepseek-ai/DeepSeek-V3" \
+    --hybrid_parallel_topo_order $DEFAULT_TOPO \
     --input_dir "./data" \
     --output_dir "output/$task_name" \
     --split 949,50,1 \
@@ -4075,6 +4120,7 @@ function llama_baichuan_dygraph_auto_sp_async_reduce_scatter_bs8_bf16_DP4-MP2-SP
 {
     "model_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
     "tokenizer_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/baichuan2_13b_ckpts",
     "split": "949,50,1",
