You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 4 Next »

2 nodes × 2 GPUs per node (4 GPUs total), gpuA40x4 partition; benchmark flags: "-d cuda"

(Performance numbers are similar for slingshot11 and openmpi/4.1.5+cuda under nvhpc.)

slingshot10, openmpi/4.1.2, gcc/11.2.0

slingshot11, openmpi/4.1.5+cuda, gcc/11.4.0

slingshot11, openmpi/5.0.1+cuda, gcc/11.4.0 (use mpirun; srun is not supported)
osu_reduce
# OSU MPI-CUDA Reduce Latency Test v5.9
# Size       Avg Latency(us)
4                     141.83
8                     135.70
16                    133.55
32                    134.55
64                    136.96
128                   142.41
256                   149.63
512                   147.23
1024                  144.80
2048                  153.08
4096                  159.19
8192                  159.99
16384                 166.59
32768                 179.74
65536                 188.19
131072                105.19
262144                218.92
524288                340.70
1048576               726.87
osu_reduce
# OSU MPI-CUDA Reduce Latency Test v5.9
# Size       Avg Latency(us)
4                      46.61
8                      48.19
16                     47.40
32                     48.73
64                     48.64
128                    50.86
256                    51.29
512                    57.45
1024                   76.34
2048                  116.33
4096                   94.03
8192                   94.33
16384                 185.85
32768                 237.63
65536                  71.93
131072                155.68
262144                489.54
524288                291.04
1048576               923.25
osu_reduce
# OSU MPI-CUDA Reduce Latency Test v5.9
# Size       Avg Latency(us)
4                      23.22
8                      23.12
16                     23.46
32                     23.33
64                     24.01
128                    26.91
256                    32.98
512                    33.58
1024                   29.93
2048                   86.17
4096                   91.42
8192                   96.20
16384                 104.96
32768                 138.09
65536                 217.69
131072                387.73
262144               1007.76
524288               2227.94
1048576              4584.68
osu_bcast
# OSU MPI-CUDA Broadcast Latency Test v5.9
# Size       Avg Latency(us)
1                      85.52
2                      86.07
4                      86.32
8                      86.11
16                     86.22
32                     86.72
64                     87.37
128                    87.10
256                    87.52
512                    87.79
1024                   87.73
2048                   87.87
4096                   89.30
8192                   89.80
16384                 171.33
32768                 351.89
65536                 705.63
131072                904.49
262144               1117.40
524288               1320.32
1048576               133.31
osu_bcast
# OSU MPI-CUDA Broadcast Latency Test v5.9
# Size       Avg Latency(us)
1                      89.36
2                      89.29
4                      89.56
8                      89.69
16                     90.72
32                     91.85
64                     91.53
128                   101.95
256                    93.39
512                    95.99
1024                  101.72
2048                  113.58
4096                  143.00
8192                  185.05
16384                 259.32
32768                 391.85
65536                 168.12
131072                233.04
262144                326.11
524288                452.23
1048576               534.99
osu_bcast
# OSU MPI-CUDA Broadcast Latency Test v5.9
# Size       Avg Latency(us)
1                      46.61
2                      46.58
4                      46.62
8                      46.53
16                     46.65
32                     46.71
64                     46.82
128                    47.03
256                    46.98
512                    48.28
1024                   48.68
2048                  186.13
4096                  233.85
8192                  235.12
16384                 238.00
32768                 285.07
65536                 131.28
131072                220.26
262144                413.38
524288                809.30
1048576              1593.66
osu_alltoallv
# OSU MPI-CUDA All-to-Allv Personalized Exchange Latency Test v5.9
# Size       Avg Latency(us)
1                     637.54
2                     638.52
4                     639.18
8                     637.57
16                    635.48
32                    635.14
64                    639.59
128                   643.95
256                   643.28
512                   637.44
1024                  638.55
2048                  638.64
4096                  642.52
8192                  640.43
16384                 805.22
32768                1494.24
65536                2943.63
131072               5846.05
262144              11811.71
524288              23857.81
1048576              1739.03
osu_alltoallv
# OSU MPI-CUDA All-to-Allv Personalized Exchange Latency Test v5.9
# Size       Avg Latency(us)
1                     522.28
2                     521.31
4                     523.75
8                     522.82
16                    522.87
32                    524.22
64                    522.77
128                   526.60
256                   523.93
512                   534.46
1024                  534.16
2048                  476.68
4096                  493.51
8192                  529.08
16384                 720.68
32768                 975.54
65536                 541.50
131072                625.32
262144                856.80
524288               1256.33
1048576              2020.87
osu_alltoallv
# OSU MPI-CUDA All-to-Allv Personalized Exchange Latency Test v5.9
# Size       Avg Latency(us)
1                      74.21
2                      74.00
4                      73.96
8                      73.95
16                     74.18
32                     74.81
64                     75.19
128                    75.67
256                    79.16
512                    88.08
1024                  109.05
2048                  155.38
4096                  190.44
8192                  251.58
16384                 378.31
32768                 654.48
65536                 225.11
131072                406.02
262144                671.12
524288               1020.81
1048576              1844.89
  • No labels