From a054fafd6f60b64c95760ed1f72d236ab63c87fb Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sun, 27 Oct 2024 14:45:30 +0000 Subject: [PATCH] add MI300X benchmark data --- benchmark/data/all_benchmark_data.csv | 506 ++++++++++++++++++++++++++ 1 file changed, 506 insertions(+) diff --git a/benchmark/data/all_benchmark_data.csv b/benchmark/data/all_benchmark_data.csv index 32c8d01a..610d2f52 100644 --- a/benchmark/data/all_benchmark_data.csv +++ b/benchmark/data/all_benchmark_data.csv @@ -505,3 +505,509 @@ fused_linear_jsd,torch,full,memory,MB,BT,B x T,1024,10609.005859375,10609.005859 fused_linear_jsd,torch,full,memory,MB,BT,B x T,2048,17146.009765625,17146.009765625,17146.009765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,4096,30220.017578125,30220.017578125,30220.017578125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,8192,56368.015625,56368.015625,56368.015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 +layer_norm,liger,forward,speed,ms,N,hidden size,1024,0.0513870008289814,0.05066299811005592,0.05195000022649765,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:49,0.3.1 +layer_norm,liger,forward,speed,ms,N,hidden size,2048,0.06013600155711174,0.030950000509619713,0.06118500232696533,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:49,0.3.1 +layer_norm,liger,forward,speed,ms,N,hidden size,4096,0.05540600046515465,0.0538019984960556,0.08106400072574615,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:49,0.3.1 +layer_norm,liger,forward,speed,ms,N,hidden size,8192,0.10074900090694427,0.09890219569206238,0.1294132024049759,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:49,0.3.1 +layer_norm,liger,forward,speed,ms,N,hidden size,16384,0.17824499309062958,0.17584000527858734,0.18165259063243866,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:49,0.3.1 +layer_norm,huggingface,forward,speed,ms,N,hidden size,1024,0.025107000023126602,0.02386399917304516,0.026100000366568565,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:52,0.3.1 +layer_norm,huggingface,forward,speed,ms,N,hidden size,2048,0.032593999058008194,0.031922001391649246,0.035962000489234924,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:52,0.3.1 +layer_norm,huggingface,forward,speed,ms,N,hidden size,4096,0.05528600141406059,0.054092999547719955,0.05670800060033798,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:52,0.3.1 +layer_norm,huggingface,forward,speed,ms,N,hidden size,8192,0.11466100066900253,0.11349800229072571,0.11594299972057343,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:52,0.3.1 +layer_norm,huggingface,forward,speed,ms,N,hidden size,16384,0.22394900023937225,0.22238540649414062,0.22563199698925018,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:52,0.3.1 +layer_norm,liger,full,speed,ms,N,hidden size,1024,0.5856505036354065,0.5555411577224731,0.6316103935241699,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:55,0.3.1 +layer_norm,liger,full,speed,ms,N,hidden size,2048,0.8036559820175171,0.7926700115203857,0.8125370144844055,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:55,0.3.1 +layer_norm,liger,full,speed,ms,N,hidden size,4096,0.9858289957046509,0.9631498456001282,1.0054434537887573,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:55,0.3.1 +layer_norm,liger,full,speed,ms,N,hidden size,8192,0.7981730103492737,0.7880899906158447,0.8073430061340332,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:55,0.3.1 +layer_norm,liger,full,speed,ms,N,hidden size,16384,0.8172969818115234,0.8059061765670776,0.8261289596557617,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:55,0.3.1 +layer_norm,huggingface,full,speed,ms,N,hidden size,1024,0.3071730136871338,0.30254700779914856,0.3124876022338867,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,speed,ms,N,hidden size,2048,0.2821410000324249,0.2781337797641754,0.2876429855823517,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,speed,ms,N,hidden size,4096,0.30395999550819397,0.2873130142688751,0.3167490065097809,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,speed,ms,N,hidden size,8192,0.47268399596214294,0.4704889953136444,0.4751090109348297,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,speed,ms,N,hidden size,16384,0.9772189855575562,0.9728891849517822,0.981613039970398,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,liger,full,memory,MB,N,hidden size,1024,82.4375,82.4375,82.4375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,liger,full,memory,MB,N,hidden size,2048,164.84375,164.84375,164.84375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,liger,full,memory,MB,N,hidden size,4096,329.65625,329.65625,329.65625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,liger,full,memory,MB,N,hidden size,8192,659.28125,659.28125,659.28125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,liger,full,memory,MB,N,hidden size,16384,1318.53125,1318.53125,1318.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,memory,MB,N,hidden size,1024,80.5625,80.5625,80.5625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,memory,MB,N,hidden size,2048,161.09375,161.09375,161.09375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,memory,MB,N,hidden size,4096,322.15625,322.15625,322.15625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,memory,MB,N,hidden size,8192,644.28125,644.28125,644.28125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +layer_norm,huggingface,full,memory,MB,N,hidden size,16384,1288.53125,1288.53125,1288.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:32:58,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,4096,384.0009765625,384.0009765625,384.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,8192,768.0009765625,768.0009765625,768.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,16384,1536.0009765625,1536.0009765625,1536.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,32768,3072.0009765625,3072.0009765625,3072.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,65536,6144.0009765625,6144.0009765625,6144.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,full,memory,MB,V,vocab size,131072,12288.0009765625,12288.0009765625,12288.0009765625,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,4096,448.0,448.0,448.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,8192,896.0,896.0,896.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,16384,1792.0,1792.0,1792.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,32768,3584.0,3584.0,3584.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,65536,7168.0,7168.0,7168.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,torch,full,memory,MB,V,vocab size,131072,14336.0,14336.0,14336.0,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:07,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,4096,0.0772550031542778,0.07681400328874588,0.07781700044870377,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,8192,0.11133299767971039,0.11029079556465149,0.11249499768018723,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,16384,0.182855486869812,0.18180499970912933,0.184346005320549,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,32768,0.34797999262809753,0.34415799379348755,0.3524560034275055,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,65536,0.6381700038909912,0.5871893763542175,0.6503890156745911,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,liger,forward,speed,ms,V,vocab size,131072,1.2901300191879272,1.1854605674743652,1.309309720993042,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:08,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,4096,0.3496739864349365,0.34630659222602844,0.37128299474716187,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,8192,0.508434534072876,0.5054479837417603,0.5356770157814026,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,16384,0.8596720099449158,0.8532760143280029,0.8820013999938965,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,32768,1.5063810348510742,1.4961813688278198,1.5300829410552979,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,65536,2.8009610176086426,2.78275990486145,2.818673610687256,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,torch,forward,speed,ms,V,vocab size,131072,5.361676216125488,5.321221351623535,5.470343589782715,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:09,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,4096,0.635703980922699,0.576960027217865,0.7208669781684875,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,8192,0.8908839821815491,0.8651646375656128,0.907164990901947,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,16384,1.0750620365142822,1.0412869453430176,1.0853954553604126,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,32768,2.025920867919922,2.011568069458008,2.0445730686187744,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,65536,3.8528499603271484,3.801870107650757,4.138711929321289,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,liger,full,speed,ms,V,vocab size,131072,7.911167144775391,7.777632236480713,7.972530841827393,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,4096,0.5614955425262451,0.5545071959495544,0.5869007706642151,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,8192,0.935885488986969,0.924938976764679,0.9627301692962646,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,16384,1.7219109535217285,1.6842973232269287,1.7517859935760498,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,32768,3.3199400901794434,3.297264575958252,3.344266891479492,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,65536,6.473502159118652,6.4203972816467285,6.523375034332275,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +kl_div,torch,full,speed,ms,V,vocab size,131072,12.803478240966797,12.674408912658691,12.970687866210938,"{""B"": 8, ""T"": 512}",AMD Instinct MI300X,2024-10-27 13:33:10,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,4096,0.3168194890022278,0.29112300276756287,0.32340219616889954,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,8192,0.4351480007171631,0.42261579632759094,0.44354361295700073,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,16384,0.9034919738769531,0.8868139982223511,0.9164010286331177,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,32768,1.7561280727386475,1.736985445022583,1.7775328159332275,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,65536,3.621946096420288,3.5657143592834473,3.65287184715271,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,liger,forward,speed,ms,V,vocab size,131072,7.605432987213135,7.543676376342773,7.6524200439453125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:48,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,4096,0.7363920211791992,0.7278347611427307,0.7410606145858765,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,8192,0.9540370106697083,0.9469709992408752,0.9590979814529419,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,16384,1.3869600296020508,1.3770053386688232,1.3994064331054688,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,32768,2.4382169246673584,2.4235363006591797,2.4610695838928223,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,65536,4.154746055603027,4.139894485473633,4.166340351104736,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,huggingface,forward,speed,ms,V,vocab size,131072,9.990569114685059,9.88729476928711,10.0206298828125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:49,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,4096,0.6591919660568237,0.645929217338562,0.671586811542511,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,8192,0.9281070232391357,0.9077731966972351,0.9469605684280396,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,16384,1.6586675643920898,1.6357308626174927,1.685123085975647,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,32768,3.171483039855957,3.1493120193481445,3.197381019592285,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,65536,6.144975662231445,6.099291801452637,6.1752238273620605,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,liger,full,speed,ms,V,vocab size,131072,12.535148620605469,12.492684364318848,12.614994049072266,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:50,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,4096,1.4950050115585327,1.4789749383926392,1.5141671895980835,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,8192,2.2622570991516113,2.2506818771362305,2.276822328567505,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,16384,3.692180633544922,3.678623914718628,3.7087130546569824,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,32768,7.1062188148498535,7.094897270202637,7.119569301605225,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,65536,13.639016151428223,13.520066261291504,13.699321746826172,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,speed,ms,V,vocab size,131072,28.35840606689453,28.281648635864258,28.48259925842285,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,4096,256.32861328125,256.32861328125,256.32861328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,8192,512.32861328125,512.32861328125,512.32861328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,16384,1024.32861328125,1024.32861328125,1024.32861328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,32768,2048.32861328125,2048.32861328125,2048.32861328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,65536,4096.32861328125,4096.32861328125,4096.32861328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,liger,full,memory,MB,V,vocab size,131072,8192.328125,8192.328125,8192.328125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,4096,1280.1259765625,1280.1259765625,1280.1259765625,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,8192,2560.1259765625,2560.1259765625,2560.1259765625,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,16384,5120.1259765625,5120.1259765625,5120.1259765625,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,32768,10240.1259765625,10240.1259765625,10240.1259765625,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,65536,20480.125,20480.125,20480.125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +cross_entropy,huggingface,full,memory,MB,V,vocab size,131072,40960.125,40960.125,40960.125,"{""B"": 8, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:33:51,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,4096,768.005859375,768.005859375,768.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,8192,1536.005859375,1536.005859375,1536.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,16384,3072.005859375,3072.005859375,3072.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,32768,6144.005859375,6144.005859375,6144.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,65536,12288.005859375,12288.005859375,12288.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,full,memory,MB,V,vocab size,131072,24576.005859375,24576.005859375,24576.005859375,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,4096,1664.0009765625,1664.0009765625,1664.0009765625,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,8192,3328.0009765625,3328.0009765625,3328.0009765625,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,16384,6656.0009765625,6656.0009765625,6656.0009765625,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,32768,13312.0009765625,13312.0009765625,13312.0009765625,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,65536,26624.0,26624.0,26624.0,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,torch,full,memory,MB,V,vocab size,131072,53248.0,53248.0,53248.0,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:01,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,4096,0.44636398553848267,0.44262298941612244,0.45026659965515137,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,8192,0.697298526763916,0.6942470073699951,0.700872004032135,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,16384,1.1154179573059082,1.1072750091552734,1.1245540380477905,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,32768,2.1704490184783936,2.1432435512542725,2.199099063873291,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,65536,8.948663711547852,8.924219131469727,8.957974433898926,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,liger,forward,speed,ms,V,vocab size,131072,17.715343475341797,17.66632080078125,17.723957061767578,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:03,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,4096,1.035792589187622,1.0222814083099365,1.07686185836792,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,8192,2.1558570861816406,2.112277030944824,2.2122883796691895,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,16384,4.237304210662842,4.1245198249816895,4.446786880493164,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,32768,8.342867851257324,8.22079086303711,8.58433723449707,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,65536,16.76507568359375,16.656991958618164,17.009769439697266,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,torch,forward,speed,ms,V,vocab size,131072,33.677642822265625,33.59162521362305,33.76366424560547,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:04,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,4096,0.74344801902771,0.7395089864730835,0.7570453882217407,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,8192,1.2169040441513062,1.2002742290496826,1.2250720262527466,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,16384,2.072446346282959,2.058866024017334,2.0826199054718018,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,32768,4.268534183502197,4.247622966766357,4.290736675262451,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,65536,13.33761215209961,13.313333511352539,13.350702285766602,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,liger,full,speed,ms,V,vocab size,131072,26.278703689575195,26.23784065246582,26.427383422851562,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:05,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,4096,2.5102219581604004,2.480088710784912,2.542630434036255,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,8192,5.236493110656738,5.168169975280762,5.403231143951416,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,16384,10.473868370056152,10.262763977050781,10.606208801269531,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,32768,21.00827407836914,20.7320613861084,21.116792678833008,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,65536,41.4166259765625,41.391536712646484,41.44171142578125,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +jsd,torch,full,speed,ms,V,vocab size,131072,81.93801879882812,81.93801879882812,81.93801879882812,"{""B"": 4, ""T"": 2048}",AMD Instinct MI300X,2024-10-27 13:34:07,0.3.1 +fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,4096,96.1857681274414,96.1857681274414,96.1857681274414,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:01,0.3.1 +fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,8192,129.94403076171875,129.94403076171875,129.94403076171875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:01,0.3.1 +fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,16384,190.29368591308594,190.29368591308594,190.29368591308594,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:01,0.3.1 +fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,32768,281.5962829589844,281.5962829589844,281.5962829589844,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:01,0.3.1 +fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,4096,10.025449752807617,9.961536407470703,10.207632064819336,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:29,0.3.1 +fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,8192,19.479944229125977,19.445049285888672,19.48929214477539,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:29,0.3.1 +fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,16384,38.543678283691406,38.50013732910156,38.58721923828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:29,0.3.1 +fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,32768,77.390380859375,77.390380859375,77.390380859375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:35:29,0.3.1 +fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,4096,97.05310821533203,97.05310821533203,97.05310821533203,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:03,0.3.1 +fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,8192,129.90618896484375,129.90618896484375,129.90618896484375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:03,0.3.1 +fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,16384,192.0047149658203,192.0047149658203,192.0047149658203,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:03,0.3.1 +fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,32768,283.6263122558594,283.6263122558594,283.6263122558594,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:03,0.3.1 +fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,4096,34.60418701171875,34.562156677246094,34.646217346191406,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:34,0.3.1 +fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,8192,65.18518829345703,65.18518829345703,65.18518829345703,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:34,0.3.1 +fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,16384,106.78714752197266,106.78714752197266,106.78714752197266,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:34,0.3.1 +fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,32768,244.2110595703125,244.2110595703125,244.2110595703125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:36:34,0.3.1 +fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,4096,4229.2978515625,4229.2978515625,4229.2978515625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:09,0.3.1 +fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,8192,4450.7197265625,4450.7197265625,4450.7197265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:09,0.3.1 +fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,16384,4894.1884765625,4894.1884765625,4894.1884765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:09,0.3.1 +fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,32768,5778.3759765625,5778.3759765625,5778.3759765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:09,0.3.1 +fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,4096,6076.0322265625,6076.0322265625,6076.0322265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:36,0.3.1 +fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,8192,9146.0634765625,9146.0634765625,9146.0634765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:36,0.3.1 +fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,16384,15286.1259765625,15286.1259765625,15286.1259765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:36,0.3.1 +fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,32768,27566.25,27566.25,27566.25,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:37:36,0.3.1 +swiglu,liger,forward,speed,ms,T,sequence length,1024,2.3810179233551025,2.3799896240234375,2.3820462226867676,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:02,0.3.1 +swiglu,liger,forward,speed,ms,T,sequence length,2048,5.107611179351807,5.107611179351807,5.107611179351807,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:02,0.3.1 +swiglu,liger,forward,speed,ms,T,sequence length,4096,9.686328887939453,9.686328887939453,9.686328887939453,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:02,0.3.1 +swiglu,liger,forward,speed,ms,T,sequence length,8192,16.848268508911133,16.848268508911133,16.848268508911133,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:02,0.3.1 +swiglu,huggingface,forward,speed,ms,T,sequence length,1024,2.455538034439087,2.406628370285034,2.4592905044555664,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:06,0.3.1 +swiglu,huggingface,forward,speed,ms,T,sequence length,2048,5.220818042755127,5.220818042755127,5.220818042755127,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:06,0.3.1 +swiglu,huggingface,forward,speed,ms,T,sequence length,4096,10.080816268920898,10.080816268920898,10.080816268920898,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:06,0.3.1 +swiglu,huggingface,forward,speed,ms,T,sequence length,8192,17.05329132080078,17.05329132080078,17.05329132080078,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:06,0.3.1 +swiglu,liger,full,memory,MB,T,sequence length,1024,1084.0,1084.0,1084.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:09,0.3.1 +swiglu,liger,full,memory,MB,T,sequence length,2048,1566.0,1566.0,1566.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:09,0.3.1 +swiglu,liger,full,memory,MB,T,sequence length,4096,2530.0,2530.0,2530.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:09,0.3.1 +swiglu,liger,full,memory,MB,T,sequence length,8192,4458.0,4458.0,4458.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:09,0.3.1 +swiglu,huggingface,full,memory,MB,T,sequence length,1024,1279.0,1279.0,1279.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:12,0.3.1 +swiglu,huggingface,full,memory,MB,T,sequence length,2048,1976.0,1976.0,1976.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:12,0.3.1 +swiglu,huggingface,full,memory,MB,T,sequence length,4096,3436.0,3436.0,3436.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:12,0.3.1 +swiglu,huggingface,full,memory,MB,T,sequence length,8192,6356.0,6356.0,6356.0,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",AMD Instinct MI300X,2024-10-27 13:38:12,0.3.1 +rope,liger,forward,speed,ms,H,hidden size,512,0.019044000655412674,0.016637399792671204,0.022129999473690987,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:35,0.3.1 +rope,liger,forward,speed,ms,H,hidden size,2048,0.04253600165247917,0.04125399887561798,0.04426100105047226,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:35,0.3.1 +rope,liger,forward,speed,ms,H,hidden size,8192,0.04534300044178963,0.043768998235464096,0.047257199883461,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:35,0.3.1 +rope,huggingface,forward,speed,ms,H,hidden size,512,0.08274800330400467,0.08134499937295914,0.08483300358057022,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:37,0.3.1 +rope,huggingface,forward,speed,ms,H,hidden size,2048,0.09074600040912628,0.09000500291585922,0.09165800362825394,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:37,0.3.1 +rope,huggingface,forward,speed,ms,H,hidden size,8192,0.2641800045967102,0.26053398847579956,0.2685680091381073,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:37,0.3.1 +rope,liger,backward,speed,ms,H,hidden size,512,0.11862999945878983,0.11670500040054321,0.12232600152492523,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:38,0.3.1 +rope,liger,backward,speed,ms,H,hidden size,2048,0.3065965175628662,0.30068299174308777,0.314193993806839,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:38,0.3.1 +rope,liger,backward,speed,ms,H,hidden size,8192,0.14058899879455566,0.12772999703884125,0.1764110028743744,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:38,0.3.1 +rope,huggingface,backward,speed,ms,H,hidden size,512,0.3265414834022522,0.30385100841522217,0.33211401104927063,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:40,0.3.1 +rope,huggingface,backward,speed,ms,H,hidden size,2048,0.5006470084190369,0.49396198987960815,0.5064125657081604,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:40,0.3.1 +rope,huggingface,backward,speed,ms,H,hidden size,8192,0.5749859809875488,0.565792977809906,0.58744877576828,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:40,0.3.1 +rope,liger,full,speed,ms,H,hidden size,512,0.47447800636291504,0.44701600074768066,0.5115057826042175,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:41,0.3.1 +rope,liger,full,speed,ms,H,hidden size,2048,0.41377899050712585,0.40928900241851807,0.41944098472595215,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:41,0.3.1 +rope,liger,full,speed,ms,H,hidden size,8192,0.38867199420928955,0.3835989832878113,0.3949737846851349,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:41,0.3.1 +rope,huggingface,full,speed,ms,H,hidden size,512,1.1267789602279663,1.0680656433105469,1.1334789991378784,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,huggingface,full,speed,ms,H,hidden size,2048,1.1234314441680908,1.1176209449768066,1.1297080516815186,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,huggingface,full,speed,ms,H,hidden size,8192,1.1253160238265991,1.1193219423294067,1.132220983505249,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,liger,full,memory,MB,H,hidden size,512,5.14111328125,5.14111328125,5.14111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,liger,full,memory,MB,H,hidden size,2048,20.51611328125,20.51611328125,20.51611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,liger,full,memory,MB,H,hidden size,8192,82.01611328125,82.01611328125,82.01611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,huggingface,full,memory,MB,H,hidden size,512,14.14111328125,14.14111328125,14.14111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,huggingface,full,memory,MB,H,hidden size,2048,56.51611328125,56.51611328125,56.51611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,huggingface,full,memory,MB,H,hidden size,8192,226.01611328125,226.01611328125,226.01611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:43,0.3.1 +rope,liger,forward,speed,ms,T,sequence length,1024,0.0313510000705719,0.02990799956023693,0.03239399939775467,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:45,0.3.1 +rope,liger,forward,speed,ms,T,sequence length,2048,0.045472998172044754,0.04382999986410141,0.047557998448610306,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:45,0.3.1 +rope,liger,forward,speed,ms,T,sequence length,4096,0.0782879963517189,0.07574199885129929,0.08123680204153061,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:45,0.3.1 +rope,liger,forward,speed,ms,T,sequence length,8192,0.12676799297332764,0.1248830035328865,0.12956400215625763,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:45,0.3.1 +rope,liger,forward,speed,ms,T,sequence length,16384,0.24643948674201965,0.2358555942773819,0.2543700039386749,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:45,0.3.1 +rope,huggingface,forward,speed,ms,T,sequence length,1024,0.139957994222641,0.13853219151496887,0.1416739970445633,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:47,0.3.1 +rope,huggingface,forward,speed,ms,T,sequence length,2048,0.26664599776268005,0.26142799854278564,0.27042677998542786,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:47,0.3.1 +rope,huggingface,forward,speed,ms,T,sequence length,4096,0.4514254927635193,0.4477370083332062,0.4563803970813751,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:47,0.3.1 +rope,huggingface,forward,speed,ms,T,sequence length,8192,0.8696755170822144,0.8519269824028015,0.886677622795105,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:47,0.3.1 +rope,huggingface,forward,speed,ms,T,sequence length,16384,1.7253990173339844,1.7028999328613281,1.749678373336792,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:47,0.3.1 +rope,liger,backward,speed,ms,T,sequence length,1024,0.2872524857521057,0.22209499776363373,0.2999109923839569,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:50,0.3.1 +rope,liger,backward,speed,ms,T,sequence length,2048,0.29409798979759216,0.29081079363822937,0.29828178882598877,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:50,0.3.1 +rope,liger,backward,speed,ms,T,sequence length,4096,0.2889859974384308,0.28520119190216064,0.2930597960948944,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:50,0.3.1 +rope,liger,backward,speed,ms,T,sequence length,8192,0.30679699778556824,0.3025650084018707,0.311598002910614,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:50,0.3.1 +rope,liger,backward,speed,ms,T,sequence length,16384,0.3049674928188324,0.3000775873661041,0.31046998500823975,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:50,0.3.1 +rope,huggingface,backward,speed,ms,T,sequence length,1024,0.6077799797058105,0.5683410167694092,0.660319983959198,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:52,0.3.1 +rope,huggingface,backward,speed,ms,T,sequence length,2048,0.613924503326416,0.5838446021080017,0.6215559840202332,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:52,0.3.1 +rope,huggingface,backward,speed,ms,T,sequence length,4096,0.6292945146560669,0.6012133955955505,0.6550275683403015,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:52,0.3.1 +rope,huggingface,backward,speed,ms,T,sequence length,8192,1.0817270278930664,1.0568008422851562,1.1114602088928223,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:52,0.3.1 +rope,huggingface,backward,speed,ms,T,sequence length,16384,2.17606258392334,2.1427149772644043,2.2074382305145264,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:52,0.3.1 +rope,liger,full,speed,ms,T,sequence length,1024,0.34369000792503357,0.2882840037345886,0.380264014005661,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:54,0.3.1 +rope,liger,full,speed,ms,T,sequence length,2048,0.38711899518966675,0.3826189935207367,0.3925119936466217,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:54,0.3.1 +rope,liger,full,speed,ms,T,sequence length,4096,0.3921760022640228,0.38806381821632385,0.39836499094963074,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:54,0.3.1 +rope,liger,full,speed,ms,T,sequence length,8192,0.4039669930934906,0.39859721064567566,0.4107010066509247,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:54,0.3.1 +rope,liger,full,speed,ms,T,sequence length,16384,0.49592599272727966,0.4803310036659241,0.5079135894775391,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:54,0.3.1 +rope,huggingface,full,speed,ms,T,sequence length,1024,1.1134295463562012,0.9298979640007019,1.1324522495269775,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,speed,ms,T,sequence length,2048,1.1286640167236328,1.1238166093826294,1.1339516639709473,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,speed,ms,T,sequence length,4096,1.1484429836273193,1.1290102005004883,1.1682971715927124,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,speed,ms,T,sequence length,8192,1.9476640224456787,1.9213647842407227,1.9782134294509888,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,speed,ms,T,sequence length,16384,3.8930420875549316,3.851893186569214,3.9290683269500732,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,liger,full,memory,MB,T,sequence length,1024,41.00830078125,41.00830078125,41.00830078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,liger,full,memory,MB,T,sequence length,2048,82.01611328125,82.01611328125,82.01611328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,liger,full,memory,MB,T,sequence length,4096,164.03173828125,164.03173828125,164.03173828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,liger,full,memory,MB,T,sequence length,8192,328.06298828125,328.06298828125,328.06298828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,liger,full,memory,MB,T,sequence length,16384,656.12548828125,656.12548828125,656.12548828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,memory,MB,T,sequence length,1024,113.00830078125,113.00830078125,113.00830078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,memory,MB,T,sequence length,2048,226.01611328125,226.01611328125,226.01611328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,memory,MB,T,sequence length,4096,452.03173828125,452.03173828125,452.03173828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,memory,MB,T,sequence length,8192,904.06298828125,904.06298828125,904.06298828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +rope,huggingface,full,memory,MB,T,sequence length,16384,1808.12548828125,1808.12548828125,1808.12548828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",AMD Instinct MI300X,2024-10-27 13:38:57,0.3.1 +fused_linear_jsd,liger,forward,speed,ms,BT,B x T,1024,99.69364166259766,99.69364166259766,99.69364166259766,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:22,0.3.1 +fused_linear_jsd,liger,forward,speed,ms,BT,B x T,2048,108.41561126708984,108.41561126708984,108.41561126708984,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:22,0.3.1 +fused_linear_jsd,liger,forward,speed,ms,BT,B x T,4096,127.74031066894531,127.74031066894531,127.74031066894531,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:22,0.3.1 +fused_linear_jsd,liger,forward,speed,ms,BT,B x T,8192,177.58343505859375,177.58343505859375,177.58343505859375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:22,0.3.1 +fused_linear_jsd,torch,forward,speed,ms,BT,B x T,1024,9.92261791229248,9.893375396728516,9.931133270263672,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:24,0.3.1 +fused_linear_jsd,torch,forward,speed,ms,BT,B x T,2048,19.393369674682617,19.340970993041992,19.529766082763672,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:24,0.3.1 +fused_linear_jsd,torch,forward,speed,ms,BT,B x T,4096,39.715267181396484,39.70789337158203,39.72264099121094,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:24,0.3.1 +fused_linear_jsd,torch,forward,speed,ms,BT,B x T,8192,77.27423858642578,77.27423858642578,77.27423858642578,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:24,0.3.1 +fused_linear_jsd,liger,full,speed,ms,BT,B x T,1024,100.77845001220703,100.77845001220703,100.77845001220703,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:28,0.3.1 +fused_linear_jsd,liger,full,speed,ms,BT,B x T,2048,108.79886627197266,108.79886627197266,108.79886627197266,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:28,0.3.1 +fused_linear_jsd,liger,full,speed,ms,BT,B x T,4096,128.9139862060547,128.9139862060547,128.9139862060547,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:28,0.3.1 +fused_linear_jsd,liger,full,speed,ms,BT,B x T,8192,178.08639526367188,178.08639526367188,178.08639526367188,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:28,0.3.1 +fused_linear_jsd,torch,full,speed,ms,BT,B x T,1024,28.217811584472656,28.10204315185547,28.402023315429688,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:31,0.3.1 +fused_linear_jsd,torch,full,speed,ms,BT,B x T,2048,52.16404342651367,52.16404342651367,52.16404342651367,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:31,0.3.1 +fused_linear_jsd,torch,full,speed,ms,BT,B x T,4096,108.397705078125,108.397705078125,108.397705078125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:31,0.3.1 +fused_linear_jsd,torch,full,speed,ms,BT,B x T,8192,208.48602294921875,208.48602294921875,208.48602294921875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:31,0.3.1 +fused_linear_jsd,liger,full,memory,MB,BT,B x T,1024,4587.96923828125,4587.96923828125,4587.96923828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,liger,full,memory,MB,BT,B x T,2048,5167.93798828125,5167.93798828125,5167.93798828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,liger,full,memory,MB,BT,B x T,4096,6327.87548828125,6327.87548828125,6327.87548828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,liger,full,memory,MB,BT,B x T,8192,8647.75,8647.75,8647.75,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,torch,full,memory,MB,BT,B x T,1024,10545.00390625,10545.00390625,10545.00390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,torch,full,memory,MB,BT,B x T,2048,17082.0078125,17082.0078125,17082.0078125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,torch,full,memory,MB,BT,B x T,4096,30156.015625,30156.015625,30156.015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +fused_linear_jsd,torch,full,memory,MB,BT,B x T,8192,56304.03125,56304.03125,56304.03125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:39:36,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,1024,0.024013999849557877,0.023333000019192696,0.02489900030195713,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,2048,0.023854000493884087,0.023492999374866486,0.024255000054836273,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,4096,0.02609900012612343,0.025498000904917717,0.027421999722719193,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,8192,0.029867000877857208,0.029347000643610954,0.030324600636959076,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,16384,0.03187299892306328,0.031383197754621506,0.03239300101995468,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,32768,0.03568100184202194,0.0351594015955925,0.03624200075864792,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,65536,0.0402120016515255,0.03953000158071518,0.04093300178647041,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,131072,0.04165399819612503,0.041053999215364456,0.04237600043416023,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:52,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,1024,0.07083100080490112,0.07019899785518646,0.07182300090789795,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,2048,0.07113199681043625,0.07053080201148987,0.07197300344705582,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,4096,0.04493200033903122,0.043813999742269516,0.06798499822616577,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,8192,0.06908699870109558,0.06833580136299133,0.0722208023071289,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,16384,0.0745600014925003,0.0734269991517067,0.0758420005440712,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,32768,0.07569199800491333,0.07450900226831436,0.07703500241041183,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,65536,0.07662899792194366,0.07538740336894989,0.07803700119256973,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,131072,0.07788699865341187,0.07669399678707123,0.07904979586601257,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:39:56,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,1024,0.03255400061607361,0.02999899908900261,0.034999001771211624,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,2048,0.028985999524593353,0.023654000833630562,0.05556600168347359,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,4096,0.03588150069117546,0.026541000232100487,0.056552402675151825,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,8192,0.02957249991595745,0.027021000161767006,0.05731400102376938,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,16384,0.030133001506328583,0.029380200430750847,0.05943100154399872,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,32768,0.038447000086307526,0.03772599995136261,0.06805000454187393,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,65536,0.04021099954843521,0.03944979980587959,0.06943800300359726,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,131072,0.04205550253391266,0.041317399591207504,0.07113800197839737,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:01,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,1024,3.349256992340088,3.348026990890503,3.3521318435668945,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,2048,3.354088068008423,3.352470636367798,3.355268716812134,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,4096,3.365614891052246,3.3632776737213135,3.3671698570251465,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,8192,3.3818514347076416,3.3789312839508057,3.384796142578125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,16384,3.4065470695495605,3.4053046703338623,3.409355401992798,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,32768,3.45664119720459,3.4537723064422607,3.462775707244873,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,65536,3.5657999515533447,3.5615034103393555,3.5899364948272705,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,131072,3.852560043334961,3.8315749168395996,3.862823247909546,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:05,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,1024,0.2998709976673126,0.2949739992618561,0.30557599663734436,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,2048,0.3239955008029938,0.31481900811195374,0.32934001088142395,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,4096,0.40167248249053955,0.37138399481773376,0.44633400440216064,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,8192,0.4907745122909546,0.46964699029922485,0.49765101075172424,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,16384,0.4460834860801697,0.4421345889568329,0.45341798663139343,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,32768,0.49610599875450134,0.4921579957008362,0.5217509865760803,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,65536,0.6150169968605042,0.608159601688385,0.626340389251709,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,131072,0.8112879991531372,0.8007280230522156,0.8273789882659912,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:08,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,1024,0.6423900127410889,0.6155179738998413,0.6870909929275513,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,2048,0.6512089967727661,0.6215031743049622,0.6784994006156921,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,4096,0.6685640215873718,0.6350101828575134,0.6937013864517212,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,8192,0.6141945123672485,0.574954628944397,0.645205020904541,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,16384,0.5982990264892578,0.5684733986854553,0.6368582248687744,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,32768,0.5995020270347595,0.5704596042633057,0.6390489935874939,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,65536,0.6109880208969116,0.5808089971542358,0.6419180035591125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,131072,0.6002529859542847,0.5718889832496643,0.6080852150917053,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:15,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,1024,0.08735799789428711,0.08675699681043625,0.08823999762535095,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,2048,0.10211200267076492,0.10143079608678818,0.10303399711847305,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,4096,0.1181889995932579,0.11708199977874756,0.11967200040817261,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,8192,0.14456899464130402,0.14352600276470184,0.14589199423789978,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,16384,0.16477400064468384,0.1635397970676422,0.16617700457572937,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,32768,0.17644000053405762,0.17514999210834503,0.17785200476646423,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,65536,0.17908699810504913,0.17700199782848358,0.18065819144248962,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,liger,forward,speed,ms,V,embedding dimension,131072,0.1801690012216568,0.17786000669002533,0.18243840336799622,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:28,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,1024,0.1546514928340912,0.15338900685310364,0.15611499547958374,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,2048,0.16429300606250763,0.1623767912387848,0.16637000441551208,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,4096,0.19023199379444122,0.1883392035961151,0.19224479794502258,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,8192,0.21705299615859985,0.21488840878009796,0.21944239735603333,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,16384,0.22711600363254547,0.2253119945526123,0.22954539954662323,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,32768,0.23068399727344513,0.2290804088115692,0.23255199193954468,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,65536,0.23112499713897705,0.22932100296020508,0.23332999646663666,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,huggingface,forward,speed,ms,V,embedding dimension,131072,0.23166599869728088,0.22932879626750946,0.2344367951154709,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:41,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,1024,0.1555130034685135,0.15395000576972961,0.15715700387954712,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,2048,0.16441400349140167,0.16272959113121033,0.16659459471702576,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,4096,0.1895110011100769,0.18774600327014923,0.1912750005722046,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,8192,0.21787500381469727,0.21547339856624603,0.22101400792598724,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,16384,0.22747650742530823,0.22547200322151184,0.2297860085964203,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,32768,0.2296614944934845,0.2282470017671585,0.23166200518608093,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,65536,0.23116500675678253,0.229601189494133,0.23396320641040802,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,torch_compile,forward,speed,ms,V,embedding dimension,131072,0.23192650079727173,0.2298019975423813,0.23461300134658813,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:40:55,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,1024,18.276464462280273,18.270610809326172,18.303680419921875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,2048,18.462324142456055,18.447486877441406,18.4769344329834,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,4096,18.67670440673828,18.669862747192383,18.68463706970215,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,8192,18.941423416137695,18.932281494140625,18.956079483032227,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,16384,19.202335357666016,19.19656753540039,19.20479965209961,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,32768,19.52157974243164,19.512903213500977,19.55003547668457,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,65536,20.134601593017578,20.108572006225586,20.140581130981445,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,liger,full,speed,ms,V,embedding dimension,131072,21.253036499023438,21.23211097717285,21.265459060668945,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:09,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,1024,0.6219429969787598,0.6193010210990906,0.6253083944320679,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,2048,0.681236982345581,0.6776757836341858,0.6854169964790344,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,4096,0.8018919825553894,0.7960910201072693,0.8120684027671814,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,8192,1.0099940299987793,0.9976135492324829,1.022810935974121,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,16384,1.2894790172576904,1.2850548028945923,1.3165520429611206,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,32768,1.637880563735962,1.6094815731048584,1.653237223625183,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,65536,2.1713271141052246,2.156087636947632,2.184225559234619,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,huggingface,full,speed,ms,V,embedding dimension,131072,3.1968960762023926,3.1853315830230713,3.20605206489563,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:22,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,1024,0.6689594984054565,0.6662330031394958,0.6732450127601624,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,2048,0.7368184924125671,0.7320162057876587,0.7431178092956543,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,4096,0.8027529716491699,0.7949268221855164,0.8136848211288452,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,8192,0.9631625413894653,0.9602010250091553,0.9893969893455505,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,16384,1.3263269662857056,1.3157662153244019,1.3419616222381592,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,32768,1.6437840461730957,1.6305795907974243,1.657544732093811,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,65536,2.1325440406799316,2.1253716945648193,2.146986722946167,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,torch_compile,full,speed,ms,V,embedding dimension,131072,3.1658010482788086,3.1595280170440674,3.179412364959717,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:36,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,1024,108.125,108.125,108.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,2048,120.125,120.125,120.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,4096,144.125,144.125,144.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,8192,192.125,192.125,192.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,16384,288.125,288.125,288.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,32768,480.125,480.125,480.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,65536,864.125,864.125,864.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,131072,1632.125,1632.125,1632.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:38,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,1024,116.537109375,116.537109375,116.537109375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,2048,131.359375,131.359375,131.359375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,4096,161.40625,161.40625,161.40625,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,8192,221.5,221.5,221.5,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,16384,341.6875,341.6875,341.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,32768,533.6875,533.6875,533.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,65536,917.6875,917.6875,917.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,131072,1685.6875,1685.6875,1685.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:40,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,1024,116.537109375,116.537109375,116.537109375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,2048,131.359375,131.359375,131.359375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,4096,161.40625,161.40625,161.40625,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,8192,221.5,221.5,221.5,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,16384,341.6875,341.6875,341.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,32768,533.6875,533.6875,533.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,65536,917.6875,917.6875,917.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,131072,1685.6875,1685.6875,1685.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:43,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,1024,576.125,576.125,576.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,2048,640.125,640.125,640.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,4096,768.125,768.125,768.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,8192,1024.125,1024.125,1024.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,16384,1536.125,1536.125,1536.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,32768,2560.125,2560.125,2560.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,65536,4608.125,4608.125,4608.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,liger,full,memory,MB,V,embedding dimension,131072,8704.125,8704.125,8704.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:41:55,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,1024,618.130859375,618.130859375,618.130859375,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,2048,698.154296875,698.154296875,698.154296875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,4096,858.201171875,858.201171875,858.201171875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,8192,1178.294921875,1178.294921875,1178.294921875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,16384,1818.482421875,1818.482421875,1818.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,32768,2842.482421875,2842.482421875,2842.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,65536,4890.482421875,4890.482421875,4890.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,huggingface,full,memory,MB,V,embedding dimension,131072,8986.482421875,8986.482421875,8986.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:07,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,1024,618.130859375,618.130859375,618.130859375,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,2048,698.154296875,698.154296875,698.154296875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,4096,858.201171875,858.201171875,858.201171875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,8192,1178.294921875,1178.294921875,1178.294921875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,16384,1818.482421875,1818.482421875,1818.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,32768,2842.482421875,2842.482421875,2842.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,65536,4890.482421875,4890.482421875,4890.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +embedding,torch_compile,full,memory,MB,V,embedding dimension,131072,8986.482421875,8986.482421875,8986.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",AMD Instinct MI300X,2024-10-27 13:42:20,0.3.1 +geglu,liger,full,speed,ms,T,sequence length,1024,14.66943073272705,14.66943073272705,14.66943073272705,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:45,0.3.1 +geglu,liger,full,speed,ms,T,sequence length,2048,28.020727157592773,28.020727157592773,28.020727157592773,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:45,0.3.1 +geglu,liger,full,speed,ms,T,sequence length,4096,52.34413528442383,52.34413528442383,52.34413528442383,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:45,0.3.1 +geglu,liger,full,speed,ms,T,sequence length,8192,104.92530822753906,104.92530822753906,104.92530822753906,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:45,0.3.1 +geglu,huggingface,full,speed,ms,T,sequence length,1024,14.806131362915039,14.806131362915039,14.806131362915039,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:50,0.3.1 +geglu,huggingface,full,speed,ms,T,sequence length,2048,28.9786319732666,28.9786319732666,28.9786319732666,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:50,0.3.1 +geglu,huggingface,full,speed,ms,T,sequence length,4096,53.78598403930664,53.78598403930664,53.78598403930664,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:50,0.3.1 +geglu,huggingface,full,speed,ms,T,sequence length,8192,108.10478973388672,108.10478973388672,108.10478973388672,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:50,0.3.1 +geglu,liger,forward,speed,ms,T,sequence length,1024,5.18009614944458,5.18009614944458,5.18009614944458,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:53,0.3.1 +geglu,liger,forward,speed,ms,T,sequence length,2048,9.733508110046387,9.733508110046387,9.733508110046387,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:53,0.3.1 +geglu,liger,forward,speed,ms,T,sequence length,4096,16.848751068115234,16.848751068115234,16.848751068115234,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:53,0.3.1 +geglu,liger,forward,speed,ms,T,sequence length,8192,33.88641357421875,33.88641357421875,33.88641357421875,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:53,0.3.1 +geglu,huggingface,forward,speed,ms,T,sequence length,1024,5.340490818023682,5.340490818023682,5.340490818023682,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:57,0.3.1 +geglu,huggingface,forward,speed,ms,T,sequence length,2048,9.88316822052002,9.88316822052002,9.88316822052002,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:57,0.3.1 +geglu,huggingface,forward,speed,ms,T,sequence length,4096,17.31380844116211,17.31380844116211,17.31380844116211,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:57,0.3.1 +geglu,huggingface,forward,speed,ms,T,sequence length,8192,34.77017593383789,34.77017593383789,34.77017593383789,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:42:57,0.3.1 +geglu,liger,backward,speed,ms,T,sequence length,1024,9.047189712524414,9.047189712524414,9.047189712524414,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:01,0.3.1 +geglu,liger,backward,speed,ms,T,sequence length,2048,17.675949096679688,17.675949096679688,17.675949096679688,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:01,0.3.1 +geglu,liger,backward,speed,ms,T,sequence length,4096,35.2852668762207,35.2852668762207,35.2852668762207,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:01,0.3.1 +geglu,liger,backward,speed,ms,T,sequence length,8192,71.63275909423828,71.63275909423828,71.63275909423828,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:01,0.3.1 +geglu,huggingface,backward,speed,ms,T,sequence length,1024,9.093975067138672,9.093975067138672,9.093975067138672,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:05,0.3.1 +geglu,huggingface,backward,speed,ms,T,sequence length,2048,17.87596321105957,17.87596321105957,17.87596321105957,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:05,0.3.1 +geglu,huggingface,backward,speed,ms,T,sequence length,4096,35.5590934753418,35.5590934753418,35.5590934753418,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:05,0.3.1 +geglu,huggingface,backward,speed,ms,T,sequence length,8192,72.27393341064453,72.27393341064453,72.27393341064453,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:05,0.3.1 +geglu,liger,full,memory,MB,T,sequence length,1024,1566.0,1566.0,1566.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:09,0.3.1 +geglu,liger,full,memory,MB,T,sequence length,2048,2530.0,2530.0,2530.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:09,0.3.1 +geglu,liger,full,memory,MB,T,sequence length,4096,4458.0,4458.0,4458.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:09,0.3.1 +geglu,liger,full,memory,MB,T,sequence length,8192,8314.0,8314.0,8314.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:09,0.3.1 +geglu,huggingface,full,memory,MB,T,sequence length,1024,1976.0,1976.0,1976.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:12,0.3.1 +geglu,huggingface,full,memory,MB,T,sequence length,2048,3436.0,3436.0,3436.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:12,0.3.1 +geglu,huggingface,full,memory,MB,T,sequence length,4096,6356.0,6356.0,6356.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:12,0.3.1 +geglu,huggingface,full,memory,MB,T,sequence length,8192,12196.0,12196.0,12196.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:12,0.3.1 +geglu,liger,forward,memory,MB,T,sequence length,1024,903.0,903.0,903.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:15,0.3.1 +geglu,liger,forward,memory,MB,T,sequence length,2048,1547.0,1547.0,1547.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:15,0.3.1 +geglu,liger,forward,memory,MB,T,sequence length,4096,2835.0,2835.0,2835.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:15,0.3.1 +geglu,liger,forward,memory,MB,T,sequence length,8192,5411.0,5411.0,5411.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:15,0.3.1 +geglu,huggingface,forward,memory,MB,T,sequence length,1024,1075.0,1075.0,1075.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:18,0.3.1 +geglu,huggingface,forward,memory,MB,T,sequence length,2048,1891.0,1891.0,1891.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:18,0.3.1 +geglu,huggingface,forward,memory,MB,T,sequence length,4096,3523.0,3523.0,3523.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:18,0.3.1 +geglu,huggingface,forward,memory,MB,T,sequence length,8192,6787.0,6787.0,6787.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:18,0.3.1 +geglu,liger,backward,memory,MB,T,sequence length,1024,1566.0,1566.0,1566.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:21,0.3.1 +geglu,liger,backward,memory,MB,T,sequence length,2048,2530.0,2530.0,2530.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:21,0.3.1 +geglu,liger,backward,memory,MB,T,sequence length,4096,4458.0,4458.0,4458.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:21,0.3.1 +geglu,liger,backward,memory,MB,T,sequence length,8192,8314.0,8314.0,8314.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:21,0.3.1 +geglu,huggingface,backward,memory,MB,T,sequence length,1024,1976.0,1976.0,1976.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:25,0.3.1 +geglu,huggingface,backward,memory,MB,T,sequence length,2048,3436.0,3436.0,3436.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:25,0.3.1 +geglu,huggingface,backward,memory,MB,T,sequence length,4096,6356.0,6356.0,6356.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:25,0.3.1 +geglu,huggingface,backward,memory,MB,T,sequence length,8192,12196.0,12196.0,12196.0,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",AMD Instinct MI300X,2024-10-27 13:43:25,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,1024,0.04494199901819229,0.04333839938044548,0.04689040035009384,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,2048,0.04017199948430061,0.03840700164437294,0.04209500178694725,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,4096,0.03906799852848053,0.037845999002456665,0.04056279733777046,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,8192,0.03680400177836418,0.03608199954032898,0.03859919682145119,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,16384,0.06488700211048126,0.06213099882006645,0.06776399910449982,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,liger,forward,speed,ms,H,hidden size,32768,0.13025599718093872,0.1290930062532425,0.13145899772644043,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:21,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,1024,0.054763998836278915,0.05396300181746483,0.0558059997856617,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,2048,0.07801699638366699,0.07729600369930267,0.07905899733304977,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,4096,0.1303659975528717,0.12907299399375916,0.1328520029783249,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,8192,0.23198199272155762,0.2280699908733368,0.26100099086761475,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,16384,0.43326398730278015,0.4265280067920685,0.4626828134059906,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,huggingface,forward,speed,ms,H,hidden size,32768,0.9901285171508789,0.9613360166549683,1.019419550895691,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:25,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,1024,0.7101730108261108,0.6372388005256653,0.7210777997970581,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,2048,0.717088520526886,0.7082564234733582,0.7253596186637878,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,4096,0.7206770181655884,0.7119570374488831,0.7287229895591736,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,8192,0.7162665128707886,0.7065649628639221,0.7238799929618835,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,16384,0.7163670063018799,0.7057341933250427,0.7250019907951355,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,liger,full,speed,ms,H,hidden size,32768,0.7410880327224731,0.73785799741745,0.7610667943954468,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:29,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,1024,0.8654059767723083,0.7122011780738831,0.8754922151565552,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,2048,0.8757489919662476,0.869575023651123,0.880961000919342,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,4096,0.8704169988632202,0.8663780093193054,0.8753979802131653,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,8192,0.9989489912986755,0.969061017036438,1.0288059711456299,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,16384,1.927377462387085,1.9015731811523438,1.963661551475525,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,huggingface,full,speed,ms,H,hidden size,32768,3.9152119159698486,3.8668220043182373,3.9596729278564453,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:32,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,1024,0.3544450104236603,0.3410226106643677,0.3610199987888336,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,2048,0.3849940001964569,0.38034340739250183,0.39015039801597595,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,4096,0.38555049896240234,0.3807849884033203,0.39119458198547363,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,8192,0.38363099098205566,0.37856540083885193,0.38900941610336304,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,16384,0.3850594758987427,0.380596399307251,0.39024099707603455,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,liger,backward,speed,ms,H,hidden size,32768,0.6563155055046082,0.6314539909362793,0.665431022644043,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:36,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,1024,0.4819749891757965,0.4746460020542145,0.5170620083808899,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,2048,0.4807319939136505,0.47564101219177246,0.4856067895889282,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,4096,0.4844900071620941,0.4777750074863434,0.49538499116897583,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,8192,0.7680795192718506,0.7393749952316284,0.7956370115280151,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,16384,1.5266975164413452,1.499796986579895,1.5570260286331177,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,backward,speed,ms,H,hidden size,32768,2.9262759685516357,2.8937718868255615,2.9610214233398438,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,1024,13.2109375,13.2109375,13.2109375,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,2048,26.4140625,26.4140625,26.4140625,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,4096,52.8203125,52.8203125,52.8203125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,8192,105.6328125,105.6328125,105.6328125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,16384,212.2578125,212.2578125,212.2578125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,liger,full,memory,MB,H,hidden size,32768,422.5078125,422.5078125,422.5078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,1024,120.02392578125,120.02392578125,120.02392578125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,2048,240.03955078125,240.03955078125,240.03955078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,4096,480.07080078125,480.07080078125,480.07080078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,8192,960.13330078125,960.13330078125,960.13330078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,16384,1920.25830078125,1920.25830078125,1920.25830078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1 +rms_norm,huggingface,full,memory,MB,H,hidden size,32768,3008.50830078125,3008.50830078125,3008.50830078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",AMD Instinct MI300X,2024-10-27 13:33:39,0.3.1