diff --git a/.gitignore b/.gitignore index 08c1ce1..0be9850 100644 --- a/.gitignore +++ b/.gitignore @@ -171,6 +171,7 @@ Thumbs.db # Checkpoints (if you don't want to track them) checkpoints/ checkpoints_test/ +checkpoints_optimized/ # Training artifacts (discovered knowledge) # Ignore all images except those in docs/images diff --git a/inference_benchmarks/inference_metrics.csv b/inference_benchmarks/inference_metrics.csv index 2296560..192235b 100644 --- a/inference_benchmarks/inference_metrics.csv +++ b/inference_benchmarks/inference_metrics.csv @@ -399,3 +399,203 @@ run_1763328498_non_optimized,1763328498.633191,False,105,0,0.0002338886260986328 run_1763328498_optimized,1763328498.6372032,True,105,0,0.0003268718719482422,0,0,73.2470703125,,cuda run_1763328501_non_optimized,1763328501.9915178,False,110,0,0.00023627281188964844,0,0,73.24609375,,cuda run_1763328501_optimized,1763328501.9954562,True,110,0,0.0003197193145751953,0,0,73.2470703125,,cuda +run_1763526557_non_optimized,1763526558.0903924,False,53,47,0.35442304611206055,132.60988673163106,7.540915874724693,83.77001953125,,cuda +run_1763526557_optimized,1763526558.221367,True,53,47,0.12709331512451172,369.8070189919485,2.7041130877555686,86.13916015625,,cuda +run_1763526561_non_optimized,1763526561.392078,False,66,34,0.3155982494354248,107.73190301537717,9.282301453983083,83.7705078125,,cuda +run_1763526561_optimized,1763526561.485392,True,66,34,0.08941459655761719,380.25111457155657,2.629841075224035,86.14013671875,,cuda +run_1763526564_non_optimized,1763526564.6852732,False,60,40,0.3313627243041992,120.71363815586875,8.28406810760498,83.77001953125,,cuda +run_1763526564_optimized,1763526564.794894,True,60,40,0.10575437545776367,378.2349413616012,2.643859386444092,86.13916015625,,cuda +run_1763526567_non_optimized,1763526567.9853783,False,61,39,0.33505964279174805,116.39718730387337,8.591272892096104,83.77001953125,,cuda +run_1763526567_optimized,1763526568.0917747,True,61,39,0.10251092910766602,380.4472395234928,2.628485361735026,86.13916015625,,cuda +run_1763526570_non_optimized,1763526571.282682,False,60,40,0.33055973052978516,121.00687502344086,8.263993263244629,83.77001953125,,cuda +run_1763526570_optimized,1763526571.3913667,True,60,40,0.10458731651306152,382.4555532506138,2.614682912826538,86.13916015625,,cuda +run_1763526574_non_optimized,1763526574.5597281,False,64,36,0.3207573890686035,112.23435913521645,8.909927474127876,83.77001953125,,cuda +run_1763526574_optimized,1763526574.657765,True,64,36,0.09418869018554688,382.211494066664,2.61635250515408,86.13916015625,,cuda +run_1763526577_non_optimized,1763526577.870434,False,54,46,0.34548068046569824,133.1478215742579,7.510449575341266,83.77001953125,,cuda +run_1763526577_optimized,1763526577.996214,True,54,46,0.12188839912414551,377.39440611692817,2.649747807046641,86.13916015625,,cuda +run_1763526580_non_optimized,1763526581.1706474,False,67,33,0.31485915184020996,104.80876864188276,9.5411864194003,83.7705078125,,cuda +run_1763526580_optimized,1763526581.2631214,True,67,33,0.08859968185424805,372.4618340536148,2.684838844068123,86.14013671875,,cuda +run_1763526584_non_optimized,1763526584.4379666,False,63,37,0.32149577140808105,115.08705025247488,8.689074902921108,83.77001953125,,cuda +run_1763526584_optimized,1763526584.538549,True,63,37,0.09666585922241211,382.7618178499726,2.612590789794922,86.13916015625,,cuda +run_1763526587_non_optimized,1763526587.7008932,False,73,27,0.30018019676208496,89.94597342275547,11.117785065262407,83.7705078125,,cuda +run_1763526587_optimized,1763526587.775923,True,73,27,0.0710904598236084,379.79779659595874,2.6329799934669778,86.14013671875,,cuda +run_1763526590_non_optimized,1763526590.9904318,False,46,54,0.36146068572998047,149.3938404143328,6.693716402407046,83.77001953125,,cuda +run_1763526590_optimized,1763526591.1326938,True,46,54,0.1380624771118164,391.1272717225373,2.556712539107711,86.13916015625,,cuda +run_1763526593_non_optimized,1763526594.285255,False,70,30,0.3071746826171875,97.66429884258109,10.239156087239582,83.7705078125,,cuda +run_1763526593_optimized,1763526594.3690982,True,70,30,0.07981586456298828,375.8651261156847,2.6605288187662763,86.14013671875,,cuda +run_1763526597_non_optimized,1763526597.541682,False,66,34,0.3228602409362793,105.30872398967932,9.495889439302333,83.7705078125,,cuda +run_1763526597_optimized,1763526597.6348116,True,66,34,0.08911824226379395,381.515603722956,2.621124772464528,86.14013671875,,cuda +run_1763526600_non_optimized,1763526600.8545513,False,50,50,0.3591768741607666,139.20718063162423,7.183537483215332,83.77001953125,,cuda +run_1763526600_optimized,1763526600.9873838,True,50,50,0.12882018089294434,388.13794277740044,2.5764036178588867,86.13916015625,,cuda +run_1763526603_non_optimized,1763526604.1675096,False,66,34,0.323777437210083,105.01040558282972,9.52286580029656,83.7705078125,,cuda +run_1763526603_optimized,1763526604.2624831,True,66,34,0.09089446067810059,374.060198458185,2.6733664905323704,86.14013671875,,cuda +run_1763526607_non_optimized,1763526607.4869204,False,43,57,0.37198734283447266,153.23102008168036,6.526093733938117,83.77001953125,,cuda +run_1763526607_optimized,1763526607.6380951,True,43,57,0.14709210395812988,387.51230328600906,2.580563227335612,86.13916015625,,cuda +run_1763526610_non_optimized,1763526610.8189526,False,61,39,0.325711727142334,119.73778267724835,8.351582747239332,83.77001953125,,cuda +run_1763526610_optimized,1763526610.9252608,True,61,39,0.10221171379089355,381.5609635485308,2.620813174125476,86.13916015625,,cuda +run_1763526613_non_optimized,1763526614.11627,False,58,42,0.3369486331939697,124.64807944723742,8.022586504618326,83.77001953125,,cuda +run_1763526613_optimized,1763526614.2310555,True,58,42,0.11066627502441406,379.51941538408505,2.634911310105097,86.13916015625,,cuda +run_1763526617_non_optimized,1763526617.3865545,False,66,34,0.3230869770050049,105.23482040402178,9.502558147206026,83.7705078125,,cuda +run_1763526617_optimized,1763526617.4830964,True,66,34,0.09008550643920898,377.41920253224856,2.6495737188002644,86.14013671875,,cuda +run_1763526620_non_optimized,1763526620.6447241,False,67,33,0.32021570205688477,103.05553346705562,9.703506122935902,83.7705078125,,cuda +run_1763526620_optimized,1763526620.7375038,True,67,33,0.08865785598754883,372.21743783917515,2.6866016965923887,86.14013671875,,cuda +run_1763526623_non_optimized,1763526623.9323657,False,59,41,0.34384703636169434,119.2390675627982,8.386513081992545,83.77001953125,,cuda +run_1763526623_optimized,1763526624.045949,True,59,41,0.10931515693664551,375.06235319006936,2.666223339918183,86.13916015625,,cuda +run_1763526626_non_optimized,1763526627.2393768,False,63,37,0.3268465995788574,113.20295223408958,8.83369188050966,83.77001953125,,cuda +run_1763526626_optimized,1763526627.3409727,True,63,37,0.09740710258483887,379.84909742874277,2.632624394184834,86.13916015625,,cuda +run_1763526630_non_optimized,1763526630.5474806,False,53,47,0.35274362564086914,133.2412454360013,7.505183524273812,83.77001953125,,cuda +run_1763526630_optimized,1763526630.6753845,True,53,47,0.12363100051879883,380.1635496175846,2.630446819548911,86.13916015625,,cuda +run_1763526633_non_optimized,1763526633.845649,False,67,33,0.3140745162963867,105.07060677555408,9.51740958473899,83.7705078125,,cuda +run_1763526633_optimized,1763526633.9381201,True,67,33,0.08824992179870605,373.9380084128738,2.674240054506244,86.14013671875,,cuda +run_1763526636_non_optimized,1763526637.118738,False,64,36,0.32010531425476074,112.4629876383397,8.891814284854464,83.77001953125,,cuda +run_1763526636_optimized,1763526637.2179773,True,64,36,0.09497523307800293,379.04618744587117,2.6382009188334146,86.13916015625,,cuda +run_1763526640_non_optimized,1763526640.4208992,False,54,46,0.34542250633239746,133.17024558826677,7.509184920269511,83.77001953125,,cuda +run_1763526640_optimized,1763526640.5462801,True,54,46,0.12082409858703613,380.718751788276,2.6266108388486114,86.13916015625,,cuda +run_1763526643_non_optimized,1763526643.7587085,False,51,49,0.35083961486816406,139.66495778537683,7.159992140166613,83.77001953125,,cuda +run_1763526643_optimized,1763526643.8911211,True,51,49,0.12817978858947754,382.2755563822367,2.61591405284648,86.13916015625,,cuda +run_1763526646_non_optimized,1763526647.0730722,False,60,40,0.33283185958862305,120.18080255129306,8.320796489715576,83.77001953125,,cuda +run_1763526646_optimized,1763526647.1849632,True,60,40,0.10762190818786621,371.6715367114238,2.6905477046966553,86.13916015625,,cuda +run_1763526650_non_optimized,1763526650.3852422,False,56,44,0.34725308418273926,126.70873781741659,7.89211554960771,83.77001953125,,cuda +run_1763526650_optimized,1763526650.5039096,True,56,44,0.11433672904968262,384.8282206925889,2.598562023856423,86.13916015625,,cuda +run_1763526653_non_optimized,1763526653.6856203,False,65,35,0.31678080558776855,110.48649218206107,9.05088015965053,83.7705078125,,cuda +run_1763526653_optimized,1763526653.7829595,True,65,35,0.09303498268127441,376.2025744649772,2.658142362322126,86.14013671875,,cuda +run_1763526656_non_optimized,1763526656.986791,False,59,41,0.33247804641723633,123.31641274307752,8.109220644322837,83.77001953125,,cuda +run_1763526656_optimized,1763526657.097655,True,59,41,0.10654258728027344,384.8226427254337,2.598599689762767,86.13916015625,,cuda +run_1763526659_non_optimized,1763526660.297603,False,54,46,0.3501875400543213,131.35818593906697,7.61277260987655,83.77001953125,,cuda +run_1763526659_optimized,1763526660.426392,True,54,46,0.12442255020141602,369.7079020284901,2.7048380478568697,86.13916015625,,cuda +run_1763526663_non_optimized,1763526663.6286657,False,54,46,0.3505382537841797,131.22676199648498,7.62039682139521,83.77001953125,,cuda +run_1763526663_optimized,1763526663.7522638,True,54,46,0.11925172805786133,385.7386450423649,2.5924288708230723,86.13916015625,,cuda +run_1763526666_non_optimized,1763526666.9662895,False,59,41,0.33091211318969727,123.8999672897937,8.071027150968225,83.77001953125,,cuda +run_1763526666_optimized,1763526667.077263,True,59,41,0.10660624504089355,384.5928536763736,2.6001523180705743,86.13916015625,,cuda +run_1763526669_non_optimized,1763526670.2709506,False,65,35,0.32552552223205566,107.51845127230834,9.300729206630162,83.7705078125,,cuda +run_1763526669_optimized,1763526670.3674176,True,65,35,0.09210538864135742,379.9994822917907,2.631582532610212,86.14013671875,,cuda +run_1763526673_non_optimized,1763526673.5689995,False,60,40,0.32962512969970703,121.34997121257273,8.240628242492676,83.77001953125,,cuda +run_1763526673_optimized,1763526673.678643,True,60,40,0.10524392127990723,380.0694568726284,2.6310980319976807,86.13916015625,,cuda +run_1763526676_non_optimized,1763526676.8867915,False,56,44,0.3409693241119385,129.04386667216733,7.749302820725875,83.77001953125,,cuda +run_1763526676_optimized,1763526677.0066595,True,56,44,0.11539912223815918,381.2853958212384,2.622707323594527,86.13916015625,,cuda +run_1763526679_non_optimized,1763526680.210214,False,59,41,0.34114503860473633,120.18348608465084,8.320610697676496,83.77001953125,,cuda +run_1763526679_optimized,1763526680.3233387,True,59,41,0.10867524147033691,377.27084334283273,2.6506156456179735,86.13916015625,,cuda +run_1763526683_non_optimized,1763526683.5293164,False,49,51,0.3626677989959717,140.6245609375606,7.111133313646503,83.77001953125,,cuda +run_1763526683_optimized,1763526683.6654575,True,49,51,0.13167476654052734,387.31794511519433,2.581858167461321,86.13916015625,,cuda +run_1763526686_non_optimized,1763526686.8576732,False,57,43,0.3366522789001465,127.7282308632585,7.829122765119686,83.77001953125,,cuda +run_1763526686_optimized,1763526686.973754,True,57,43,0.11158990859985352,385.33950371975163,2.5951141534849658,86.13916015625,,cuda +run_1763526689_non_optimized,1763526690.151102,False,68,32,0.3112297058105469,102.81794893794354,9.72592830657959,83.7705078125,,cuda +run_1763526689_optimized,1763526690.239994,True,68,32,0.08438873291015625,379.19754090950187,2.637147903442383,86.14013671875,,cuda +run_1763526693_non_optimized,1763526693.4361937,False,57,43,0.33553242683410645,128.15452862700513,7.803079693816429,83.77001953125,,cuda +run_1763526693_optimized,1763526693.5529184,True,57,43,0.11221909523010254,383.1789938408391,2.6097464007000593,86.13916015625,,cuda +run_1763526696_non_optimized,1763526696.7601304,False,57,43,0.34420084953308105,124.92705947219716,8.004670919373979,83.77001953125,,cuda +run_1763526696_optimized,1763526696.8780057,True,57,43,0.11339879035949707,379.192757380227,2.6371811711510946,86.13916015625,,cuda +run_1763526699_non_optimized,1763526700.0553536,False,62,38,0.32698941230773926,116.21171380386193,8.604984534414191,83.77001953125,,cuda +run_1763526699_optimized,1763526700.160437,True,62,38,0.10051822662353516,378.04089145264277,2.6452164900930306,86.13916015625,,cuda +run_1763526702_non_optimized,1763526703.3607254,False,48,52,0.3581371307373047,145.19577987612305,6.887252514178936,83.77001953125,,cuda +run_1763526702_optimized,1763526703.501407,True,48,52,0.1360619068145752,382.17897439042554,2.6165751310495233,86.13916015625,,cuda +run_1763526706_non_optimized,1763526706.6655996,False,65,35,0.32056474685668945,109.18231135267965,9.158992767333984,83.7705078125,,cuda +run_1763526706_optimized,1763526706.7639794,True,65,35,0.09380245208740234,373.1245742636668,2.680070059640067,86.14013671875,,cuda +run_1763526709_non_optimized,1763526709.9760077,False,54,46,0.3535304069519043,130.11610626821704,7.685443629389224,83.77001953125,,cuda +run_1763526709_optimized,1763526710.1015396,True,54,46,0.12090945243835449,380.44999023920843,2.6284663573555327,86.13916015625,,cuda +run_1763526712_non_optimized,1763526713.2750318,False,60,40,0.3372828960418701,118.59480711715196,8.432072401046753,83.77001953125,,cuda +run_1763526712_optimized,1763526713.3839555,True,60,40,0.1043252944946289,383.41612351795817,2.6081323623657227,86.13916015625,,cuda +run_1763526716_non_optimized,1763526716.5846882,False,56,44,0.3465569019317627,126.96327718402686,7.87629322572188,83.77001953125,,cuda +run_1763526716_optimized,1763526716.704748,True,56,44,0.11548757553100586,380.9933648506369,2.6247176257046787,86.13916015625,,cuda +run_1763526719_non_optimized,1763526719.9162998,False,55,45,0.34871912002563477,129.04368420260982,7.74931377834744,83.77001953125,,cuda +run_1763526719_optimized,1763526720.0379987,True,55,45,0.11701393127441406,384.5695936364081,2.6003095838758683,86.13916015625,,cuda +run_1763526722_non_optimized,1763526723.1653128,False,86,14,0.2764289379119873,50.64592768669351,19.74492413657052,83.7705078125,,cuda +run_1763526722_optimized,1763526723.2101393,True,86,14,0.04014396667480469,348.74480923647076,2.867426191057478,86.14013671875,,cuda +run_1763526726_non_optimized,1763526726.321796,False,87,13,0.2667272090911865,48.73893460024045,20.517477622398964,83.7705078125,,cuda +run_1763526726_optimized,1763526726.3628628,True,87,13,0.03642010688781738,356.9456849767932,2.80154668367826,86.14013671875,,cuda +run_1763526729_non_optimized,1763526729.4611804,False,92,8,0.2541630268096924,31.47586059395687,31.770378351211548,83.7705078125,,cuda +run_1763526729_optimized,1763526729.489691,True,92,8,0.02371382713317871,337.3559212973669,2.964228391647339,86.14013671875,,cuda +run_1763526732_non_optimized,1763526732.6022036,False,91,9,0.25944042205810547,34.69004532371721,28.82671356201172,83.7705078125,,cuda +run_1763526732_optimized,1763526732.6337376,True,91,9,0.02675771713256836,336.3515637530072,2.9730796813964844,86.14013671875,,cuda +run_1763526735_non_optimized,1763526735.746593,False,96,4,0.24565601348876953,16.28293133635365,61.41400337219238,83.7705078125,,cuda +run_1763526735_optimized,1763526735.7645717,True,96,4,0.013253450393676758,301.80819946392273,3.3133625984191895,86.14013671875,,cuda +run_1763526738_non_optimized,1763526738.6402805,False,100,0,0.0002372264862060547,0,0,73.24609375,,cuda +run_1763526738_optimized,1763526738.7505527,True,100,0,0.10556960105895996,0,0,86.09814453125,,cuda +run_1763526741_non_optimized,1763526741.855399,False,98,2,0.23850727081298828,8.385488598241453,119.25363540649414,83.7705078125,,cuda +run_1763526741_optimized,1763526741.8686676,True,98,2,0.008454322814941406,236.56536943034405,4.227161407470703,86.14013671875,,cuda +run_1763526744_non_optimized,1763526744.9965777,False,95,5,0.2477104663848877,20.184855621849653,49.54209327697754,83.7705078125,,cuda +run_1763526744_optimized,1763526745.0179746,True,95,5,0.016021251678466797,312.0854787344862,3.2042503356933594,86.14013671875,,cuda +run_1763526747_non_optimized,1763526748.1259825,False,99,1,0.2374560832977295,4.211305038440183,237.4560832977295,83.72900390625,,cuda +run_1763526747_optimized,1763526748.1369371,True,99,1,0.006153583526611328,162.50693529639673,6.153583526611328,86.14013671875,,cuda +run_1763526750_non_optimized,1763526750.990292,False,107,0,0.0002415180206298828,0,0,73.24609375,,cuda +run_1763526750_optimized,1763526751.1003447,True,107,0,0.10532331466674805,0,0,86.431640625,,cuda +run_1763526753_non_optimized,1763526753.9551547,False,111,0,0.0002372264862060547,0,0,73.24609375,,cuda +run_1763526753_optimized,1763526754.065468,True,111,0,0.10554146766662598,0,0,86.62255859375,,cuda +run_1763526756_non_optimized,1763526756.9100564,False,104,0,0.000232696533203125,0,0,73.24609375,,cuda +run_1763526756_optimized,1763526757.0201974,True,104,0,0.10536527633666992,0,0,86.28857421875,,cuda +run_1763526759_non_optimized,1763526760.134444,False,97,3,0.24916291236877441,12.04031519570553,83.0543041229248,83.7705078125,,cuda +run_1763526759_optimized,1763526760.1502607,True,97,3,0.01096487045288086,273.6010437051533,3.654956817626953,86.14013671875,,cuda +run_1763526763_non_optimized,1763526763.0106947,False,102,0,0.0002446174621582031,0,0,73.24609375,,cuda +run_1763526763_optimized,1763526763.1213396,True,102,0,0.1058340072631836,0,0,86.193359375,,cuda +run_1763526765_non_optimized,1763526765.988826,False,102,0,0.0002529621124267578,0,0,73.24609375,,cuda +run_1763526765_optimized,1763526766.099505,True,102,0,0.10576152801513672,0,0,86.193359375,,cuda +run_1763526768_non_optimized,1763526768.9637494,False,105,0,0.00024008750915527344,0,0,73.24609375,,cuda +run_1763526768_optimized,1763526769.0741549,True,105,0,0.10558032989501953,0,0,86.33642578125,,cuda +run_1763526771_non_optimized,1763526772.1941426,False,94,6,0.25710487365722656,23.33678049214745,42.85081227620443,83.7705078125,,cuda +run_1763526771_optimized,1763526772.217405,True,94,6,0.01833653450012207,327.215592453419,3.0560890833536782,86.14013671875,,cuda +run_1763526775_non_optimized,1763526775.0632038,False,102,0,0.00023651123046875,0,0,73.24609375,,cuda +run_1763526775_optimized,1763526775.1732247,True,102,0,0.10519266128540039,0,0,86.193359375,,cuda +run_1763526778_non_optimized,1763526778.0269144,False,103,0,0.00025081634521484375,0,0,73.24609375,,cuda +run_1763526778_optimized,1763526778.1373425,True,103,0,0.1055910587310791,0,0,86.24072265625,,cuda +run_1763526780_non_optimized,1763526780.9818347,False,107,0,0.00024700164794921875,0,0,73.24609375,,cuda +run_1763526780_optimized,1763526781.0923955,True,107,0,0.10562825202941895,0,0,86.431640625,,cuda +run_1763526783_non_optimized,1763526783.948955,False,110,0,0.00024271011352539062,0,0,73.24609375,,cuda +run_1763526783_optimized,1763526784.0592883,True,110,0,0.10540580749511719,0,0,86.57470703125,,cuda +run_1763526786_non_optimized,1763526786.926905,False,106,0,0.0002503395080566406,0,0,73.24609375,,cuda +run_1763526786_optimized,1763526787.0370946,True,106,0,0.10536813735961914,0,0,86.3837890625,,cuda +run_1763526789_non_optimized,1763526789.8929183,False,109,0,0.00025582313537597656,0,0,73.24609375,,cuda +run_1763526789_optimized,1763526790.0033913,True,109,0,0.10552310943603516,0,0,86.52685546875,,cuda +run_1763526792_non_optimized,1763526792.8616316,False,110,0,0.0002377033233642578,0,0,73.24609375,,cuda +run_1763526792_optimized,1763526792.972686,True,110,0,0.10608291625976562,0,0,86.57470703125,,cuda +run_1763526795_non_optimized,1763526795.8468068,False,114,0,0.00024056434631347656,0,0,73.24609375,,cuda +run_1763526795_optimized,1763526795.9571438,True,114,0,0.1053767204284668,0,0,86.765625,,cuda +run_1763526798_non_optimized,1763526798.8149438,False,113,0,0.00024628639221191406,0,0,73.24609375,,cuda +run_1763526798_optimized,1763526798.9252326,True,113,0,0.10529732704162598,0,0,86.7177734375,,cuda +run_1763526801_non_optimized,1763526801.7890985,False,113,0,0.0002460479736328125,0,0,73.24609375,,cuda +run_1763526801_optimized,1763526801.899515,True,113,0,0.1053619384765625,0,0,86.7177734375,,cuda +run_1763526804_non_optimized,1763526804.7603977,False,112,0,0.00023674964904785156,0,0,73.24609375,,cuda +run_1763526804_optimized,1763526804.8709013,True,112,0,0.10542559623718262,0,0,86.669921875,,cuda +run_1763526807_non_optimized,1763526807.7322128,False,109,0,0.0002338886260986328,0,0,73.24609375,,cuda +run_1763526807_optimized,1763526807.842512,True,109,0,0.10524344444274902,0,0,86.52685546875,,cuda +run_1763526810_non_optimized,1763526810.7003376,False,102,0,0.00023674964904785156,0,0,73.24609375,,cuda +run_1763526810_optimized,1763526810.8109784,True,102,0,0.10563492774963379,0,0,86.193359375,,cuda +run_1763526813_non_optimized,1763526813.6818757,False,105,0,0.0002524852752685547,0,0,73.24609375,,cuda +run_1763526813_optimized,1763526813.7921646,True,105,0,0.10520672798156738,0,0,86.33642578125,,cuda +run_1763526816_non_optimized,1763526816.6591551,False,109,0,0.00023221969604492188,0,0,73.24609375,,cuda +run_1763526816_optimized,1763526816.7695994,True,109,0,0.10536456108093262,0,0,86.52685546875,,cuda +run_1763526819_non_optimized,1763526819.6383152,False,103,0,0.00023484230041503906,0,0,73.24609375,,cuda +run_1763526819_optimized,1763526819.7488954,True,103,0,0.10547161102294922,0,0,86.24072265625,,cuda +run_1763526822_non_optimized,1763526822.6187825,False,102,0,0.0002391338348388672,0,0,73.24609375,,cuda +run_1763526822_optimized,1763526822.7292917,True,102,0,0.10544276237487793,0,0,86.193359375,,cuda +run_1763526825_non_optimized,1763526825.583301,False,107,0,0.00023412704467773438,0,0,73.24609375,,cuda +run_1763526825_optimized,1763526825.6933794,True,107,0,0.10494589805603027,0,0,86.431640625,,cuda +run_1763526828_non_optimized,1763526828.5638628,False,112,0,0.00024127960205078125,0,0,73.24609375,,cuda +run_1763526828_optimized,1763526828.6741154,True,112,0,0.1051325798034668,0,0,86.669921875,,cuda +run_1763526831_non_optimized,1763526831.545799,False,105,0,0.00023818016052246094,0,0,73.24609375,,cuda +run_1763526831_optimized,1763526831.6566033,True,105,0,0.10563468933105469,0,0,86.33642578125,,cuda +run_1763526834_non_optimized,1763526834.7725186,False,97,3,0.2505476474761963,11.973770379484487,83.51588249206543,83.7705078125,,cuda +run_1763526834_optimized,1763526834.7885733,True,97,3,0.01080775260925293,277.5785224239483,3.60258420308431,86.14013671875,,cuda +run_1763526837_non_optimized,1763526837.7080257,False,102,0,0.00025272369384765625,0,0,73.24609375,,cuda +run_1763526837_optimized,1763526837.8196452,True,102,0,0.10632824897766113,0,0,86.193359375,,cuda +run_1763526840_non_optimized,1763526840.7012856,False,102,0,0.00023818016052246094,0,0,73.24609375,,cuda +run_1763526840_optimized,1763526840.8124819,True,102,0,0.10594463348388672,0,0,86.193359375,,cuda +run_1763526843_non_optimized,1763526843.7108738,False,109,0,0.0002484321594238281,0,0,73.24609375,,cuda +run_1763526843_optimized,1763526843.8216114,True,109,0,0.10553407669067383,0,0,86.52685546875,,cuda +run_1763526846_non_optimized,1763526846.7072785,False,112,0,0.00023412704467773438,0,0,73.24609375,,cuda +run_1763526846_optimized,1763526846.8180177,True,112,0,0.10541939735412598,0,0,86.669921875,,cuda +run_1763526849_non_optimized,1763526849.695143,False,107,0,0.0002357959747314453,0,0,73.24609375,,cuda +run_1763526849_optimized,1763526849.806518,True,107,0,0.10609865188598633,0,0,86.431640625,,cuda +run_1763526852_non_optimized,1763526852.698653,False,106,0,0.0002410411834716797,0,0,73.24609375,,cuda +run_1763526852_optimized,1763526852.8100939,True,106,0,0.10619211196899414,0,0,86.3837890625,,cuda +run_1763526855_non_optimized,1763526855.6878016,False,105,0,0.00023484230041503906,0,0,73.24609375,,cuda +run_1763526855_optimized,1763526855.798402,True,105,0,0.10535454750061035,0,0,86.33642578125,,cuda +run_1763526858_non_optimized,1763526858.673868,False,102,0,0.00024962425231933594,0,0,73.24609375,,cuda +run_1763526858_optimized,1763526858.7845938,True,102,0,0.1053924560546875,0,0,86.193359375,,cuda +run_1763526861_non_optimized,1763526861.6461842,False,106,0,0.0002357959747314453,0,0,73.24609375,,cuda +run_1763526861_optimized,1763526861.7578971,True,106,0,0.10634636878967285,0,0,86.3837890625,,cuda +run_1763526864_non_optimized,1763526864.629814,False,105,0,0.0002498626708984375,0,0,73.24609375,,cuda +run_1763526864_optimized,1763526864.7408218,True,105,0,0.1056220531463623,0,0,86.33642578125,,cuda +run_1763526867_non_optimized,1763526867.6077204,False,105,0,0.0002434253692626953,0,0,73.24609375,,cuda +run_1763526867_optimized,1763526867.7184992,True,105,0,0.10547971725463867,0,0,86.33642578125,,cuda +run_1763526870_non_optimized,1763526870.5837529,False,110,0,0.00023603439331054688,0,0,73.24609375,,cuda +run_1763526870_optimized,1763526870.6944253,True,110,0,0.10529303550720215,0,0,86.57470703125,,cuda diff --git a/inference_benchmarks/inference_metrics.json b/inference_benchmarks/inference_metrics.json index 4e48239..45fec15 100644 --- a/inference_benchmarks/inference_metrics.json +++ b/inference_benchmarks/inference_metrics.json @@ -5199,6 +5199,2606 @@ "memory_used_mb": 73.2470703125, "gpu_utilization": null, "device": "cuda" + }, + { + "run_name": "run_1763526557_non_optimized", + "timestamp": 1763526558.0903924, + "optimized": false, + "prompt_length": 53, + "generated_length": 47, + "total_time": 0.35442304611206055, + "tokens_per_second": 132.60988673163106, + "time_per_token": 7.540915874724693, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526557_optimized", + "timestamp": 1763526558.221367, + "optimized": true, + "prompt_length": 53, + "generated_length": 47, + "total_time": 0.12709331512451172, + "tokens_per_second": 369.8070189919485, + "time_per_token": 2.7041130877555686, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526561_non_optimized", + "timestamp": 1763526561.392078, + "optimized": false, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.3155982494354248, + "tokens_per_second": 107.73190301537717, + "time_per_token": 9.282301453983083, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526561_optimized", + "timestamp": 1763526561.485392, + "optimized": true, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.08941459655761719, + "tokens_per_second": 380.25111457155657, + "time_per_token": 2.629841075224035, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526564_non_optimized", + "timestamp": 1763526564.6852732, + "optimized": false, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.3313627243041992, + "tokens_per_second": 120.71363815586875, + "time_per_token": 8.28406810760498, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526564_optimized", + "timestamp": 1763526564.794894, + "optimized": true, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.10575437545776367, + "tokens_per_second": 378.2349413616012, + "time_per_token": 2.643859386444092, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526567_non_optimized", + "timestamp": 1763526567.9853783, + "optimized": false, + "prompt_length": 61, + "generated_length": 39, + "total_time": 0.33505964279174805, + "tokens_per_second": 116.39718730387337, + "time_per_token": 8.591272892096104, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526567_optimized", + "timestamp": 1763526568.0917747, + "optimized": true, + "prompt_length": 61, + "generated_length": 39, + "total_time": 0.10251092910766602, + "tokens_per_second": 380.4472395234928, + "time_per_token": 2.628485361735026, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526570_non_optimized", + "timestamp": 1763526571.282682, + "optimized": false, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.33055973052978516, + "tokens_per_second": 121.00687502344086, + "time_per_token": 8.263993263244629, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526570_optimized", + "timestamp": 1763526571.3913667, + "optimized": true, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.10458731651306152, + "tokens_per_second": 382.4555532506138, + "time_per_token": 2.614682912826538, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526574_non_optimized", + "timestamp": 1763526574.5597281, + "optimized": false, + "prompt_length": 64, + "generated_length": 36, + "total_time": 0.3207573890686035, + "tokens_per_second": 112.23435913521645, + "time_per_token": 8.909927474127876, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526574_optimized", + "timestamp": 1763526574.657765, + "optimized": true, + "prompt_length": 64, + "generated_length": 36, + "total_time": 0.09418869018554688, + "tokens_per_second": 382.211494066664, + "time_per_token": 2.61635250515408, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526577_non_optimized", + "timestamp": 1763526577.870434, + "optimized": false, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.34548068046569824, + "tokens_per_second": 133.1478215742579, + "time_per_token": 7.510449575341266, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526577_optimized", + "timestamp": 1763526577.996214, + "optimized": true, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.12188839912414551, + "tokens_per_second": 377.39440611692817, + "time_per_token": 2.649747807046641, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526580_non_optimized", + "timestamp": 1763526581.1706474, + "optimized": false, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.31485915184020996, + "tokens_per_second": 104.80876864188276, + "time_per_token": 9.5411864194003, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526580_optimized", + "timestamp": 1763526581.2631214, + "optimized": true, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.08859968185424805, + "tokens_per_second": 372.4618340536148, + "time_per_token": 2.684838844068123, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526584_non_optimized", + "timestamp": 1763526584.4379666, + "optimized": false, + "prompt_length": 63, + "generated_length": 37, + "total_time": 0.32149577140808105, + "tokens_per_second": 115.08705025247488, + "time_per_token": 8.689074902921108, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526584_optimized", + "timestamp": 1763526584.538549, + "optimized": true, + "prompt_length": 63, + "generated_length": 37, + "total_time": 0.09666585922241211, + "tokens_per_second": 382.7618178499726, + "time_per_token": 2.612590789794922, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526587_non_optimized", + "timestamp": 1763526587.7008932, + "optimized": false, + "prompt_length": 73, + "generated_length": 27, + "total_time": 0.30018019676208496, + "tokens_per_second": 89.94597342275547, + "time_per_token": 11.117785065262407, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526587_optimized", + "timestamp": 1763526587.775923, + "optimized": true, + "prompt_length": 73, + "generated_length": 27, + "total_time": 0.0710904598236084, + "tokens_per_second": 379.79779659595874, + "time_per_token": 2.6329799934669778, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526590_non_optimized", + "timestamp": 1763526590.9904318, + "optimized": false, + "prompt_length": 46, + "generated_length": 54, + "total_time": 0.36146068572998047, + "tokens_per_second": 149.3938404143328, + "time_per_token": 6.693716402407046, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526590_optimized", + "timestamp": 1763526591.1326938, + "optimized": true, + "prompt_length": 46, + "generated_length": 54, + "total_time": 0.1380624771118164, + "tokens_per_second": 391.1272717225373, + "time_per_token": 2.556712539107711, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526593_non_optimized", + "timestamp": 1763526594.285255, + "optimized": false, + "prompt_length": 70, + "generated_length": 30, + "total_time": 0.3071746826171875, + "tokens_per_second": 97.66429884258109, + "time_per_token": 10.239156087239582, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526593_optimized", + "timestamp": 1763526594.3690982, + "optimized": true, + "prompt_length": 70, + "generated_length": 30, + "total_time": 0.07981586456298828, + "tokens_per_second": 375.8651261156847, + "time_per_token": 2.6605288187662763, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526597_non_optimized", + "timestamp": 1763526597.541682, + "optimized": false, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.3228602409362793, + "tokens_per_second": 105.30872398967932, + "time_per_token": 9.495889439302333, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526597_optimized", + "timestamp": 1763526597.6348116, + "optimized": true, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.08911824226379395, + "tokens_per_second": 381.515603722956, + "time_per_token": 2.621124772464528, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526600_non_optimized", + "timestamp": 1763526600.8545513, + "optimized": false, + "prompt_length": 50, + "generated_length": 50, + "total_time": 0.3591768741607666, + "tokens_per_second": 139.20718063162423, + "time_per_token": 7.183537483215332, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526600_optimized", + "timestamp": 1763526600.9873838, + "optimized": true, + "prompt_length": 50, + "generated_length": 50, + "total_time": 0.12882018089294434, + "tokens_per_second": 388.13794277740044, + "time_per_token": 2.5764036178588867, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526603_non_optimized", + "timestamp": 1763526604.1675096, + "optimized": false, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.323777437210083, + "tokens_per_second": 105.01040558282972, + "time_per_token": 9.52286580029656, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526603_optimized", + "timestamp": 1763526604.2624831, + "optimized": true, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.09089446067810059, + "tokens_per_second": 374.060198458185, + "time_per_token": 2.6733664905323704, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526607_non_optimized", + "timestamp": 1763526607.4869204, + "optimized": false, + "prompt_length": 43, + "generated_length": 57, + "total_time": 0.37198734283447266, + "tokens_per_second": 153.23102008168036, + "time_per_token": 6.526093733938117, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526607_optimized", + "timestamp": 1763526607.6380951, + "optimized": true, + "prompt_length": 43, + "generated_length": 57, + "total_time": 0.14709210395812988, + "tokens_per_second": 387.51230328600906, + "time_per_token": 2.580563227335612, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526610_non_optimized", + "timestamp": 1763526610.8189526, + "optimized": false, + "prompt_length": 61, + "generated_length": 39, + "total_time": 0.325711727142334, + "tokens_per_second": 119.73778267724835, + "time_per_token": 8.351582747239332, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526610_optimized", + "timestamp": 1763526610.9252608, + "optimized": true, + "prompt_length": 61, + "generated_length": 39, + "total_time": 0.10221171379089355, + "tokens_per_second": 381.5609635485308, + "time_per_token": 2.620813174125476, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526613_non_optimized", + "timestamp": 1763526614.11627, + "optimized": false, + "prompt_length": 58, + "generated_length": 42, + "total_time": 0.3369486331939697, + "tokens_per_second": 124.64807944723742, + "time_per_token": 8.022586504618326, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526613_optimized", + "timestamp": 1763526614.2310555, + "optimized": true, + "prompt_length": 58, + "generated_length": 42, + "total_time": 0.11066627502441406, + "tokens_per_second": 379.51941538408505, + "time_per_token": 2.634911310105097, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526617_non_optimized", + "timestamp": 1763526617.3865545, + "optimized": false, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.3230869770050049, + "tokens_per_second": 105.23482040402178, + "time_per_token": 9.502558147206026, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526617_optimized", + "timestamp": 1763526617.4830964, + "optimized": true, + "prompt_length": 66, + "generated_length": 34, + "total_time": 0.09008550643920898, + "tokens_per_second": 377.41920253224856, + "time_per_token": 2.6495737188002644, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526620_non_optimized", + "timestamp": 1763526620.6447241, + "optimized": false, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.32021570205688477, + "tokens_per_second": 103.05553346705562, + "time_per_token": 9.703506122935902, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526620_optimized", + "timestamp": 1763526620.7375038, + "optimized": true, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.08865785598754883, + "tokens_per_second": 372.21743783917515, + "time_per_token": 2.6866016965923887, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526623_non_optimized", + "timestamp": 1763526623.9323657, + "optimized": false, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.34384703636169434, + "tokens_per_second": 119.2390675627982, + "time_per_token": 8.386513081992545, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526623_optimized", + "timestamp": 1763526624.045949, + "optimized": true, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.10931515693664551, + "tokens_per_second": 375.06235319006936, + "time_per_token": 2.666223339918183, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526626_non_optimized", + "timestamp": 1763526627.2393768, + "optimized": false, + "prompt_length": 63, + "generated_length": 37, + "total_time": 0.3268465995788574, + "tokens_per_second": 113.20295223408958, + "time_per_token": 8.83369188050966, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526626_optimized", + "timestamp": 1763526627.3409727, + "optimized": true, + "prompt_length": 63, + "generated_length": 37, + "total_time": 0.09740710258483887, + "tokens_per_second": 379.84909742874277, + "time_per_token": 2.632624394184834, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526630_non_optimized", + "timestamp": 1763526630.5474806, + "optimized": false, + "prompt_length": 53, + "generated_length": 47, + "total_time": 0.35274362564086914, + "tokens_per_second": 133.2412454360013, + "time_per_token": 7.505183524273812, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526630_optimized", + "timestamp": 1763526630.6753845, + "optimized": true, + "prompt_length": 53, + "generated_length": 47, + "total_time": 0.12363100051879883, + "tokens_per_second": 380.1635496175846, + "time_per_token": 2.630446819548911, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526633_non_optimized", + "timestamp": 1763526633.845649, + "optimized": false, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.3140745162963867, + "tokens_per_second": 105.07060677555408, + "time_per_token": 9.51740958473899, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526633_optimized", + "timestamp": 1763526633.9381201, + "optimized": true, + "prompt_length": 67, + "generated_length": 33, + "total_time": 0.08824992179870605, + "tokens_per_second": 373.9380084128738, + "time_per_token": 2.674240054506244, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526636_non_optimized", + "timestamp": 1763526637.118738, + "optimized": false, + "prompt_length": 64, + "generated_length": 36, + "total_time": 0.32010531425476074, + "tokens_per_second": 112.4629876383397, + "time_per_token": 8.891814284854464, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526636_optimized", + "timestamp": 1763526637.2179773, + "optimized": true, + "prompt_length": 64, + "generated_length": 36, + "total_time": 0.09497523307800293, + "tokens_per_second": 379.04618744587117, + "time_per_token": 2.6382009188334146, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526640_non_optimized", + "timestamp": 1763526640.4208992, + "optimized": false, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.34542250633239746, + "tokens_per_second": 133.17024558826677, + "time_per_token": 7.509184920269511, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526640_optimized", + "timestamp": 1763526640.5462801, + "optimized": true, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.12082409858703613, + "tokens_per_second": 380.718751788276, + "time_per_token": 2.6266108388486114, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526643_non_optimized", + "timestamp": 1763526643.7587085, + "optimized": false, + "prompt_length": 51, + "generated_length": 49, + "total_time": 0.35083961486816406, + "tokens_per_second": 139.66495778537683, + "time_per_token": 7.159992140166613, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526643_optimized", + "timestamp": 1763526643.8911211, + "optimized": true, + "prompt_length": 51, + "generated_length": 49, + "total_time": 0.12817978858947754, + "tokens_per_second": 382.2755563822367, + "time_per_token": 2.61591405284648, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526646_non_optimized", + "timestamp": 1763526647.0730722, + "optimized": false, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.33283185958862305, + "tokens_per_second": 120.18080255129306, + "time_per_token": 8.320796489715576, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526646_optimized", + "timestamp": 1763526647.1849632, + "optimized": true, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.10762190818786621, + "tokens_per_second": 371.6715367114238, + "time_per_token": 2.6905477046966553, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526650_non_optimized", + "timestamp": 1763526650.3852422, + "optimized": false, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.34725308418273926, + "tokens_per_second": 126.70873781741659, + "time_per_token": 7.89211554960771, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526650_optimized", + "timestamp": 1763526650.5039096, + "optimized": true, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.11433672904968262, + "tokens_per_second": 384.8282206925889, + "time_per_token": 2.598562023856423, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526653_non_optimized", + "timestamp": 1763526653.6856203, + "optimized": false, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.31678080558776855, + "tokens_per_second": 110.48649218206107, + "time_per_token": 9.05088015965053, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526653_optimized", + "timestamp": 1763526653.7829595, + "optimized": true, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.09303498268127441, + "tokens_per_second": 376.2025744649772, + "time_per_token": 2.658142362322126, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526656_non_optimized", + "timestamp": 1763526656.986791, + "optimized": false, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.33247804641723633, + "tokens_per_second": 123.31641274307752, + "time_per_token": 8.109220644322837, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526656_optimized", + "timestamp": 1763526657.097655, + "optimized": true, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.10654258728027344, + "tokens_per_second": 384.8226427254337, + "time_per_token": 2.598599689762767, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526659_non_optimized", + "timestamp": 1763526660.297603, + "optimized": false, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.3501875400543213, + "tokens_per_second": 131.35818593906697, + "time_per_token": 7.61277260987655, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526659_optimized", + "timestamp": 1763526660.426392, + "optimized": true, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.12442255020141602, + "tokens_per_second": 369.7079020284901, + "time_per_token": 2.7048380478568697, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526663_non_optimized", + "timestamp": 1763526663.6286657, + "optimized": false, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.3505382537841797, + "tokens_per_second": 131.22676199648498, + "time_per_token": 7.62039682139521, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526663_optimized", + "timestamp": 1763526663.7522638, + "optimized": true, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.11925172805786133, + "tokens_per_second": 385.7386450423649, + "time_per_token": 2.5924288708230723, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526666_non_optimized", + "timestamp": 1763526666.9662895, + "optimized": false, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.33091211318969727, + "tokens_per_second": 123.8999672897937, + "time_per_token": 8.071027150968225, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526666_optimized", + "timestamp": 1763526667.077263, + "optimized": true, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.10660624504089355, + "tokens_per_second": 384.5928536763736, + "time_per_token": 2.6001523180705743, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526669_non_optimized", + "timestamp": 1763526670.2709506, + "optimized": false, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.32552552223205566, + "tokens_per_second": 107.51845127230834, + "time_per_token": 9.300729206630162, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526669_optimized", + "timestamp": 1763526670.3674176, + "optimized": true, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.09210538864135742, + "tokens_per_second": 379.9994822917907, + "time_per_token": 2.631582532610212, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526673_non_optimized", + "timestamp": 1763526673.5689995, + "optimized": false, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.32962512969970703, + "tokens_per_second": 121.34997121257273, + "time_per_token": 8.240628242492676, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526673_optimized", + "timestamp": 1763526673.678643, + "optimized": true, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.10524392127990723, + "tokens_per_second": 380.0694568726284, + "time_per_token": 2.6310980319976807, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526676_non_optimized", + "timestamp": 1763526676.8867915, + "optimized": false, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.3409693241119385, + "tokens_per_second": 129.04386667216733, + "time_per_token": 7.749302820725875, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526676_optimized", + "timestamp": 1763526677.0066595, + "optimized": true, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.11539912223815918, + "tokens_per_second": 381.2853958212384, + "time_per_token": 2.622707323594527, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526679_non_optimized", + "timestamp": 1763526680.210214, + "optimized": false, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.34114503860473633, + "tokens_per_second": 120.18348608465084, + "time_per_token": 8.320610697676496, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526679_optimized", + "timestamp": 1763526680.3233387, + "optimized": true, + "prompt_length": 59, + "generated_length": 41, + "total_time": 0.10867524147033691, + "tokens_per_second": 377.27084334283273, + "time_per_token": 2.6506156456179735, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526683_non_optimized", + "timestamp": 1763526683.5293164, + "optimized": false, + "prompt_length": 49, + "generated_length": 51, + "total_time": 0.3626677989959717, + "tokens_per_second": 140.6245609375606, + "time_per_token": 7.111133313646503, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526683_optimized", + "timestamp": 1763526683.6654575, + "optimized": true, + "prompt_length": 49, + "generated_length": 51, + "total_time": 0.13167476654052734, + "tokens_per_second": 387.31794511519433, + "time_per_token": 2.581858167461321, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526686_non_optimized", + "timestamp": 1763526686.8576732, + "optimized": false, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.3366522789001465, + "tokens_per_second": 127.7282308632585, + "time_per_token": 7.829122765119686, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526686_optimized", + "timestamp": 1763526686.973754, + "optimized": true, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.11158990859985352, + "tokens_per_second": 385.33950371975163, + "time_per_token": 2.5951141534849658, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526689_non_optimized", + "timestamp": 1763526690.151102, + "optimized": false, + "prompt_length": 68, + "generated_length": 32, + "total_time": 0.3112297058105469, + "tokens_per_second": 102.81794893794354, + "time_per_token": 9.72592830657959, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526689_optimized", + "timestamp": 1763526690.239994, + "optimized": true, + "prompt_length": 68, + "generated_length": 32, + "total_time": 0.08438873291015625, + "tokens_per_second": 379.19754090950187, + "time_per_token": 2.637147903442383, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526693_non_optimized", + "timestamp": 1763526693.4361937, + "optimized": false, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.33553242683410645, + "tokens_per_second": 128.15452862700513, + "time_per_token": 7.803079693816429, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526693_optimized", + "timestamp": 1763526693.5529184, + "optimized": true, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.11221909523010254, + "tokens_per_second": 383.1789938408391, + "time_per_token": 2.6097464007000593, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526696_non_optimized", + "timestamp": 1763526696.7601304, + "optimized": false, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.34420084953308105, + "tokens_per_second": 124.92705947219716, + "time_per_token": 8.004670919373979, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526696_optimized", + "timestamp": 1763526696.8780057, + "optimized": true, + "prompt_length": 57, + "generated_length": 43, + "total_time": 0.11339879035949707, + "tokens_per_second": 379.192757380227, + "time_per_token": 2.6371811711510946, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526699_non_optimized", + "timestamp": 1763526700.0553536, + "optimized": false, + "prompt_length": 62, + "generated_length": 38, + "total_time": 0.32698941230773926, + "tokens_per_second": 116.21171380386193, + "time_per_token": 8.604984534414191, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526699_optimized", + "timestamp": 1763526700.160437, + "optimized": true, + "prompt_length": 62, + "generated_length": 38, + "total_time": 0.10051822662353516, + "tokens_per_second": 378.04089145264277, + "time_per_token": 2.6452164900930306, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526702_non_optimized", + "timestamp": 1763526703.3607254, + "optimized": false, + "prompt_length": 48, + "generated_length": 52, + "total_time": 0.3581371307373047, + "tokens_per_second": 145.19577987612305, + "time_per_token": 6.887252514178936, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526702_optimized", + "timestamp": 1763526703.501407, + "optimized": true, + "prompt_length": 48, + "generated_length": 52, + "total_time": 0.1360619068145752, + "tokens_per_second": 382.17897439042554, + "time_per_token": 2.6165751310495233, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526706_non_optimized", + "timestamp": 1763526706.6655996, + "optimized": false, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.32056474685668945, + "tokens_per_second": 109.18231135267965, + "time_per_token": 9.158992767333984, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526706_optimized", + "timestamp": 1763526706.7639794, + "optimized": true, + "prompt_length": 65, + "generated_length": 35, + "total_time": 0.09380245208740234, + "tokens_per_second": 373.1245742636668, + "time_per_token": 2.680070059640067, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526709_non_optimized", + "timestamp": 1763526709.9760077, + "optimized": false, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.3535304069519043, + "tokens_per_second": 130.11610626821704, + "time_per_token": 7.685443629389224, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526709_optimized", + "timestamp": 1763526710.1015396, + "optimized": true, + "prompt_length": 54, + "generated_length": 46, + "total_time": 0.12090945243835449, + "tokens_per_second": 380.44999023920843, + "time_per_token": 2.6284663573555327, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526712_non_optimized", + "timestamp": 1763526713.2750318, + "optimized": false, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.3372828960418701, + "tokens_per_second": 118.59480711715196, + "time_per_token": 8.432072401046753, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526712_optimized", + "timestamp": 1763526713.3839555, + "optimized": true, + "prompt_length": 60, + "generated_length": 40, + "total_time": 0.1043252944946289, + "tokens_per_second": 383.41612351795817, + "time_per_token": 2.6081323623657227, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526716_non_optimized", + "timestamp": 1763526716.5846882, + "optimized": false, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.3465569019317627, + "tokens_per_second": 126.96327718402686, + "time_per_token": 7.87629322572188, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526716_optimized", + "timestamp": 1763526716.704748, + "optimized": true, + "prompt_length": 56, + "generated_length": 44, + "total_time": 0.11548757553100586, + "tokens_per_second": 380.9933648506369, + "time_per_token": 2.6247176257046787, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526719_non_optimized", + "timestamp": 1763526719.9162998, + "optimized": false, + "prompt_length": 55, + "generated_length": 45, + "total_time": 0.34871912002563477, + "tokens_per_second": 129.04368420260982, + "time_per_token": 7.74931377834744, + "memory_used_mb": 83.77001953125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526719_optimized", + "timestamp": 1763526720.0379987, + "optimized": true, + "prompt_length": 55, + "generated_length": 45, + "total_time": 0.11701393127441406, + "tokens_per_second": 384.5695936364081, + "time_per_token": 2.6003095838758683, + "memory_used_mb": 86.13916015625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526722_non_optimized", + "timestamp": 1763526723.1653128, + "optimized": false, + "prompt_length": 86, + "generated_length": 14, + "total_time": 0.2764289379119873, + "tokens_per_second": 50.64592768669351, + "time_per_token": 19.74492413657052, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526722_optimized", + "timestamp": 1763526723.2101393, + "optimized": true, + "prompt_length": 86, + "generated_length": 14, + "total_time": 0.04014396667480469, + "tokens_per_second": 348.74480923647076, + "time_per_token": 2.867426191057478, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526726_non_optimized", + "timestamp": 1763526726.321796, + "optimized": false, + "prompt_length": 87, + "generated_length": 13, + "total_time": 0.2667272090911865, + "tokens_per_second": 48.73893460024045, + "time_per_token": 20.517477622398964, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526726_optimized", + "timestamp": 1763526726.3628628, + "optimized": true, + "prompt_length": 87, + "generated_length": 13, + "total_time": 0.03642010688781738, + "tokens_per_second": 356.9456849767932, + "time_per_token": 2.80154668367826, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526729_non_optimized", + "timestamp": 1763526729.4611804, + "optimized": false, + "prompt_length": 92, + "generated_length": 8, + "total_time": 0.2541630268096924, + "tokens_per_second": 31.47586059395687, + "time_per_token": 31.770378351211548, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526729_optimized", + "timestamp": 1763526729.489691, + "optimized": true, + "prompt_length": 92, + "generated_length": 8, + "total_time": 0.02371382713317871, + "tokens_per_second": 337.3559212973669, + "time_per_token": 2.964228391647339, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526732_non_optimized", + "timestamp": 1763526732.6022036, + "optimized": false, + "prompt_length": 91, + "generated_length": 9, + "total_time": 0.25944042205810547, + "tokens_per_second": 34.69004532371721, + "time_per_token": 28.82671356201172, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526732_optimized", + "timestamp": 1763526732.6337376, + "optimized": true, + "prompt_length": 91, + "generated_length": 9, + "total_time": 0.02675771713256836, + "tokens_per_second": 336.3515637530072, + "time_per_token": 2.9730796813964844, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526735_non_optimized", + "timestamp": 1763526735.746593, + "optimized": false, + "prompt_length": 96, + "generated_length": 4, + "total_time": 0.24565601348876953, + "tokens_per_second": 16.28293133635365, + "time_per_token": 61.41400337219238, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526735_optimized", + "timestamp": 1763526735.7645717, + "optimized": true, + "prompt_length": 96, + "generated_length": 4, + "total_time": 0.013253450393676758, + "tokens_per_second": 301.80819946392273, + "time_per_token": 3.3133625984191895, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526738_non_optimized", + "timestamp": 1763526738.6402805, + "optimized": false, + "prompt_length": 100, + "generated_length": 0, + "total_time": 0.0002372264862060547, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526738_optimized", + "timestamp": 1763526738.7505527, + "optimized": true, + "prompt_length": 100, + "generated_length": 0, + "total_time": 0.10556960105895996, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.09814453125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526741_non_optimized", + "timestamp": 1763526741.855399, + "optimized": false, + "prompt_length": 98, + "generated_length": 2, + "total_time": 0.23850727081298828, + "tokens_per_second": 8.385488598241453, + "time_per_token": 119.25363540649414, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526741_optimized", + "timestamp": 1763526741.8686676, + "optimized": true, + "prompt_length": 98, + "generated_length": 2, + "total_time": 0.008454322814941406, + "tokens_per_second": 236.56536943034405, + "time_per_token": 4.227161407470703, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526744_non_optimized", + "timestamp": 1763526744.9965777, + "optimized": false, + "prompt_length": 95, + "generated_length": 5, + "total_time": 0.2477104663848877, + "tokens_per_second": 20.184855621849653, + "time_per_token": 49.54209327697754, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526744_optimized", + "timestamp": 1763526745.0179746, + "optimized": true, + "prompt_length": 95, + "generated_length": 5, + "total_time": 0.016021251678466797, + "tokens_per_second": 312.0854787344862, + "time_per_token": 3.2042503356933594, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526747_non_optimized", + "timestamp": 1763526748.1259825, + "optimized": false, + "prompt_length": 99, + "generated_length": 1, + "total_time": 0.2374560832977295, + "tokens_per_second": 4.211305038440183, + "time_per_token": 237.4560832977295, + "memory_used_mb": 83.72900390625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526747_optimized", + "timestamp": 1763526748.1369371, + "optimized": true, + "prompt_length": 99, + "generated_length": 1, + "total_time": 0.006153583526611328, + "tokens_per_second": 162.50693529639673, + "time_per_token": 6.153583526611328, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526750_non_optimized", + "timestamp": 1763526750.990292, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0002415180206298828, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526750_optimized", + "timestamp": 1763526751.1003447, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.10532331466674805, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.431640625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526753_non_optimized", + "timestamp": 1763526753.9551547, + "optimized": false, + "prompt_length": 111, + "generated_length": 0, + "total_time": 0.0002372264862060547, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526753_optimized", + "timestamp": 1763526754.065468, + "optimized": true, + "prompt_length": 111, + "generated_length": 0, + "total_time": 0.10554146766662598, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.62255859375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526756_non_optimized", + "timestamp": 1763526756.9100564, + "optimized": false, + "prompt_length": 104, + "generated_length": 0, + "total_time": 0.000232696533203125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526756_optimized", + "timestamp": 1763526757.0201974, + "optimized": true, + "prompt_length": 104, + "generated_length": 0, + "total_time": 0.10536527633666992, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.28857421875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526759_non_optimized", + "timestamp": 1763526760.134444, + "optimized": false, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.24916291236877441, + "tokens_per_second": 12.04031519570553, + "time_per_token": 83.0543041229248, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526759_optimized", + "timestamp": 1763526760.1502607, + "optimized": true, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.01096487045288086, + "tokens_per_second": 273.6010437051533, + "time_per_token": 3.654956817626953, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526763_non_optimized", + "timestamp": 1763526763.0106947, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0002446174621582031, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526763_optimized", + "timestamp": 1763526763.1213396, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.1058340072631836, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526765_non_optimized", + "timestamp": 1763526765.988826, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0002529621124267578, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526765_optimized", + "timestamp": 1763526766.099505, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10576152801513672, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526768_non_optimized", + "timestamp": 1763526768.9637494, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00024008750915527344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526768_optimized", + "timestamp": 1763526769.0741549, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.10558032989501953, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526771_non_optimized", + "timestamp": 1763526772.1941426, + "optimized": false, + "prompt_length": 94, + "generated_length": 6, + "total_time": 0.25710487365722656, + "tokens_per_second": 23.33678049214745, + "time_per_token": 42.85081227620443, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526771_optimized", + "timestamp": 1763526772.217405, + "optimized": true, + "prompt_length": 94, + "generated_length": 6, + "total_time": 0.01833653450012207, + "tokens_per_second": 327.215592453419, + "time_per_token": 3.0560890833536782, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526775_non_optimized", + "timestamp": 1763526775.0632038, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023651123046875, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526775_optimized", + "timestamp": 1763526775.1732247, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10519266128540039, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526778_non_optimized", + "timestamp": 1763526778.0269144, + "optimized": false, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.00025081634521484375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526778_optimized", + "timestamp": 1763526778.1373425, + "optimized": true, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.1055910587310791, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.24072265625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526780_non_optimized", + "timestamp": 1763526780.9818347, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.00024700164794921875, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526780_optimized", + "timestamp": 1763526781.0923955, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.10562825202941895, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.431640625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526783_non_optimized", + "timestamp": 1763526783.948955, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.00024271011352539062, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526783_optimized", + "timestamp": 1763526784.0592883, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.10540580749511719, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.57470703125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526786_non_optimized", + "timestamp": 1763526786.926905, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.0002503395080566406, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526786_optimized", + "timestamp": 1763526787.0370946, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.10536813735961914, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.3837890625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526789_non_optimized", + "timestamp": 1763526789.8929183, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00025582313537597656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526789_optimized", + "timestamp": 1763526790.0033913, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.10552310943603516, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.52685546875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526792_non_optimized", + "timestamp": 1763526792.8616316, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.0002377033233642578, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526792_optimized", + "timestamp": 1763526792.972686, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.10608291625976562, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.57470703125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526795_non_optimized", + "timestamp": 1763526795.8468068, + "optimized": false, + "prompt_length": 114, + "generated_length": 0, + "total_time": 0.00024056434631347656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526795_optimized", + "timestamp": 1763526795.9571438, + "optimized": true, + "prompt_length": 114, + "generated_length": 0, + "total_time": 0.1053767204284668, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526798_non_optimized", + "timestamp": 1763526798.8149438, + "optimized": false, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.00024628639221191406, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526798_optimized", + "timestamp": 1763526798.9252326, + "optimized": true, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.10529732704162598, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.7177734375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526801_non_optimized", + "timestamp": 1763526801.7890985, + "optimized": false, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.0002460479736328125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526801_optimized", + "timestamp": 1763526801.899515, + "optimized": true, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.1053619384765625, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.7177734375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526804_non_optimized", + "timestamp": 1763526804.7603977, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00023674964904785156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526804_optimized", + "timestamp": 1763526804.8709013, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.10542559623718262, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.669921875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526807_non_optimized", + "timestamp": 1763526807.7322128, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0002338886260986328, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526807_optimized", + "timestamp": 1763526807.842512, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.10524344444274902, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.52685546875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526810_non_optimized", + "timestamp": 1763526810.7003376, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023674964904785156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526810_optimized", + "timestamp": 1763526810.8109784, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10563492774963379, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526813_non_optimized", + "timestamp": 1763526813.6818757, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002524852752685547, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526813_optimized", + "timestamp": 1763526813.7921646, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.10520672798156738, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526816_non_optimized", + "timestamp": 1763526816.6591551, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00023221969604492188, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526816_optimized", + "timestamp": 1763526816.7695994, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.10536456108093262, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.52685546875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526819_non_optimized", + "timestamp": 1763526819.6383152, + "optimized": false, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.00023484230041503906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526819_optimized", + "timestamp": 1763526819.7488954, + "optimized": true, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.10547161102294922, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.24072265625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526822_non_optimized", + "timestamp": 1763526822.6187825, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0002391338348388672, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526822_optimized", + "timestamp": 1763526822.7292917, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10544276237487793, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526825_non_optimized", + "timestamp": 1763526825.583301, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.00023412704467773438, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526825_optimized", + "timestamp": 1763526825.6933794, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.10494589805603027, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.431640625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526828_non_optimized", + "timestamp": 1763526828.5638628, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00024127960205078125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526828_optimized", + "timestamp": 1763526828.6741154, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.1051325798034668, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.669921875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526831_non_optimized", + "timestamp": 1763526831.545799, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00023818016052246094, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526831_optimized", + "timestamp": 1763526831.6566033, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.10563468933105469, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526834_non_optimized", + "timestamp": 1763526834.7725186, + "optimized": false, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.2505476474761963, + "tokens_per_second": 11.973770379484487, + "time_per_token": 83.51588249206543, + "memory_used_mb": 83.7705078125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526834_optimized", + "timestamp": 1763526834.7885733, + "optimized": true, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.01080775260925293, + "tokens_per_second": 277.5785224239483, + "time_per_token": 3.60258420308431, + "memory_used_mb": 86.14013671875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526837_non_optimized", + "timestamp": 1763526837.7080257, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00025272369384765625, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526837_optimized", + "timestamp": 1763526837.8196452, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10632824897766113, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526840_non_optimized", + "timestamp": 1763526840.7012856, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023818016052246094, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526840_optimized", + "timestamp": 1763526840.8124819, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.10594463348388672, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526843_non_optimized", + "timestamp": 1763526843.7108738, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0002484321594238281, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526843_optimized", + "timestamp": 1763526843.8216114, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.10553407669067383, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.52685546875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526846_non_optimized", + "timestamp": 1763526846.7072785, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00023412704467773438, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526846_optimized", + "timestamp": 1763526846.8180177, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.10541939735412598, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.669921875, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526849_non_optimized", + "timestamp": 1763526849.695143, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0002357959747314453, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526849_optimized", + "timestamp": 1763526849.806518, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.10609865188598633, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.431640625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526852_non_optimized", + "timestamp": 1763526852.698653, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.0002410411834716797, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526852_optimized", + "timestamp": 1763526852.8100939, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.10619211196899414, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.3837890625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526855_non_optimized", + "timestamp": 1763526855.6878016, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00023484230041503906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526855_optimized", + "timestamp": 1763526855.798402, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.10535454750061035, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526858_non_optimized", + "timestamp": 1763526858.673868, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00024962425231933594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526858_optimized", + "timestamp": 1763526858.7845938, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.1053924560546875, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.193359375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526861_non_optimized", + "timestamp": 1763526861.6461842, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.0002357959747314453, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526861_optimized", + "timestamp": 1763526861.7578971, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.10634636878967285, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.3837890625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526864_non_optimized", + "timestamp": 1763526864.629814, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002498626708984375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526864_optimized", + "timestamp": 1763526864.7408218, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.1056220531463623, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526867_non_optimized", + "timestamp": 1763526867.6077204, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002434253692626953, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526867_optimized", + "timestamp": 1763526867.7184992, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.10547971725463867, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.33642578125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526870_non_optimized", + "timestamp": 1763526870.5837529, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.00023603439331054688, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 73.24609375, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763526870_optimized", + "timestamp": 1763526870.6944253, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.10529303550720215, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 86.57470703125, + "gpu_utilization": null, + "device": "cuda" } ] } \ No newline at end of file diff --git a/inference_benchmarks/optimization_comparison.png b/inference_benchmarks/optimization_comparison.png new file mode 100644 index 0000000..fc86ed4 Binary files /dev/null and b/inference_benchmarks/optimization_comparison.png differ diff --git a/inference_benchmarks/performance_over_time.png b/inference_benchmarks/performance_over_time.png new file mode 100644 index 0000000..45a653b Binary files /dev/null and b/inference_benchmarks/performance_over_time.png differ