From 5fe3dc0753dc6dd13da2b370c8e7e67b80210831 Mon Sep 17 00:00:00 2001 From: Carlos Gutierrez Date: Sun, 16 Nov 2025 21:03:15 +0000 Subject: [PATCH] adding the needed data --- inference_benchmarks/inference_metrics.csv | 201 ++ inference_benchmarks/inference_metrics.json | 2604 +++++++++++++++++++ 2 files changed, 2805 insertions(+) create mode 100644 inference_benchmarks/inference_metrics.csv create mode 100644 inference_benchmarks/inference_metrics.json diff --git a/inference_benchmarks/inference_metrics.csv b/inference_benchmarks/inference_metrics.csv new file mode 100644 index 0000000..2523100 --- /dev/null +++ b/inference_benchmarks/inference_metrics.csv @@ -0,0 +1,201 @@ +run_name,timestamp,optimized,prompt_length,generated_length,total_time,tokens_per_second,time_per_token,memory_used_mb,gpu_utilization,device +run_1763326618_non_optimized,1763326618.9855335,False,53,5,0.4859466552734375,10.289195214619902,97.1893310546875,8.12548828125,,cuda +run_1763326618_optimized,1763326619.026237,True,53,14,0.04017782211303711,348.4509429259782,2.869844436645508,0.0,,cuda +run_1763326622_non_optimized,1763326622.7140436,False,66,0,0.3553469181060791,0,0,8.1259765625,,cuda +run_1763326622_optimized,1763326622.717709,True,66,0,0.003069162368774414,0,0,0.0,,cuda +run_1763326626_non_optimized,1763326626.4152546,False,60,14,0.3536839485168457,39.58336265671155,25.263139179774694,8.12548828125,,cuda +run_1763326626_optimized,1763326626.4510481,True,60,12,0.03518319129943848,341.0719595579017,2.9319326082865396,0.0,,cuda +run_1763326629_non_optimized,1763326630.1408024,False,61,16,0.35126209259033203,45.55003325867101,21.953880786895752,8.12548828125,,cuda +run_1763326629_optimized,1763326630.1444962,True,61,0,0.003066539764404297,0,0,0.0,,cuda +run_1763326633_non_optimized,1763326633.8381388,False,60,0,0.35619688034057617,0,0,8.12548828125,,cuda +run_1763326633_optimized,1763326633.8418753,True,60,0,0.003074169158935547,0,0,0.0,,cuda +run_1763326637_non_optimized,1763326637.540628,False,64,8,0.3398597240447998,23.539123450078044,42.482465505599976,8.12548828125,,cuda +run_1763326637_optimized,1763326637.5471995,True,64,1,0.005898714065551758,169.52847500101046,5.898714065551758,0.0,,cuda +run_1763326640_non_optimized,1763326641.2843578,False,54,30,0.37198519706726074,80.64837051721585,12.399506568908691,8.12548828125,,cuda +run_1763326640_optimized,1763326641.3555984,True,54,25,0.0705265998840332,354.4761840370508,2.821063995361328,0.0,,cuda +run_1763326644_non_optimized,1763326645.0330086,False,67,0,0.333660364151001,0,0,8.1259765625,,cuda +run_1763326644_optimized,1763326645.0367913,True,67,0,0.0030765533447265625,0,0,0.0,,cuda +run_1763326648_non_optimized,1763326648.755835,False,63,17,0.34771132469177246,48.891131213714694,20.453607334810144,8.12548828125,,cuda +run_1763326648_optimized,1763326648.7840471,True,63,9,0.027483701705932617,327.46680546519195,3.053744633992513,0.0,,cuda +run_1763326652_non_optimized,1763326652.4592273,False,73,19,0.31671810150146484,59.990256035025276,16.66937376323499,8.1259765625,,cuda +run_1763326652_optimized,1763326652.4657354,True,73,1,0.005753517150878906,173.80672965357203,5.753517150878906,0.0,,cuda +run_1763326655_non_optimized,1763326656.2020977,False,46,0,0.392134428024292,0,0,8.12548828125,,cuda +run_1763326655_optimized,1763326656.2059286,True,46,0,0.003073453903198242,0,0,0.0,,cuda +run_1763326659_non_optimized,1763326659.8722565,False,70,17,0.33252859115600586,51.12342352548099,19.56050536211799,8.1259765625,,cuda +run_1763326659_optimized,1763326659.913114,True,70,14,0.040065765380859375,349.42549747691135,2.861840384347098,0.0,,cuda +run_1763326663_non_optimized,1763326663.6047928,False,66,14,0.3415799140930176,40.986016514389014,24.3985652923584,8.1259765625,,cuda +run_1763326663_optimized,1763326663.6473217,True,66,14,0.041661977767944336,336.037815534814,2.9758555548531667,0.0,,cuda +run_1763326666_non_optimized,1763326667.383774,False,50,0,0.38038086891174316,0,0,8.12548828125,,cuda +run_1763326666_optimized,1763326667.4496503,True,50,23,0.06507229804992676,353.45301594164135,2.829230349996816,0.0,,cuda +run_1763326670_non_optimized,1763326671.1408262,False,66,0,0.34095191955566406,0,0,8.1259765625,,cuda +run_1763326670_optimized,1763326671.1447349,True,66,0,0.0030736923217773438,0,0,0.0,,cuda +run_1763326674_non_optimized,1763326674.9176548,False,43,43,0.4153327941894531,103.53143455458913,9.6589021904524,8.12548828125,,cuda +run_1763326674_optimized,1763326675.01074,True,43,32,0.09222531318664551,346.97632238167006,2.882041037082672,0.0,,cuda +run_1763326678_non_optimized,1763326678.7116284,False,61,0,0.34818243980407715,0,0,8.12548828125,,cuda +run_1763326678_optimized,1763326678.7930346,True,61,29,0.08052730560302734,360.1262923530596,2.776803641483702,0.0,,cuda +run_1763326682_non_optimized,1763326682.5197363,False,58,0,0.3616158962249756,0,0,8.12548828125,,cuda +run_1763326682_optimized,1763326682.579515,True,58,19,0.05885767936706543,322.8125913953667,3.0977725982666016,0.0,,cuda +run_1763326685_non_optimized,1763326686.2714303,False,66,0,0.3391129970550537,0,0,8.1259765625,,cuda +run_1763326685_optimized,1763326686.275417,True,66,0,0.0030994415283203125,0,0,0.0,,cuda +run_1763326689_non_optimized,1763326689.9624639,False,67,28,0.3353545665740967,83.4937191583267,11.976948806217738,8.1259765625,,cuda +run_1763326689_optimized,1763326689.9768,True,67,3,0.011225700378417969,267.24390451108655,3.7419001261393228,0.0,,cuda +run_1763326693_non_optimized,1763326693.711334,False,59,24,0.3620493412017822,66.28930719866715,15.085389216740927,8.12548828125,,cuda +run_1763326693_optimized,1763326693.7152996,True,59,0,0.0030515193939208984,0,0,0.0,,cuda +run_1763326697_non_optimized,1763326697.4116411,False,63,0,0.3452737331390381,0,0,8.12548828125,,cuda +run_1763326697_optimized,1763326697.4156432,True,63,0,0.0030455589294433594,0,0,0.0,,cuda +run_1763326700_non_optimized,1763326701.1587586,False,53,19,0.3794400691986084,50.07378382606958,19.970529957821494,8.12548828125,,cuda +run_1763326700_optimized,1763326701.2194357,True,53,21,0.05968618392944336,351.84021858098123,2.8421992347353977,0.0,,cuda +run_1763326704_non_optimized,1763326704.906457,False,67,0,0.3325045108795166,0,0,8.1259765625,,cuda +run_1763326704_optimized,1763326704.9105146,True,67,0,0.0030672550201416016,0,0,0.0,,cuda +run_1763326708_non_optimized,1763326708.6155152,False,64,4,0.3434574604034424,11.646274899084734,85.8643651008606,8.12548828125,,cuda +run_1763326708_optimized,1763326708.6492803,True,64,11,0.03272819519042969,336.1016376245702,2.9752904718572446,0.0,,cuda +run_1763326712_non_optimized,1763326712.3875144,False,54,5,0.3697817325592041,13.521490002753103,73.95634651184082,8.12548828125,,cuda +run_1763326712_optimized,1763326712.3916335,True,54,0,0.0030570030212402344,0,0,0.0,,cuda +run_1763326715_non_optimized,1763326716.129369,False,51,0,0.38543081283569336,0,0,8.12548828125,,cuda +run_1763326715_optimized,1763326716.133514,True,51,0,0.003108978271484375,0,0,0.0,,cuda +run_1763326719_non_optimized,1763326719.845239,False,60,10,0.35239410400390625,28.377319275152093,35.239410400390625,8.12548828125,,cuda +run_1763326719_optimized,1763326719.8974886,True,60,18,0.051172733306884766,351.7498252839718,2.842929628160265,0.0,,cuda +run_1763326723_non_optimized,1763326723.6197646,False,56,0,0.3644559383392334,0,0,8.12548828125,,cuda +run_1763326723_optimized,1763326723.6239054,True,56,0,0.0030570030212402344,0,0,0.0,,cuda +run_1763326726_non_optimized,1763326727.3191404,False,65,0,0.34804534912109375,0,0,8.1259765625,,cuda +run_1763326726_optimized,1763326727.3233125,True,65,0,0.0030825138092041016,0,0,0.0,,cuda +run_1763326730_non_optimized,1763326731.044284,False,59,16,0.35839104652404785,44.64397242950211,22.39944040775299,8.12548828125,,cuda +run_1763326730_optimized,1763326731.048486,True,59,0,0.0030989646911621094,0,0,0.0,,cuda +run_1763326734_non_optimized,1763326734.8039234,False,54,0,0.3762962818145752,0,0,8.12548828125,,cuda +run_1763326734_optimized,1763326734.84245,True,54,13,0.03740096092224121,347.58465235766965,2.8769969940185547,0.0,,cuda +run_1763326738_non_optimized,1763326738.6017425,False,54,41,0.370577335357666,110.63817478321626,9.038471594089415,8.12548828125,,cuda +run_1763326738_optimized,1763326738.6901221,True,54,31,0.08722376823425293,355.4077131447096,2.813669943040417,0.0,,cuda +run_1763326742_non_optimized,1763326742.419483,False,59,12,0.3536853790283203,33.928459335716944,29.47378158569336,8.12548828125,,cuda +run_1763326742_optimized,1763326742.4236658,True,59,0,0.0030400753021240234,0,0,0.0,,cuda +run_1763326745_non_optimized,1763326746.1228704,False,65,19,0.3396615982055664,55.93802802665087,17.8769262213456,8.1259765625,,cuda +run_1763326745_optimized,1763326746.1515133,True,65,9,0.02747321128845215,327.59184594423374,3.052579032050239,0.0,,cuda +run_1763326749_non_optimized,1763326749.8801262,False,60,0,0.35973238945007324,0,0,8.12548828125,,cuda +run_1763326749_optimized,1763326749.9388814,True,60,20,0.05757498741149902,347.37306770136695,2.878749370574951,0.0,,cuda +run_1763326753_non_optimized,1763326753.6588142,False,56,22,0.3657264709472656,60.154245720901606,16.62393049760298,8.12548828125,,cuda +run_1763326753_optimized,1763326753.6631608,True,56,0,0.0031464099884033203,0,0,0.0,,cuda +run_1763326757_non_optimized,1763326757.3905053,False,59,33,0.36070775985717773,91.4868036469921,10.930538177490234,8.12548828125,,cuda +run_1763326757_optimized,1763326757.4786851,True,59,31,0.08695292472839355,356.5147474548062,2.804933055754631,0.0,,cuda +run_1763326760_non_optimized,1763326761.2264946,False,49,17,0.38045310974121094,44.683561691908935,22.379594690659466,8.12548828125,,cuda +run_1763326760_optimized,1763326761.3133712,True,49,31,0.08566641807556152,361.8687543659949,2.763432841147146,0.0,,cuda +run_1763326764_non_optimized,1763326765.0356681,False,57,37,0.3631327152252197,101.89112258049266,9.814397708789722,8.12548828125,,cuda +run_1763326764_optimized,1763326765.0750856,True,57,13,0.03816509246826172,340.62540293360655,2.9357763437124396,0.0,,cuda +run_1763326768_non_optimized,1763326768.767545,False,68,19,0.331697940826416,57.281030906197486,17.457786359285052,8.1259765625,,cuda +run_1763326768_optimized,1763326768.7959456,True,68,9,0.027106285095214844,332.0263167153362,3.011809455023872,0.0,,cuda +run_1763326772_non_optimized,1763326772.5260317,False,57,10,0.36326122283935547,27.52839932056906,36.32612228393555,8.12548828125,,cuda +run_1763326772_optimized,1763326772.5303962,True,57,0,0.003071308135986328,0,0,0.0,,cuda +run_1763326775_non_optimized,1763326776.254764,False,57,25,0.3601036071777344,69.42446424220596,14.404144287109375,8.12548828125,,cuda +run_1763326775_optimized,1763326776.2603817,True,57,0,0.0030698776245117188,0,0,0.0,,cuda +run_1763326779_non_optimized,1763326779.9732888,False,62,0,0.3485872745513916,0,0,8.12548828125,,cuda +run_1763326779_optimized,1763326780.019289,True,62,15,0.0446467399597168,335.9707789086948,2.9764493306477866,0.0,,cuda +run_1763326783_non_optimized,1763326783.7825809,False,48,37,0.3960244655609131,93.42857125656288,10.703363934078732,8.12548828125,,cuda +run_1763326783_optimized,1763326783.8163621,True,48,11,0.032434701919555664,339.142934850523,2.9486092654141514,0.0,,cuda +run_1763326787_non_optimized,1763326787.5348887,False,65,0,0.33835744857788086,0,0,8.1259765625,,cuda +run_1763326787_optimized,1763326787.539357,True,65,0,0.0030851364135742188,0,0,0.0,,cuda +run_1763326790_non_optimized,1763326791.2828848,False,54,0,0.37782812118530273,0,0,8.12548828125,,cuda +run_1763326790_optimized,1763326791.2873359,True,54,0,0.003094911575317383,0,0,0.0,,cuda +run_1763326794_non_optimized,1763326794.9969409,False,60,15,0.3552377223968506,42.225245390023325,23.682514826456707,8.12548828125,,cuda +run_1763326794_optimized,1763326795.0417602,True,60,15,0.043425798416137695,345.41679248494296,2.895053227742513,0.0,,cuda +run_1763326798_non_optimized,1763326798.768397,False,56,0,0.3616046905517578,0,0,8.12548828125,,cuda +run_1763326798_optimized,1763326798.7728815,True,56,0,0.0030786991119384766,0,0,0.0,,cuda +run_1763326802_non_optimized,1763326802.5161467,False,55,8,0.367128849029541,21.790714680001297,45.89110612869263,8.12548828125,,cuda +run_1763326802_optimized,1763326802.5774758,True,55,21,0.05986332893371582,350.79906804468607,2.8506347111293246,0.0,,cuda +run_1763326805_non_optimized,1763326806.225911,False,86,14,0.2903730869293213,48.21383464992984,20.740934780665807,8.1259765625,,cuda +run_1763326805_optimized,1763326806.23062,True,86,0,0.003251314163208008,0,0,0.0,,cuda +run_1763326809_non_optimized,1763326809.8743262,False,87,0,0.2792494297027588,0,0,8.1259765625,,cuda +run_1763326809_optimized,1763326809.8790207,True,87,0,0.0032303333282470703,0,0,0.0,,cuda +run_1763326813_non_optimized,1763326813.5236838,False,92,8,0.2737746238708496,29.22111584663858,34.2218279838562,8.1259765625,,cuda +run_1763326813_optimized,1763326813.5475192,True,92,8,0.022355318069458008,357.8566842638511,2.794414758682251,0.0,,cuda +run_1763326816_non_optimized,1763326817.1884215,False,91,9,0.2681541442871094,33.56278540436731,29.794904920789932,8.1259765625,,cuda +run_1763326816_optimized,1763326817.193147,True,91,0,0.0032160282135009766,0,0,0.0,,cuda +run_1763326820_non_optimized,1763326820.8178222,False,96,0,0.25312256813049316,0,0,8.1259765625,,cuda +run_1763326820_optimized,1763326820.8225522,True,96,0,0.003218412399291992,0,0,0.0,,cuda +run_1763326824_non_optimized,1763326824.1973226,False,100,0,0.00023245811462402344,0,0,0.0009765625,,cuda +run_1763326824_optimized,1763326824.198983,True,100,0,0.0003190040588378906,0,0,0.0,,cuda +run_1763326827_non_optimized,1763326827.7810934,False,98,2,0.24810361862182617,8.061148044150517,124.05180931091309,8.1259765625,,cuda +run_1763326827_optimized,1763326827.7893782,True,98,2,0.006571054458618164,304.3651536591561,3.285527229309082,0.0,,cuda +run_1763326831_non_optimized,1763326831.4181712,False,95,5,0.26477837562561035,18.883717328449315,52.95567512512207,8.1259765625,,cuda +run_1763326831_optimized,1763326831.425718,True,95,1,0.00593876838684082,168.38508169737847,5.93876838684082,0.0,,cuda +run_1763326834_non_optimized,1763326835.0461798,False,99,1,0.25382065773010254,3.9397896488919324,253.82065773010254,8.1259765625,,cuda +run_1763326834_optimized,1763326835.0514338,True,99,1,0.0036373138427734375,274.9281594126901,3.6373138427734375,0.0,,cuda +run_1763326838_non_optimized,1763326838.404944,False,107,0,0.0002460479736328125,0,0,0.0009765625,,cuda +run_1763326838_optimized,1763326838.4067247,True,107,0,0.00032591819763183594,0,0,0.0,,cuda +run_1763326841_non_optimized,1763326841.7476485,False,111,0,0.00024366378784179688,0,0,0.0009765625,,cuda +run_1763326841_optimized,1763326841.7494369,True,111,0,0.000324249267578125,0,0,0.0,,cuda +run_1763326845_non_optimized,1763326845.076511,False,104,0,0.0002353191375732422,0,0,0.0009765625,,cuda +run_1763326845_optimized,1763326845.078336,True,104,0,0.0003311634063720703,0,0,0.0,,cuda +run_1763326848_non_optimized,1763326848.650482,False,97,3,0.2599804401397705,11.539329644903832,86.66014671325684,8.1259765625,,cuda +run_1763326848_optimized,1763326848.655401,True,97,0,0.0032651424407958984,0,0,0.0,,cuda +run_1763326851_non_optimized,1763326852.0037806,False,102,0,0.00024008750915527344,0,0,0.0009765625,,cuda +run_1763326851_optimized,1763326852.0057652,True,102,0,0.00032830238342285156,0,0,0.0,,cuda +run_1763326855_non_optimized,1763326855.3215888,False,102,0,0.0002357959747314453,0,0,0.0009765625,,cuda +run_1763326855_optimized,1763326855.3233974,True,102,0,0.0003199577331542969,0,0,0.0,,cuda +run_1763326858_non_optimized,1763326858.6427326,False,105,0,0.0002455711364746094,0,0,0.0009765625,,cuda +run_1763326858_optimized,1763326858.6446369,True,105,0,0.0003333091735839844,0,0,0.0,,cuda +run_1763326861_non_optimized,1763326862.2181683,False,94,0,0.26184773445129395,0,0,8.1259765625,,cuda +run_1763326861_optimized,1763326862.2230783,True,94,0,0.0032091140747070312,0,0,0.0,,cuda +run_1763326865_non_optimized,1763326865.582198,False,102,0,0.000240325927734375,0,0,0.0009765625,,cuda +run_1763326865_optimized,1763326865.5841131,True,102,0,0.0003209114074707031,0,0,0.0,,cuda +run_1763326868_non_optimized,1763326868.9020677,False,103,0,0.0002429485321044922,0,0,0.0009765625,,cuda +run_1763326868_optimized,1763326868.9039805,True,103,0,0.00033211708068847656,0,0,0.0,,cuda +run_1763326872_non_optimized,1763326872.2377539,False,107,0,0.00024199485778808594,0,0,0.0009765625,,cuda +run_1763326872_optimized,1763326872.2396657,True,107,0,0.00032520294189453125,0,0,0.0,,cuda +run_1763326875_non_optimized,1763326875.572948,False,110,0,0.0002472400665283203,0,0,0.0009765625,,cuda +run_1763326875_optimized,1763326875.5749235,True,110,0,0.000339508056640625,0,0,0.0,,cuda +run_1763326878_non_optimized,1763326878.917066,False,106,0,0.00023293495178222656,0,0,0.0009765625,,cuda +run_1763326878_optimized,1763326878.9190118,True,106,0,0.00032639503479003906,0,0,0.0,,cuda +run_1763326882_non_optimized,1763326882.2492902,False,109,0,0.00023746490478515625,0,0,0.0009765625,,cuda +run_1763326882_optimized,1763326882.251233,True,109,0,0.0003230571746826172,0,0,0.0,,cuda +run_1763326885_non_optimized,1763326885.5688546,False,110,0,0.0002415180206298828,0,0,0.0009765625,,cuda +run_1763326885_optimized,1763326885.5708404,True,110,0,0.00032401084899902344,0,0,0.0,,cuda +run_1763326888_non_optimized,1763326888.8873081,False,114,0,0.00024056434631347656,0,0,0.0009765625,,cuda +run_1763326888_optimized,1763326888.8893402,True,114,0,0.0003287792205810547,0,0,0.0,,cuda +run_1763326892_non_optimized,1763326892.2040594,False,113,0,0.0002467632293701172,0,0,0.0009765625,,cuda +run_1763326892_optimized,1763326892.2060897,True,113,0,0.00032711029052734375,0,0,0.0,,cuda +run_1763326895_non_optimized,1763326895.5235405,False,113,0,0.00023317337036132812,0,0,0.0009765625,,cuda +run_1763326895_optimized,1763326895.5255647,True,113,0,0.0003211498260498047,0,0,0.0,,cuda +run_1763326898_non_optimized,1763326898.837703,False,112,0,0.0002307891845703125,0,0,0.0009765625,,cuda +run_1763326898_optimized,1763326898.839746,True,112,0,0.00032401084899902344,0,0,0.0,,cuda +run_1763326902_non_optimized,1763326902.1575205,False,109,0,0.0002338886260986328,0,0,0.0009765625,,cuda +run_1763326902_optimized,1763326902.1595387,True,109,0,0.0003142356872558594,0,0,0.0,,cuda +run_1763326905_non_optimized,1763326905.4874718,False,102,0,0.00023674964904785156,0,0,0.0009765625,,cuda +run_1763326905_optimized,1763326905.4895573,True,102,0,0.00033164024353027344,0,0,0.0,,cuda +run_1763326908_non_optimized,1763326908.803102,False,105,0,0.00025081634521484375,0,0,0.0009765625,,cuda +run_1763326908_optimized,1763326908.8052208,True,105,0,0.00031828880310058594,0,0,0.0,,cuda +run_1763326912_non_optimized,1763326912.1360228,False,109,0,0.00023627281188964844,0,0,0.0009765625,,cuda +run_1763326912_optimized,1763326912.1381037,True,109,0,0.0003142356872558594,0,0,0.0,,cuda +run_1763326915_non_optimized,1763326915.4476118,False,103,0,0.0002300739288330078,0,0,0.0009765625,,cuda +run_1763326915_optimized,1763326915.449725,True,103,0,0.0003192424774169922,0,0,0.0,,cuda +run_1763326918_non_optimized,1763326918.7690768,False,102,0,0.00023889541625976562,0,0,0.0009765625,,cuda +run_1763326918_optimized,1763326918.7712831,True,102,0,0.00032782554626464844,0,0,0.0,,cuda +run_1763326922_non_optimized,1763326922.0759387,False,107,0,0.0002472400665283203,0,0,0.0009765625,,cuda +run_1763326922_optimized,1763326922.078079,True,107,0,0.000324249267578125,0,0,0.0,,cuda +run_1763326925_non_optimized,1763326925.3886678,False,112,0,0.00023698806762695312,0,0,0.0009765625,,cuda +run_1763326925_optimized,1763326925.3908367,True,112,0,0.00032520294189453125,0,0,0.0,,cuda +run_1763326928_non_optimized,1763326928.6967072,False,105,0,0.0002300739288330078,0,0,0.0009765625,,cuda +run_1763326928_optimized,1763326928.6988983,True,105,0,0.0003209114074707031,0,0,0.0,,cuda +run_1763326932_non_optimized,1763326932.2863104,False,97,3,0.25337719917297363,11.840055102795507,84.45906639099121,8.1259765625,,cuda +run_1763326932_optimized,1763326932.2974646,True,97,3,0.008779525756835938,341.70410601781447,2.926508585611979,0.0,,cuda +run_1763326935_non_optimized,1763326935.6391814,False,102,0,0.00023484230041503906,0,0,0.0009765625,,cuda +run_1763326935_optimized,1763326935.6413832,True,102,0,0.00031876564025878906,0,0,0.0,,cuda +run_1763326938_non_optimized,1763326938.9412687,False,102,0,0.00023102760314941406,0,0,0.0009765625,,cuda +run_1763326938_optimized,1763326938.9434795,True,102,0,0.0003178119659423828,0,0,0.0,,cuda +run_1763326942_non_optimized,1763326942.2639933,False,109,0,0.00024819374084472656,0,0,0.0009765625,,cuda +run_1763326942_optimized,1763326942.266253,True,109,0,0.00033283233642578125,0,0,0.0,,cuda +run_1763326945_non_optimized,1763326945.5876389,False,112,0,0.00024247169494628906,0,0,0.0009765625,,cuda +run_1763326945_optimized,1763326945.5899763,True,112,0,0.0003368854522705078,0,0,0.0,,cuda +run_1763326948_non_optimized,1763326948.9048371,False,107,0,0.0002422332763671875,0,0,0.0009765625,,cuda +run_1763326948_optimized,1763326948.9071276,True,107,0,0.0003273487091064453,0,0,0.0,,cuda +run_1763326952_non_optimized,1763326952.2211714,False,106,0,0.00023603439331054688,0,0,0.0009765625,,cuda +run_1763326952_optimized,1763326952.2234442,True,106,0,0.00032329559326171875,0,0,0.0,,cuda +run_1763326955_non_optimized,1763326955.5458589,False,105,0,0.0002446174621582031,0,0,0.0009765625,,cuda +run_1763326955_optimized,1763326955.5481744,True,105,0,0.00031685829162597656,0,0,0.0,,cuda +run_1763326958_non_optimized,1763326958.8506002,False,102,0,0.00022912025451660156,0,0,0.0009765625,,cuda +run_1763326958_optimized,1763326958.85293,True,102,0,0.0003216266632080078,0,0,0.0,,cuda +run_1763326962_non_optimized,1763326962.1738048,False,106,0,0.00024080276489257812,0,0,0.0009765625,,cuda +run_1763326962_optimized,1763326962.1762178,True,106,0,0.0003228187561035156,0,0,0.0,,cuda +run_1763326965_non_optimized,1763326965.4970171,False,105,0,0.00023508071899414062,0,0,0.0009765625,,cuda +run_1763326965_optimized,1763326965.4993877,True,105,0,0.0003216266632080078,0,0,0.0,,cuda +run_1763326968_non_optimized,1763326968.8236582,False,105,0,0.0002491474151611328,0,0,0.0009765625,,cuda +run_1763326968_optimized,1763326968.826099,True,105,0,0.0003414154052734375,0,0,0.0,,cuda +run_1763326972_non_optimized,1763326972.1689265,False,110,0,0.00025177001953125,0,0,0.0009765625,,cuda +run_1763326972_optimized,1763326972.171371,True,110,0,0.0003230571746826172,0,0,0.0,,cuda diff --git a/inference_benchmarks/inference_metrics.json b/inference_benchmarks/inference_metrics.json new file mode 100644 index 0000000..58ae38c --- /dev/null +++ b/inference_benchmarks/inference_metrics.json @@ -0,0 +1,2604 @@ +{ + "runs": [ + { + "run_name": "run_1763326618_non_optimized", + "timestamp": 1763326618.9855335, + "optimized": false, + "prompt_length": 53, + "generated_length": 5, + "total_time": 0.4859466552734375, + "tokens_per_second": 10.289195214619902, + "time_per_token": 97.1893310546875, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326618_optimized", + "timestamp": 1763326619.026237, + "optimized": true, + "prompt_length": 53, + "generated_length": 14, + "total_time": 0.04017782211303711, + "tokens_per_second": 348.4509429259782, + "time_per_token": 2.869844436645508, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326622_non_optimized", + "timestamp": 1763326622.7140436, + "optimized": false, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.3553469181060791, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326622_optimized", + "timestamp": 1763326622.717709, + "optimized": true, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.003069162368774414, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326626_non_optimized", + "timestamp": 1763326626.4152546, + "optimized": false, + "prompt_length": 60, + "generated_length": 14, + "total_time": 0.3536839485168457, + "tokens_per_second": 39.58336265671155, + "time_per_token": 25.263139179774694, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326626_optimized", + "timestamp": 1763326626.4510481, + "optimized": true, + "prompt_length": 60, + "generated_length": 12, + "total_time": 0.03518319129943848, + "tokens_per_second": 341.0719595579017, + "time_per_token": 2.9319326082865396, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326629_non_optimized", + "timestamp": 1763326630.1408024, + "optimized": false, + "prompt_length": 61, + "generated_length": 16, + "total_time": 0.35126209259033203, + "tokens_per_second": 45.55003325867101, + "time_per_token": 21.953880786895752, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326629_optimized", + "timestamp": 1763326630.1444962, + "optimized": true, + "prompt_length": 61, + "generated_length": 0, + "total_time": 0.003066539764404297, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326633_non_optimized", + "timestamp": 1763326633.8381388, + "optimized": false, + "prompt_length": 60, + "generated_length": 0, + "total_time": 0.35619688034057617, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326633_optimized", + "timestamp": 1763326633.8418753, + "optimized": true, + "prompt_length": 60, + "generated_length": 0, + "total_time": 0.003074169158935547, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326637_non_optimized", + "timestamp": 1763326637.540628, + "optimized": false, + "prompt_length": 64, + "generated_length": 8, + "total_time": 0.3398597240447998, + "tokens_per_second": 23.539123450078044, + "time_per_token": 42.482465505599976, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326637_optimized", + "timestamp": 1763326637.5471995, + "optimized": true, + "prompt_length": 64, + "generated_length": 1, + "total_time": 0.005898714065551758, + "tokens_per_second": 169.52847500101046, + "time_per_token": 5.898714065551758, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326640_non_optimized", + "timestamp": 1763326641.2843578, + "optimized": false, + "prompt_length": 54, + "generated_length": 30, + "total_time": 0.37198519706726074, + "tokens_per_second": 80.64837051721585, + "time_per_token": 12.399506568908691, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326640_optimized", + "timestamp": 1763326641.3555984, + "optimized": true, + "prompt_length": 54, + "generated_length": 25, + "total_time": 0.0705265998840332, + "tokens_per_second": 354.4761840370508, + "time_per_token": 2.821063995361328, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326644_non_optimized", + "timestamp": 1763326645.0330086, + "optimized": false, + "prompt_length": 67, + "generated_length": 0, + "total_time": 0.333660364151001, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326644_optimized", + "timestamp": 1763326645.0367913, + "optimized": true, + "prompt_length": 67, + "generated_length": 0, + "total_time": 0.0030765533447265625, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326648_non_optimized", + "timestamp": 1763326648.755835, + "optimized": false, + "prompt_length": 63, + "generated_length": 17, + "total_time": 0.34771132469177246, + "tokens_per_second": 48.891131213714694, + "time_per_token": 20.453607334810144, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326648_optimized", + "timestamp": 1763326648.7840471, + "optimized": true, + "prompt_length": 63, + "generated_length": 9, + "total_time": 0.027483701705932617, + "tokens_per_second": 327.46680546519195, + "time_per_token": 3.053744633992513, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326652_non_optimized", + "timestamp": 1763326652.4592273, + "optimized": false, + "prompt_length": 73, + "generated_length": 19, + "total_time": 0.31671810150146484, + "tokens_per_second": 59.990256035025276, + "time_per_token": 16.66937376323499, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326652_optimized", + "timestamp": 1763326652.4657354, + "optimized": true, + "prompt_length": 73, + "generated_length": 1, + "total_time": 0.005753517150878906, + "tokens_per_second": 173.80672965357203, + "time_per_token": 5.753517150878906, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326655_non_optimized", + "timestamp": 1763326656.2020977, + "optimized": false, + "prompt_length": 46, + "generated_length": 0, + "total_time": 0.392134428024292, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326655_optimized", + "timestamp": 1763326656.2059286, + "optimized": true, + "prompt_length": 46, + "generated_length": 0, + "total_time": 0.003073453903198242, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326659_non_optimized", + "timestamp": 1763326659.8722565, + "optimized": false, + "prompt_length": 70, + "generated_length": 17, + "total_time": 0.33252859115600586, + "tokens_per_second": 51.12342352548099, + "time_per_token": 19.56050536211799, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326659_optimized", + "timestamp": 1763326659.913114, + "optimized": true, + "prompt_length": 70, + "generated_length": 14, + "total_time": 0.040065765380859375, + "tokens_per_second": 349.42549747691135, + "time_per_token": 2.861840384347098, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326663_non_optimized", + "timestamp": 1763326663.6047928, + "optimized": false, + "prompt_length": 66, + "generated_length": 14, + "total_time": 0.3415799140930176, + "tokens_per_second": 40.986016514389014, + "time_per_token": 24.3985652923584, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326663_optimized", + "timestamp": 1763326663.6473217, + "optimized": true, + "prompt_length": 66, + "generated_length": 14, + "total_time": 0.041661977767944336, + "tokens_per_second": 336.037815534814, + "time_per_token": 2.9758555548531667, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326666_non_optimized", + "timestamp": 1763326667.383774, + "optimized": false, + "prompt_length": 50, + "generated_length": 0, + "total_time": 0.38038086891174316, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326666_optimized", + "timestamp": 1763326667.4496503, + "optimized": true, + "prompt_length": 50, + "generated_length": 23, + "total_time": 0.06507229804992676, + "tokens_per_second": 353.45301594164135, + "time_per_token": 2.829230349996816, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326670_non_optimized", + "timestamp": 1763326671.1408262, + "optimized": false, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.34095191955566406, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326670_optimized", + "timestamp": 1763326671.1447349, + "optimized": true, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.0030736923217773438, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326674_non_optimized", + "timestamp": 1763326674.9176548, + "optimized": false, + "prompt_length": 43, + "generated_length": 43, + "total_time": 0.4153327941894531, + "tokens_per_second": 103.53143455458913, + "time_per_token": 9.6589021904524, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326674_optimized", + "timestamp": 1763326675.01074, + "optimized": true, + "prompt_length": 43, + "generated_length": 32, + "total_time": 0.09222531318664551, + "tokens_per_second": 346.97632238167006, + "time_per_token": 2.882041037082672, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326678_non_optimized", + "timestamp": 1763326678.7116284, + "optimized": false, + "prompt_length": 61, + "generated_length": 0, + "total_time": 0.34818243980407715, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326678_optimized", + "timestamp": 1763326678.7930346, + "optimized": true, + "prompt_length": 61, + "generated_length": 29, + "total_time": 0.08052730560302734, + "tokens_per_second": 360.1262923530596, + "time_per_token": 2.776803641483702, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326682_non_optimized", + "timestamp": 1763326682.5197363, + "optimized": false, + "prompt_length": 58, + "generated_length": 0, + "total_time": 0.3616158962249756, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326682_optimized", + "timestamp": 1763326682.579515, + "optimized": true, + "prompt_length": 58, + "generated_length": 19, + "total_time": 0.05885767936706543, + "tokens_per_second": 322.8125913953667, + "time_per_token": 3.0977725982666016, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326685_non_optimized", + "timestamp": 1763326686.2714303, + "optimized": false, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.3391129970550537, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326685_optimized", + "timestamp": 1763326686.275417, + "optimized": true, + "prompt_length": 66, + "generated_length": 0, + "total_time": 0.0030994415283203125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326689_non_optimized", + "timestamp": 1763326689.9624639, + "optimized": false, + "prompt_length": 67, + "generated_length": 28, + "total_time": 0.3353545665740967, + "tokens_per_second": 83.4937191583267, + "time_per_token": 11.976948806217738, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326689_optimized", + "timestamp": 1763326689.9768, + "optimized": true, + "prompt_length": 67, + "generated_length": 3, + "total_time": 0.011225700378417969, + "tokens_per_second": 267.24390451108655, + "time_per_token": 3.7419001261393228, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326693_non_optimized", + "timestamp": 1763326693.711334, + "optimized": false, + "prompt_length": 59, + "generated_length": 24, + "total_time": 0.3620493412017822, + "tokens_per_second": 66.28930719866715, + "time_per_token": 15.085389216740927, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326693_optimized", + "timestamp": 1763326693.7152996, + "optimized": true, + "prompt_length": 59, + "generated_length": 0, + "total_time": 0.0030515193939208984, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326697_non_optimized", + "timestamp": 1763326697.4116411, + "optimized": false, + "prompt_length": 63, + "generated_length": 0, + "total_time": 0.3452737331390381, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326697_optimized", + "timestamp": 1763326697.4156432, + "optimized": true, + "prompt_length": 63, + "generated_length": 0, + "total_time": 0.0030455589294433594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326700_non_optimized", + "timestamp": 1763326701.1587586, + "optimized": false, + "prompt_length": 53, + "generated_length": 19, + "total_time": 0.3794400691986084, + "tokens_per_second": 50.07378382606958, + "time_per_token": 19.970529957821494, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326700_optimized", + "timestamp": 1763326701.2194357, + "optimized": true, + "prompt_length": 53, + "generated_length": 21, + "total_time": 0.05968618392944336, + "tokens_per_second": 351.84021858098123, + "time_per_token": 2.8421992347353977, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326704_non_optimized", + "timestamp": 1763326704.906457, + "optimized": false, + "prompt_length": 67, + "generated_length": 0, + "total_time": 0.3325045108795166, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326704_optimized", + "timestamp": 1763326704.9105146, + "optimized": true, + "prompt_length": 67, + "generated_length": 0, + "total_time": 0.0030672550201416016, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326708_non_optimized", + "timestamp": 1763326708.6155152, + "optimized": false, + "prompt_length": 64, + "generated_length": 4, + "total_time": 0.3434574604034424, + "tokens_per_second": 11.646274899084734, + "time_per_token": 85.8643651008606, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326708_optimized", + "timestamp": 1763326708.6492803, + "optimized": true, + "prompt_length": 64, + "generated_length": 11, + "total_time": 0.03272819519042969, + "tokens_per_second": 336.1016376245702, + "time_per_token": 2.9752904718572446, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326712_non_optimized", + "timestamp": 1763326712.3875144, + "optimized": false, + "prompt_length": 54, + "generated_length": 5, + "total_time": 0.3697817325592041, + "tokens_per_second": 13.521490002753103, + "time_per_token": 73.95634651184082, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326712_optimized", + "timestamp": 1763326712.3916335, + "optimized": true, + "prompt_length": 54, + "generated_length": 0, + "total_time": 0.0030570030212402344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326715_non_optimized", + "timestamp": 1763326716.129369, + "optimized": false, + "prompt_length": 51, + "generated_length": 0, + "total_time": 0.38543081283569336, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326715_optimized", + "timestamp": 1763326716.133514, + "optimized": true, + "prompt_length": 51, + "generated_length": 0, + "total_time": 0.003108978271484375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326719_non_optimized", + "timestamp": 1763326719.845239, + "optimized": false, + "prompt_length": 60, + "generated_length": 10, + "total_time": 0.35239410400390625, + "tokens_per_second": 28.377319275152093, + "time_per_token": 35.239410400390625, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326719_optimized", + "timestamp": 1763326719.8974886, + "optimized": true, + "prompt_length": 60, + "generated_length": 18, + "total_time": 0.051172733306884766, + "tokens_per_second": 351.7498252839718, + "time_per_token": 2.842929628160265, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326723_non_optimized", + "timestamp": 1763326723.6197646, + "optimized": false, + "prompt_length": 56, + "generated_length": 0, + "total_time": 0.3644559383392334, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326723_optimized", + "timestamp": 1763326723.6239054, + "optimized": true, + "prompt_length": 56, + "generated_length": 0, + "total_time": 0.0030570030212402344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326726_non_optimized", + "timestamp": 1763326727.3191404, + "optimized": false, + "prompt_length": 65, + "generated_length": 0, + "total_time": 0.34804534912109375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326726_optimized", + "timestamp": 1763326727.3233125, + "optimized": true, + "prompt_length": 65, + "generated_length": 0, + "total_time": 0.0030825138092041016, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326730_non_optimized", + "timestamp": 1763326731.044284, + "optimized": false, + "prompt_length": 59, + "generated_length": 16, + "total_time": 0.35839104652404785, + "tokens_per_second": 44.64397242950211, + "time_per_token": 22.39944040775299, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326730_optimized", + "timestamp": 1763326731.048486, + "optimized": true, + "prompt_length": 59, + "generated_length": 0, + "total_time": 0.0030989646911621094, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326734_non_optimized", + "timestamp": 1763326734.8039234, + "optimized": false, + "prompt_length": 54, + "generated_length": 0, + "total_time": 0.3762962818145752, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326734_optimized", + "timestamp": 1763326734.84245, + "optimized": true, + "prompt_length": 54, + "generated_length": 13, + "total_time": 0.03740096092224121, + "tokens_per_second": 347.58465235766965, + "time_per_token": 2.8769969940185547, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326738_non_optimized", + "timestamp": 1763326738.6017425, + "optimized": false, + "prompt_length": 54, + "generated_length": 41, + "total_time": 0.370577335357666, + "tokens_per_second": 110.63817478321626, + "time_per_token": 9.038471594089415, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326738_optimized", + "timestamp": 1763326738.6901221, + "optimized": true, + "prompt_length": 54, + "generated_length": 31, + "total_time": 0.08722376823425293, + "tokens_per_second": 355.4077131447096, + "time_per_token": 2.813669943040417, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326742_non_optimized", + "timestamp": 1763326742.419483, + "optimized": false, + "prompt_length": 59, + "generated_length": 12, + "total_time": 0.3536853790283203, + "tokens_per_second": 33.928459335716944, + "time_per_token": 29.47378158569336, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326742_optimized", + "timestamp": 1763326742.4236658, + "optimized": true, + "prompt_length": 59, + "generated_length": 0, + "total_time": 0.0030400753021240234, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326745_non_optimized", + "timestamp": 1763326746.1228704, + "optimized": false, + "prompt_length": 65, + "generated_length": 19, + "total_time": 0.3396615982055664, + "tokens_per_second": 55.93802802665087, + "time_per_token": 17.8769262213456, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326745_optimized", + "timestamp": 1763326746.1515133, + "optimized": true, + "prompt_length": 65, + "generated_length": 9, + "total_time": 0.02747321128845215, + "tokens_per_second": 327.59184594423374, + "time_per_token": 3.052579032050239, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326749_non_optimized", + "timestamp": 1763326749.8801262, + "optimized": false, + "prompt_length": 60, + "generated_length": 0, + "total_time": 0.35973238945007324, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326749_optimized", + "timestamp": 1763326749.9388814, + "optimized": true, + "prompt_length": 60, + "generated_length": 20, + "total_time": 0.05757498741149902, + "tokens_per_second": 347.37306770136695, + "time_per_token": 2.878749370574951, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326753_non_optimized", + "timestamp": 1763326753.6588142, + "optimized": false, + "prompt_length": 56, + "generated_length": 22, + "total_time": 0.3657264709472656, + "tokens_per_second": 60.154245720901606, + "time_per_token": 16.62393049760298, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326753_optimized", + "timestamp": 1763326753.6631608, + "optimized": true, + "prompt_length": 56, + "generated_length": 0, + "total_time": 0.0031464099884033203, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326757_non_optimized", + "timestamp": 1763326757.3905053, + "optimized": false, + "prompt_length": 59, + "generated_length": 33, + "total_time": 0.36070775985717773, + "tokens_per_second": 91.4868036469921, + "time_per_token": 10.930538177490234, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326757_optimized", + "timestamp": 1763326757.4786851, + "optimized": true, + "prompt_length": 59, + "generated_length": 31, + "total_time": 0.08695292472839355, + "tokens_per_second": 356.5147474548062, + "time_per_token": 2.804933055754631, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326760_non_optimized", + "timestamp": 1763326761.2264946, + "optimized": false, + "prompt_length": 49, + "generated_length": 17, + "total_time": 0.38045310974121094, + "tokens_per_second": 44.683561691908935, + "time_per_token": 22.379594690659466, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326760_optimized", + "timestamp": 1763326761.3133712, + "optimized": true, + "prompt_length": 49, + "generated_length": 31, + "total_time": 0.08566641807556152, + "tokens_per_second": 361.8687543659949, + "time_per_token": 2.763432841147146, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326764_non_optimized", + "timestamp": 1763326765.0356681, + "optimized": false, + "prompt_length": 57, + "generated_length": 37, + "total_time": 0.3631327152252197, + "tokens_per_second": 101.89112258049266, + "time_per_token": 9.814397708789722, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326764_optimized", + "timestamp": 1763326765.0750856, + "optimized": true, + "prompt_length": 57, + "generated_length": 13, + "total_time": 0.03816509246826172, + "tokens_per_second": 340.62540293360655, + "time_per_token": 2.9357763437124396, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326768_non_optimized", + "timestamp": 1763326768.767545, + "optimized": false, + "prompt_length": 68, + "generated_length": 19, + "total_time": 0.331697940826416, + "tokens_per_second": 57.281030906197486, + "time_per_token": 17.457786359285052, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326768_optimized", + "timestamp": 1763326768.7959456, + "optimized": true, + "prompt_length": 68, + "generated_length": 9, + "total_time": 0.027106285095214844, + "tokens_per_second": 332.0263167153362, + "time_per_token": 3.011809455023872, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326772_non_optimized", + "timestamp": 1763326772.5260317, + "optimized": false, + "prompt_length": 57, + "generated_length": 10, + "total_time": 0.36326122283935547, + "tokens_per_second": 27.52839932056906, + "time_per_token": 36.32612228393555, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326772_optimized", + "timestamp": 1763326772.5303962, + "optimized": true, + "prompt_length": 57, + "generated_length": 0, + "total_time": 0.003071308135986328, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326775_non_optimized", + "timestamp": 1763326776.254764, + "optimized": false, + "prompt_length": 57, + "generated_length": 25, + "total_time": 0.3601036071777344, + "tokens_per_second": 69.42446424220596, + "time_per_token": 14.404144287109375, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326775_optimized", + "timestamp": 1763326776.2603817, + "optimized": true, + "prompt_length": 57, + "generated_length": 0, + "total_time": 0.0030698776245117188, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326779_non_optimized", + "timestamp": 1763326779.9732888, + "optimized": false, + "prompt_length": 62, + "generated_length": 0, + "total_time": 0.3485872745513916, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326779_optimized", + "timestamp": 1763326780.019289, + "optimized": true, + "prompt_length": 62, + "generated_length": 15, + "total_time": 0.0446467399597168, + "tokens_per_second": 335.9707789086948, + "time_per_token": 2.9764493306477866, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326783_non_optimized", + "timestamp": 1763326783.7825809, + "optimized": false, + "prompt_length": 48, + "generated_length": 37, + "total_time": 0.3960244655609131, + "tokens_per_second": 93.42857125656288, + "time_per_token": 10.703363934078732, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326783_optimized", + "timestamp": 1763326783.8163621, + "optimized": true, + "prompt_length": 48, + "generated_length": 11, + "total_time": 0.032434701919555664, + "tokens_per_second": 339.142934850523, + "time_per_token": 2.9486092654141514, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326787_non_optimized", + "timestamp": 1763326787.5348887, + "optimized": false, + "prompt_length": 65, + "generated_length": 0, + "total_time": 0.33835744857788086, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326787_optimized", + "timestamp": 1763326787.539357, + "optimized": true, + "prompt_length": 65, + "generated_length": 0, + "total_time": 0.0030851364135742188, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326790_non_optimized", + "timestamp": 1763326791.2828848, + "optimized": false, + "prompt_length": 54, + "generated_length": 0, + "total_time": 0.37782812118530273, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326790_optimized", + "timestamp": 1763326791.2873359, + "optimized": true, + "prompt_length": 54, + "generated_length": 0, + "total_time": 0.003094911575317383, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326794_non_optimized", + "timestamp": 1763326794.9969409, + "optimized": false, + "prompt_length": 60, + "generated_length": 15, + "total_time": 0.3552377223968506, + "tokens_per_second": 42.225245390023325, + "time_per_token": 23.682514826456707, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326794_optimized", + "timestamp": 1763326795.0417602, + "optimized": true, + "prompt_length": 60, + "generated_length": 15, + "total_time": 0.043425798416137695, + "tokens_per_second": 345.41679248494296, + "time_per_token": 2.895053227742513, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326798_non_optimized", + "timestamp": 1763326798.768397, + "optimized": false, + "prompt_length": 56, + "generated_length": 0, + "total_time": 0.3616046905517578, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326798_optimized", + "timestamp": 1763326798.7728815, + "optimized": true, + "prompt_length": 56, + "generated_length": 0, + "total_time": 0.0030786991119384766, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326802_non_optimized", + "timestamp": 1763326802.5161467, + "optimized": false, + "prompt_length": 55, + "generated_length": 8, + "total_time": 0.367128849029541, + "tokens_per_second": 21.790714680001297, + "time_per_token": 45.89110612869263, + "memory_used_mb": 8.12548828125, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326802_optimized", + "timestamp": 1763326802.5774758, + "optimized": true, + "prompt_length": 55, + "generated_length": 21, + "total_time": 0.05986332893371582, + "tokens_per_second": 350.79906804468607, + "time_per_token": 2.8506347111293246, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326805_non_optimized", + "timestamp": 1763326806.225911, + "optimized": false, + "prompt_length": 86, + "generated_length": 14, + "total_time": 0.2903730869293213, + "tokens_per_second": 48.21383464992984, + "time_per_token": 20.740934780665807, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326805_optimized", + "timestamp": 1763326806.23062, + "optimized": true, + "prompt_length": 86, + "generated_length": 0, + "total_time": 0.003251314163208008, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326809_non_optimized", + "timestamp": 1763326809.8743262, + "optimized": false, + "prompt_length": 87, + "generated_length": 0, + "total_time": 0.2792494297027588, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326809_optimized", + "timestamp": 1763326809.8790207, + "optimized": true, + "prompt_length": 87, + "generated_length": 0, + "total_time": 0.0032303333282470703, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326813_non_optimized", + "timestamp": 1763326813.5236838, + "optimized": false, + "prompt_length": 92, + "generated_length": 8, + "total_time": 0.2737746238708496, + "tokens_per_second": 29.22111584663858, + "time_per_token": 34.2218279838562, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326813_optimized", + "timestamp": 1763326813.5475192, + "optimized": true, + "prompt_length": 92, + "generated_length": 8, + "total_time": 0.022355318069458008, + "tokens_per_second": 357.8566842638511, + "time_per_token": 2.794414758682251, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326816_non_optimized", + "timestamp": 1763326817.1884215, + "optimized": false, + "prompt_length": 91, + "generated_length": 9, + "total_time": 0.2681541442871094, + "tokens_per_second": 33.56278540436731, + "time_per_token": 29.794904920789932, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326816_optimized", + "timestamp": 1763326817.193147, + "optimized": true, + "prompt_length": 91, + "generated_length": 0, + "total_time": 0.0032160282135009766, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326820_non_optimized", + "timestamp": 1763326820.8178222, + "optimized": false, + "prompt_length": 96, + "generated_length": 0, + "total_time": 0.25312256813049316, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326820_optimized", + "timestamp": 1763326820.8225522, + "optimized": true, + "prompt_length": 96, + "generated_length": 0, + "total_time": 0.003218412399291992, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326824_non_optimized", + "timestamp": 1763326824.1973226, + "optimized": false, + "prompt_length": 100, + "generated_length": 0, + "total_time": 0.00023245811462402344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326824_optimized", + "timestamp": 1763326824.198983, + "optimized": true, + "prompt_length": 100, + "generated_length": 0, + "total_time": 0.0003190040588378906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326827_non_optimized", + "timestamp": 1763326827.7810934, + "optimized": false, + "prompt_length": 98, + "generated_length": 2, + "total_time": 0.24810361862182617, + "tokens_per_second": 8.061148044150517, + "time_per_token": 124.05180931091309, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326827_optimized", + "timestamp": 1763326827.7893782, + "optimized": true, + "prompt_length": 98, + "generated_length": 2, + "total_time": 0.006571054458618164, + "tokens_per_second": 304.3651536591561, + "time_per_token": 3.285527229309082, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326831_non_optimized", + "timestamp": 1763326831.4181712, + "optimized": false, + "prompt_length": 95, + "generated_length": 5, + "total_time": 0.26477837562561035, + "tokens_per_second": 18.883717328449315, + "time_per_token": 52.95567512512207, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326831_optimized", + "timestamp": 1763326831.425718, + "optimized": true, + "prompt_length": 95, + "generated_length": 1, + "total_time": 0.00593876838684082, + "tokens_per_second": 168.38508169737847, + "time_per_token": 5.93876838684082, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326834_non_optimized", + "timestamp": 1763326835.0461798, + "optimized": false, + "prompt_length": 99, + "generated_length": 1, + "total_time": 0.25382065773010254, + "tokens_per_second": 3.9397896488919324, + "time_per_token": 253.82065773010254, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326834_optimized", + "timestamp": 1763326835.0514338, + "optimized": true, + "prompt_length": 99, + "generated_length": 1, + "total_time": 0.0036373138427734375, + "tokens_per_second": 274.9281594126901, + "time_per_token": 3.6373138427734375, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326838_non_optimized", + "timestamp": 1763326838.404944, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0002460479736328125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326838_optimized", + "timestamp": 1763326838.4067247, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.00032591819763183594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326841_non_optimized", + "timestamp": 1763326841.7476485, + "optimized": false, + "prompt_length": 111, + "generated_length": 0, + "total_time": 0.00024366378784179688, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326841_optimized", + "timestamp": 1763326841.7494369, + "optimized": true, + "prompt_length": 111, + "generated_length": 0, + "total_time": 0.000324249267578125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326845_non_optimized", + "timestamp": 1763326845.076511, + "optimized": false, + "prompt_length": 104, + "generated_length": 0, + "total_time": 0.0002353191375732422, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326845_optimized", + "timestamp": 1763326845.078336, + "optimized": true, + "prompt_length": 104, + "generated_length": 0, + "total_time": 0.0003311634063720703, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326848_non_optimized", + "timestamp": 1763326848.650482, + "optimized": false, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.2599804401397705, + "tokens_per_second": 11.539329644903832, + "time_per_token": 86.66014671325684, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326848_optimized", + "timestamp": 1763326848.655401, + "optimized": true, + "prompt_length": 97, + "generated_length": 0, + "total_time": 0.0032651424407958984, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326851_non_optimized", + "timestamp": 1763326852.0037806, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00024008750915527344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326851_optimized", + "timestamp": 1763326852.0057652, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00032830238342285156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326855_non_optimized", + "timestamp": 1763326855.3215888, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0002357959747314453, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326855_optimized", + "timestamp": 1763326855.3233974, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0003199577331542969, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326858_non_optimized", + "timestamp": 1763326858.6427326, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002455711364746094, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326858_optimized", + "timestamp": 1763326858.6446369, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0003333091735839844, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326861_non_optimized", + "timestamp": 1763326862.2181683, + "optimized": false, + "prompt_length": 94, + "generated_length": 0, + "total_time": 0.26184773445129395, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326861_optimized", + "timestamp": 1763326862.2230783, + "optimized": true, + "prompt_length": 94, + "generated_length": 0, + "total_time": 0.0032091140747070312, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326865_non_optimized", + "timestamp": 1763326865.582198, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.000240325927734375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326865_optimized", + "timestamp": 1763326865.5841131, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0003209114074707031, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326868_non_optimized", + "timestamp": 1763326868.9020677, + "optimized": false, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.0002429485321044922, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326868_optimized", + "timestamp": 1763326868.9039805, + "optimized": true, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.00033211708068847656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326872_non_optimized", + "timestamp": 1763326872.2377539, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.00024199485778808594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326872_optimized", + "timestamp": 1763326872.2396657, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.00032520294189453125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326875_non_optimized", + "timestamp": 1763326875.572948, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.0002472400665283203, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326875_optimized", + "timestamp": 1763326875.5749235, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.000339508056640625, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326878_non_optimized", + "timestamp": 1763326878.917066, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.00023293495178222656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326878_optimized", + "timestamp": 1763326878.9190118, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.00032639503479003906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326882_non_optimized", + "timestamp": 1763326882.2492902, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00023746490478515625, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326882_optimized", + "timestamp": 1763326882.251233, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0003230571746826172, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326885_non_optimized", + "timestamp": 1763326885.5688546, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.0002415180206298828, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326885_optimized", + "timestamp": 1763326885.5708404, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.00032401084899902344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326888_non_optimized", + "timestamp": 1763326888.8873081, + "optimized": false, + "prompt_length": 114, + "generated_length": 0, + "total_time": 0.00024056434631347656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326888_optimized", + "timestamp": 1763326888.8893402, + "optimized": true, + "prompt_length": 114, + "generated_length": 0, + "total_time": 0.0003287792205810547, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326892_non_optimized", + "timestamp": 1763326892.2040594, + "optimized": false, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.0002467632293701172, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326892_optimized", + "timestamp": 1763326892.2060897, + "optimized": true, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.00032711029052734375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326895_non_optimized", + "timestamp": 1763326895.5235405, + "optimized": false, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.00023317337036132812, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326895_optimized", + "timestamp": 1763326895.5255647, + "optimized": true, + "prompt_length": 113, + "generated_length": 0, + "total_time": 0.0003211498260498047, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326898_non_optimized", + "timestamp": 1763326898.837703, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.0002307891845703125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326898_optimized", + "timestamp": 1763326898.839746, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00032401084899902344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326902_non_optimized", + "timestamp": 1763326902.1575205, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0002338886260986328, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326902_optimized", + "timestamp": 1763326902.1595387, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0003142356872558594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326905_non_optimized", + "timestamp": 1763326905.4874718, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023674964904785156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326905_optimized", + "timestamp": 1763326905.4895573, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00033164024353027344, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326908_non_optimized", + "timestamp": 1763326908.803102, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00025081634521484375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326908_optimized", + "timestamp": 1763326908.8052208, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00031828880310058594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326912_non_optimized", + "timestamp": 1763326912.1360228, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00023627281188964844, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326912_optimized", + "timestamp": 1763326912.1381037, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.0003142356872558594, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326915_non_optimized", + "timestamp": 1763326915.4476118, + "optimized": false, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.0002300739288330078, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326915_optimized", + "timestamp": 1763326915.449725, + "optimized": true, + "prompt_length": 103, + "generated_length": 0, + "total_time": 0.0003192424774169922, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326918_non_optimized", + "timestamp": 1763326918.7690768, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023889541625976562, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326918_optimized", + "timestamp": 1763326918.7712831, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00032782554626464844, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326922_non_optimized", + "timestamp": 1763326922.0759387, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0002472400665283203, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326922_optimized", + "timestamp": 1763326922.078079, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.000324249267578125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326925_non_optimized", + "timestamp": 1763326925.3886678, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00023698806762695312, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326925_optimized", + "timestamp": 1763326925.3908367, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00032520294189453125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326928_non_optimized", + "timestamp": 1763326928.6967072, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002300739288330078, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326928_optimized", + "timestamp": 1763326928.6988983, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0003209114074707031, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326932_non_optimized", + "timestamp": 1763326932.2863104, + "optimized": false, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.25337719917297363, + "tokens_per_second": 11.840055102795507, + "time_per_token": 84.45906639099121, + "memory_used_mb": 8.1259765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326932_optimized", + "timestamp": 1763326932.2974646, + "optimized": true, + "prompt_length": 97, + "generated_length": 3, + "total_time": 0.008779525756835938, + "tokens_per_second": 341.70410601781447, + "time_per_token": 2.926508585611979, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326935_non_optimized", + "timestamp": 1763326935.6391814, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023484230041503906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326935_optimized", + "timestamp": 1763326935.6413832, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00031876564025878906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326938_non_optimized", + "timestamp": 1763326938.9412687, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00023102760314941406, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326938_optimized", + "timestamp": 1763326938.9434795, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0003178119659423828, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326942_non_optimized", + "timestamp": 1763326942.2639933, + "optimized": false, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00024819374084472656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326942_optimized", + "timestamp": 1763326942.266253, + "optimized": true, + "prompt_length": 109, + "generated_length": 0, + "total_time": 0.00033283233642578125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326945_non_optimized", + "timestamp": 1763326945.5876389, + "optimized": false, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.00024247169494628906, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326945_optimized", + "timestamp": 1763326945.5899763, + "optimized": true, + "prompt_length": 112, + "generated_length": 0, + "total_time": 0.0003368854522705078, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326948_non_optimized", + "timestamp": 1763326948.9048371, + "optimized": false, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0002422332763671875, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326948_optimized", + "timestamp": 1763326948.9071276, + "optimized": true, + "prompt_length": 107, + "generated_length": 0, + "total_time": 0.0003273487091064453, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326952_non_optimized", + "timestamp": 1763326952.2211714, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.00023603439331054688, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326952_optimized", + "timestamp": 1763326952.2234442, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.00032329559326171875, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326955_non_optimized", + "timestamp": 1763326955.5458589, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002446174621582031, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326955_optimized", + "timestamp": 1763326955.5481744, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00031685829162597656, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326958_non_optimized", + "timestamp": 1763326958.8506002, + "optimized": false, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.00022912025451660156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326958_optimized", + "timestamp": 1763326958.85293, + "optimized": true, + "prompt_length": 102, + "generated_length": 0, + "total_time": 0.0003216266632080078, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326962_non_optimized", + "timestamp": 1763326962.1738048, + "optimized": false, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.00024080276489257812, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326962_optimized", + "timestamp": 1763326962.1762178, + "optimized": true, + "prompt_length": 106, + "generated_length": 0, + "total_time": 0.0003228187561035156, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326965_non_optimized", + "timestamp": 1763326965.4970171, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.00023508071899414062, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326965_optimized", + "timestamp": 1763326965.4993877, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0003216266632080078, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326968_non_optimized", + "timestamp": 1763326968.8236582, + "optimized": false, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0002491474151611328, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326968_optimized", + "timestamp": 1763326968.826099, + "optimized": true, + "prompt_length": 105, + "generated_length": 0, + "total_time": 0.0003414154052734375, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326972_non_optimized", + "timestamp": 1763326972.1689265, + "optimized": false, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.00025177001953125, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0009765625, + "gpu_utilization": null, + "device": "cuda" + }, + { + "run_name": "run_1763326972_optimized", + "timestamp": 1763326972.171371, + "optimized": true, + "prompt_length": 110, + "generated_length": 0, + "total_time": 0.0003230571746826172, + "tokens_per_second": 0, + "time_per_token": 0, + "memory_used_mb": 0.0, + "gpu_utilization": null, + "device": "cuda" + } + ] +} \ No newline at end of file