Files
sheepOp/inference_benchmarks/inference_metrics.json
2025-11-16 21:03:15 +00:00

2604 lines
74 KiB
JSON

{
"runs": [
{
"run_name": "run_1763326618_non_optimized",
"timestamp": 1763326618.9855335,
"optimized": false,
"prompt_length": 53,
"generated_length": 5,
"total_time": 0.4859466552734375,
"tokens_per_second": 10.289195214619902,
"time_per_token": 97.1893310546875,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326618_optimized",
"timestamp": 1763326619.026237,
"optimized": true,
"prompt_length": 53,
"generated_length": 14,
"total_time": 0.04017782211303711,
"tokens_per_second": 348.4509429259782,
"time_per_token": 2.869844436645508,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326622_non_optimized",
"timestamp": 1763326622.7140436,
"optimized": false,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.3553469181060791,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326622_optimized",
"timestamp": 1763326622.717709,
"optimized": true,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.003069162368774414,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326626_non_optimized",
"timestamp": 1763326626.4152546,
"optimized": false,
"prompt_length": 60,
"generated_length": 14,
"total_time": 0.3536839485168457,
"tokens_per_second": 39.58336265671155,
"time_per_token": 25.263139179774694,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326626_optimized",
"timestamp": 1763326626.4510481,
"optimized": true,
"prompt_length": 60,
"generated_length": 12,
"total_time": 0.03518319129943848,
"tokens_per_second": 341.0719595579017,
"time_per_token": 2.9319326082865396,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326629_non_optimized",
"timestamp": 1763326630.1408024,
"optimized": false,
"prompt_length": 61,
"generated_length": 16,
"total_time": 0.35126209259033203,
"tokens_per_second": 45.55003325867101,
"time_per_token": 21.953880786895752,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326629_optimized",
"timestamp": 1763326630.1444962,
"optimized": true,
"prompt_length": 61,
"generated_length": 0,
"total_time": 0.003066539764404297,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326633_non_optimized",
"timestamp": 1763326633.8381388,
"optimized": false,
"prompt_length": 60,
"generated_length": 0,
"total_time": 0.35619688034057617,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326633_optimized",
"timestamp": 1763326633.8418753,
"optimized": true,
"prompt_length": 60,
"generated_length": 0,
"total_time": 0.003074169158935547,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326637_non_optimized",
"timestamp": 1763326637.540628,
"optimized": false,
"prompt_length": 64,
"generated_length": 8,
"total_time": 0.3398597240447998,
"tokens_per_second": 23.539123450078044,
"time_per_token": 42.482465505599976,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326637_optimized",
"timestamp": 1763326637.5471995,
"optimized": true,
"prompt_length": 64,
"generated_length": 1,
"total_time": 0.005898714065551758,
"tokens_per_second": 169.52847500101046,
"time_per_token": 5.898714065551758,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326640_non_optimized",
"timestamp": 1763326641.2843578,
"optimized": false,
"prompt_length": 54,
"generated_length": 30,
"total_time": 0.37198519706726074,
"tokens_per_second": 80.64837051721585,
"time_per_token": 12.399506568908691,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326640_optimized",
"timestamp": 1763326641.3555984,
"optimized": true,
"prompt_length": 54,
"generated_length": 25,
"total_time": 0.0705265998840332,
"tokens_per_second": 354.4761840370508,
"time_per_token": 2.821063995361328,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326644_non_optimized",
"timestamp": 1763326645.0330086,
"optimized": false,
"prompt_length": 67,
"generated_length": 0,
"total_time": 0.333660364151001,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326644_optimized",
"timestamp": 1763326645.0367913,
"optimized": true,
"prompt_length": 67,
"generated_length": 0,
"total_time": 0.0030765533447265625,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326648_non_optimized",
"timestamp": 1763326648.755835,
"optimized": false,
"prompt_length": 63,
"generated_length": 17,
"total_time": 0.34771132469177246,
"tokens_per_second": 48.891131213714694,
"time_per_token": 20.453607334810144,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326648_optimized",
"timestamp": 1763326648.7840471,
"optimized": true,
"prompt_length": 63,
"generated_length": 9,
"total_time": 0.027483701705932617,
"tokens_per_second": 327.46680546519195,
"time_per_token": 3.053744633992513,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326652_non_optimized",
"timestamp": 1763326652.4592273,
"optimized": false,
"prompt_length": 73,
"generated_length": 19,
"total_time": 0.31671810150146484,
"tokens_per_second": 59.990256035025276,
"time_per_token": 16.66937376323499,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326652_optimized",
"timestamp": 1763326652.4657354,
"optimized": true,
"prompt_length": 73,
"generated_length": 1,
"total_time": 0.005753517150878906,
"tokens_per_second": 173.80672965357203,
"time_per_token": 5.753517150878906,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326655_non_optimized",
"timestamp": 1763326656.2020977,
"optimized": false,
"prompt_length": 46,
"generated_length": 0,
"total_time": 0.392134428024292,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326655_optimized",
"timestamp": 1763326656.2059286,
"optimized": true,
"prompt_length": 46,
"generated_length": 0,
"total_time": 0.003073453903198242,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326659_non_optimized",
"timestamp": 1763326659.8722565,
"optimized": false,
"prompt_length": 70,
"generated_length": 17,
"total_time": 0.33252859115600586,
"tokens_per_second": 51.12342352548099,
"time_per_token": 19.56050536211799,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326659_optimized",
"timestamp": 1763326659.913114,
"optimized": true,
"prompt_length": 70,
"generated_length": 14,
"total_time": 0.040065765380859375,
"tokens_per_second": 349.42549747691135,
"time_per_token": 2.861840384347098,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326663_non_optimized",
"timestamp": 1763326663.6047928,
"optimized": false,
"prompt_length": 66,
"generated_length": 14,
"total_time": 0.3415799140930176,
"tokens_per_second": 40.986016514389014,
"time_per_token": 24.3985652923584,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326663_optimized",
"timestamp": 1763326663.6473217,
"optimized": true,
"prompt_length": 66,
"generated_length": 14,
"total_time": 0.041661977767944336,
"tokens_per_second": 336.037815534814,
"time_per_token": 2.9758555548531667,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326666_non_optimized",
"timestamp": 1763326667.383774,
"optimized": false,
"prompt_length": 50,
"generated_length": 0,
"total_time": 0.38038086891174316,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326666_optimized",
"timestamp": 1763326667.4496503,
"optimized": true,
"prompt_length": 50,
"generated_length": 23,
"total_time": 0.06507229804992676,
"tokens_per_second": 353.45301594164135,
"time_per_token": 2.829230349996816,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326670_non_optimized",
"timestamp": 1763326671.1408262,
"optimized": false,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.34095191955566406,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326670_optimized",
"timestamp": 1763326671.1447349,
"optimized": true,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.0030736923217773438,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326674_non_optimized",
"timestamp": 1763326674.9176548,
"optimized": false,
"prompt_length": 43,
"generated_length": 43,
"total_time": 0.4153327941894531,
"tokens_per_second": 103.53143455458913,
"time_per_token": 9.6589021904524,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326674_optimized",
"timestamp": 1763326675.01074,
"optimized": true,
"prompt_length": 43,
"generated_length": 32,
"total_time": 0.09222531318664551,
"tokens_per_second": 346.97632238167006,
"time_per_token": 2.882041037082672,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326678_non_optimized",
"timestamp": 1763326678.7116284,
"optimized": false,
"prompt_length": 61,
"generated_length": 0,
"total_time": 0.34818243980407715,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326678_optimized",
"timestamp": 1763326678.7930346,
"optimized": true,
"prompt_length": 61,
"generated_length": 29,
"total_time": 0.08052730560302734,
"tokens_per_second": 360.1262923530596,
"time_per_token": 2.776803641483702,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326682_non_optimized",
"timestamp": 1763326682.5197363,
"optimized": false,
"prompt_length": 58,
"generated_length": 0,
"total_time": 0.3616158962249756,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326682_optimized",
"timestamp": 1763326682.579515,
"optimized": true,
"prompt_length": 58,
"generated_length": 19,
"total_time": 0.05885767936706543,
"tokens_per_second": 322.8125913953667,
"time_per_token": 3.0977725982666016,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326685_non_optimized",
"timestamp": 1763326686.2714303,
"optimized": false,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.3391129970550537,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326685_optimized",
"timestamp": 1763326686.275417,
"optimized": true,
"prompt_length": 66,
"generated_length": 0,
"total_time": 0.0030994415283203125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326689_non_optimized",
"timestamp": 1763326689.9624639,
"optimized": false,
"prompt_length": 67,
"generated_length": 28,
"total_time": 0.3353545665740967,
"tokens_per_second": 83.4937191583267,
"time_per_token": 11.976948806217738,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326689_optimized",
"timestamp": 1763326689.9768,
"optimized": true,
"prompt_length": 67,
"generated_length": 3,
"total_time": 0.011225700378417969,
"tokens_per_second": 267.24390451108655,
"time_per_token": 3.7419001261393228,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326693_non_optimized",
"timestamp": 1763326693.711334,
"optimized": false,
"prompt_length": 59,
"generated_length": 24,
"total_time": 0.3620493412017822,
"tokens_per_second": 66.28930719866715,
"time_per_token": 15.085389216740927,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326693_optimized",
"timestamp": 1763326693.7152996,
"optimized": true,
"prompt_length": 59,
"generated_length": 0,
"total_time": 0.0030515193939208984,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326697_non_optimized",
"timestamp": 1763326697.4116411,
"optimized": false,
"prompt_length": 63,
"generated_length": 0,
"total_time": 0.3452737331390381,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326697_optimized",
"timestamp": 1763326697.4156432,
"optimized": true,
"prompt_length": 63,
"generated_length": 0,
"total_time": 0.0030455589294433594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326700_non_optimized",
"timestamp": 1763326701.1587586,
"optimized": false,
"prompt_length": 53,
"generated_length": 19,
"total_time": 0.3794400691986084,
"tokens_per_second": 50.07378382606958,
"time_per_token": 19.970529957821494,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326700_optimized",
"timestamp": 1763326701.2194357,
"optimized": true,
"prompt_length": 53,
"generated_length": 21,
"total_time": 0.05968618392944336,
"tokens_per_second": 351.84021858098123,
"time_per_token": 2.8421992347353977,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326704_non_optimized",
"timestamp": 1763326704.906457,
"optimized": false,
"prompt_length": 67,
"generated_length": 0,
"total_time": 0.3325045108795166,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326704_optimized",
"timestamp": 1763326704.9105146,
"optimized": true,
"prompt_length": 67,
"generated_length": 0,
"total_time": 0.0030672550201416016,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326708_non_optimized",
"timestamp": 1763326708.6155152,
"optimized": false,
"prompt_length": 64,
"generated_length": 4,
"total_time": 0.3434574604034424,
"tokens_per_second": 11.646274899084734,
"time_per_token": 85.8643651008606,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326708_optimized",
"timestamp": 1763326708.6492803,
"optimized": true,
"prompt_length": 64,
"generated_length": 11,
"total_time": 0.03272819519042969,
"tokens_per_second": 336.1016376245702,
"time_per_token": 2.9752904718572446,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326712_non_optimized",
"timestamp": 1763326712.3875144,
"optimized": false,
"prompt_length": 54,
"generated_length": 5,
"total_time": 0.3697817325592041,
"tokens_per_second": 13.521490002753103,
"time_per_token": 73.95634651184082,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326712_optimized",
"timestamp": 1763326712.3916335,
"optimized": true,
"prompt_length": 54,
"generated_length": 0,
"total_time": 0.0030570030212402344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326715_non_optimized",
"timestamp": 1763326716.129369,
"optimized": false,
"prompt_length": 51,
"generated_length": 0,
"total_time": 0.38543081283569336,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326715_optimized",
"timestamp": 1763326716.133514,
"optimized": true,
"prompt_length": 51,
"generated_length": 0,
"total_time": 0.003108978271484375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326719_non_optimized",
"timestamp": 1763326719.845239,
"optimized": false,
"prompt_length": 60,
"generated_length": 10,
"total_time": 0.35239410400390625,
"tokens_per_second": 28.377319275152093,
"time_per_token": 35.239410400390625,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326719_optimized",
"timestamp": 1763326719.8974886,
"optimized": true,
"prompt_length": 60,
"generated_length": 18,
"total_time": 0.051172733306884766,
"tokens_per_second": 351.7498252839718,
"time_per_token": 2.842929628160265,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326723_non_optimized",
"timestamp": 1763326723.6197646,
"optimized": false,
"prompt_length": 56,
"generated_length": 0,
"total_time": 0.3644559383392334,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326723_optimized",
"timestamp": 1763326723.6239054,
"optimized": true,
"prompt_length": 56,
"generated_length": 0,
"total_time": 0.0030570030212402344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326726_non_optimized",
"timestamp": 1763326727.3191404,
"optimized": false,
"prompt_length": 65,
"generated_length": 0,
"total_time": 0.34804534912109375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326726_optimized",
"timestamp": 1763326727.3233125,
"optimized": true,
"prompt_length": 65,
"generated_length": 0,
"total_time": 0.0030825138092041016,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326730_non_optimized",
"timestamp": 1763326731.044284,
"optimized": false,
"prompt_length": 59,
"generated_length": 16,
"total_time": 0.35839104652404785,
"tokens_per_second": 44.64397242950211,
"time_per_token": 22.39944040775299,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326730_optimized",
"timestamp": 1763326731.048486,
"optimized": true,
"prompt_length": 59,
"generated_length": 0,
"total_time": 0.0030989646911621094,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326734_non_optimized",
"timestamp": 1763326734.8039234,
"optimized": false,
"prompt_length": 54,
"generated_length": 0,
"total_time": 0.3762962818145752,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326734_optimized",
"timestamp": 1763326734.84245,
"optimized": true,
"prompt_length": 54,
"generated_length": 13,
"total_time": 0.03740096092224121,
"tokens_per_second": 347.58465235766965,
"time_per_token": 2.8769969940185547,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326738_non_optimized",
"timestamp": 1763326738.6017425,
"optimized": false,
"prompt_length": 54,
"generated_length": 41,
"total_time": 0.370577335357666,
"tokens_per_second": 110.63817478321626,
"time_per_token": 9.038471594089415,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326738_optimized",
"timestamp": 1763326738.6901221,
"optimized": true,
"prompt_length": 54,
"generated_length": 31,
"total_time": 0.08722376823425293,
"tokens_per_second": 355.4077131447096,
"time_per_token": 2.813669943040417,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326742_non_optimized",
"timestamp": 1763326742.419483,
"optimized": false,
"prompt_length": 59,
"generated_length": 12,
"total_time": 0.3536853790283203,
"tokens_per_second": 33.928459335716944,
"time_per_token": 29.47378158569336,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326742_optimized",
"timestamp": 1763326742.4236658,
"optimized": true,
"prompt_length": 59,
"generated_length": 0,
"total_time": 0.0030400753021240234,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326745_non_optimized",
"timestamp": 1763326746.1228704,
"optimized": false,
"prompt_length": 65,
"generated_length": 19,
"total_time": 0.3396615982055664,
"tokens_per_second": 55.93802802665087,
"time_per_token": 17.8769262213456,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326745_optimized",
"timestamp": 1763326746.1515133,
"optimized": true,
"prompt_length": 65,
"generated_length": 9,
"total_time": 0.02747321128845215,
"tokens_per_second": 327.59184594423374,
"time_per_token": 3.052579032050239,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326749_non_optimized",
"timestamp": 1763326749.8801262,
"optimized": false,
"prompt_length": 60,
"generated_length": 0,
"total_time": 0.35973238945007324,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326749_optimized",
"timestamp": 1763326749.9388814,
"optimized": true,
"prompt_length": 60,
"generated_length": 20,
"total_time": 0.05757498741149902,
"tokens_per_second": 347.37306770136695,
"time_per_token": 2.878749370574951,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326753_non_optimized",
"timestamp": 1763326753.6588142,
"optimized": false,
"prompt_length": 56,
"generated_length": 22,
"total_time": 0.3657264709472656,
"tokens_per_second": 60.154245720901606,
"time_per_token": 16.62393049760298,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326753_optimized",
"timestamp": 1763326753.6631608,
"optimized": true,
"prompt_length": 56,
"generated_length": 0,
"total_time": 0.0031464099884033203,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326757_non_optimized",
"timestamp": 1763326757.3905053,
"optimized": false,
"prompt_length": 59,
"generated_length": 33,
"total_time": 0.36070775985717773,
"tokens_per_second": 91.4868036469921,
"time_per_token": 10.930538177490234,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326757_optimized",
"timestamp": 1763326757.4786851,
"optimized": true,
"prompt_length": 59,
"generated_length": 31,
"total_time": 0.08695292472839355,
"tokens_per_second": 356.5147474548062,
"time_per_token": 2.804933055754631,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326760_non_optimized",
"timestamp": 1763326761.2264946,
"optimized": false,
"prompt_length": 49,
"generated_length": 17,
"total_time": 0.38045310974121094,
"tokens_per_second": 44.683561691908935,
"time_per_token": 22.379594690659466,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326760_optimized",
"timestamp": 1763326761.3133712,
"optimized": true,
"prompt_length": 49,
"generated_length": 31,
"total_time": 0.08566641807556152,
"tokens_per_second": 361.8687543659949,
"time_per_token": 2.763432841147146,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326764_non_optimized",
"timestamp": 1763326765.0356681,
"optimized": false,
"prompt_length": 57,
"generated_length": 37,
"total_time": 0.3631327152252197,
"tokens_per_second": 101.89112258049266,
"time_per_token": 9.814397708789722,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326764_optimized",
"timestamp": 1763326765.0750856,
"optimized": true,
"prompt_length": 57,
"generated_length": 13,
"total_time": 0.03816509246826172,
"tokens_per_second": 340.62540293360655,
"time_per_token": 2.9357763437124396,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326768_non_optimized",
"timestamp": 1763326768.767545,
"optimized": false,
"prompt_length": 68,
"generated_length": 19,
"total_time": 0.331697940826416,
"tokens_per_second": 57.281030906197486,
"time_per_token": 17.457786359285052,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326768_optimized",
"timestamp": 1763326768.7959456,
"optimized": true,
"prompt_length": 68,
"generated_length": 9,
"total_time": 0.027106285095214844,
"tokens_per_second": 332.0263167153362,
"time_per_token": 3.011809455023872,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326772_non_optimized",
"timestamp": 1763326772.5260317,
"optimized": false,
"prompt_length": 57,
"generated_length": 10,
"total_time": 0.36326122283935547,
"tokens_per_second": 27.52839932056906,
"time_per_token": 36.32612228393555,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326772_optimized",
"timestamp": 1763326772.5303962,
"optimized": true,
"prompt_length": 57,
"generated_length": 0,
"total_time": 0.003071308135986328,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326775_non_optimized",
"timestamp": 1763326776.254764,
"optimized": false,
"prompt_length": 57,
"generated_length": 25,
"total_time": 0.3601036071777344,
"tokens_per_second": 69.42446424220596,
"time_per_token": 14.404144287109375,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326775_optimized",
"timestamp": 1763326776.2603817,
"optimized": true,
"prompt_length": 57,
"generated_length": 0,
"total_time": 0.0030698776245117188,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326779_non_optimized",
"timestamp": 1763326779.9732888,
"optimized": false,
"prompt_length": 62,
"generated_length": 0,
"total_time": 0.3485872745513916,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326779_optimized",
"timestamp": 1763326780.019289,
"optimized": true,
"prompt_length": 62,
"generated_length": 15,
"total_time": 0.0446467399597168,
"tokens_per_second": 335.9707789086948,
"time_per_token": 2.9764493306477866,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326783_non_optimized",
"timestamp": 1763326783.7825809,
"optimized": false,
"prompt_length": 48,
"generated_length": 37,
"total_time": 0.3960244655609131,
"tokens_per_second": 93.42857125656288,
"time_per_token": 10.703363934078732,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326783_optimized",
"timestamp": 1763326783.8163621,
"optimized": true,
"prompt_length": 48,
"generated_length": 11,
"total_time": 0.032434701919555664,
"tokens_per_second": 339.142934850523,
"time_per_token": 2.9486092654141514,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326787_non_optimized",
"timestamp": 1763326787.5348887,
"optimized": false,
"prompt_length": 65,
"generated_length": 0,
"total_time": 0.33835744857788086,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326787_optimized",
"timestamp": 1763326787.539357,
"optimized": true,
"prompt_length": 65,
"generated_length": 0,
"total_time": 0.0030851364135742188,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326790_non_optimized",
"timestamp": 1763326791.2828848,
"optimized": false,
"prompt_length": 54,
"generated_length": 0,
"total_time": 0.37782812118530273,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326790_optimized",
"timestamp": 1763326791.2873359,
"optimized": true,
"prompt_length": 54,
"generated_length": 0,
"total_time": 0.003094911575317383,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326794_non_optimized",
"timestamp": 1763326794.9969409,
"optimized": false,
"prompt_length": 60,
"generated_length": 15,
"total_time": 0.3552377223968506,
"tokens_per_second": 42.225245390023325,
"time_per_token": 23.682514826456707,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326794_optimized",
"timestamp": 1763326795.0417602,
"optimized": true,
"prompt_length": 60,
"generated_length": 15,
"total_time": 0.043425798416137695,
"tokens_per_second": 345.41679248494296,
"time_per_token": 2.895053227742513,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326798_non_optimized",
"timestamp": 1763326798.768397,
"optimized": false,
"prompt_length": 56,
"generated_length": 0,
"total_time": 0.3616046905517578,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326798_optimized",
"timestamp": 1763326798.7728815,
"optimized": true,
"prompt_length": 56,
"generated_length": 0,
"total_time": 0.0030786991119384766,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326802_non_optimized",
"timestamp": 1763326802.5161467,
"optimized": false,
"prompt_length": 55,
"generated_length": 8,
"total_time": 0.367128849029541,
"tokens_per_second": 21.790714680001297,
"time_per_token": 45.89110612869263,
"memory_used_mb": 8.12548828125,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326802_optimized",
"timestamp": 1763326802.5774758,
"optimized": true,
"prompt_length": 55,
"generated_length": 21,
"total_time": 0.05986332893371582,
"tokens_per_second": 350.79906804468607,
"time_per_token": 2.8506347111293246,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326805_non_optimized",
"timestamp": 1763326806.225911,
"optimized": false,
"prompt_length": 86,
"generated_length": 14,
"total_time": 0.2903730869293213,
"tokens_per_second": 48.21383464992984,
"time_per_token": 20.740934780665807,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326805_optimized",
"timestamp": 1763326806.23062,
"optimized": true,
"prompt_length": 86,
"generated_length": 0,
"total_time": 0.003251314163208008,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326809_non_optimized",
"timestamp": 1763326809.8743262,
"optimized": false,
"prompt_length": 87,
"generated_length": 0,
"total_time": 0.2792494297027588,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326809_optimized",
"timestamp": 1763326809.8790207,
"optimized": true,
"prompt_length": 87,
"generated_length": 0,
"total_time": 0.0032303333282470703,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326813_non_optimized",
"timestamp": 1763326813.5236838,
"optimized": false,
"prompt_length": 92,
"generated_length": 8,
"total_time": 0.2737746238708496,
"tokens_per_second": 29.22111584663858,
"time_per_token": 34.2218279838562,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326813_optimized",
"timestamp": 1763326813.5475192,
"optimized": true,
"prompt_length": 92,
"generated_length": 8,
"total_time": 0.022355318069458008,
"tokens_per_second": 357.8566842638511,
"time_per_token": 2.794414758682251,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326816_non_optimized",
"timestamp": 1763326817.1884215,
"optimized": false,
"prompt_length": 91,
"generated_length": 9,
"total_time": 0.2681541442871094,
"tokens_per_second": 33.56278540436731,
"time_per_token": 29.794904920789932,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326816_optimized",
"timestamp": 1763326817.193147,
"optimized": true,
"prompt_length": 91,
"generated_length": 0,
"total_time": 0.0032160282135009766,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326820_non_optimized",
"timestamp": 1763326820.8178222,
"optimized": false,
"prompt_length": 96,
"generated_length": 0,
"total_time": 0.25312256813049316,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326820_optimized",
"timestamp": 1763326820.8225522,
"optimized": true,
"prompt_length": 96,
"generated_length": 0,
"total_time": 0.003218412399291992,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326824_non_optimized",
"timestamp": 1763326824.1973226,
"optimized": false,
"prompt_length": 100,
"generated_length": 0,
"total_time": 0.00023245811462402344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326824_optimized",
"timestamp": 1763326824.198983,
"optimized": true,
"prompt_length": 100,
"generated_length": 0,
"total_time": 0.0003190040588378906,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326827_non_optimized",
"timestamp": 1763326827.7810934,
"optimized": false,
"prompt_length": 98,
"generated_length": 2,
"total_time": 0.24810361862182617,
"tokens_per_second": 8.061148044150517,
"time_per_token": 124.05180931091309,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326827_optimized",
"timestamp": 1763326827.7893782,
"optimized": true,
"prompt_length": 98,
"generated_length": 2,
"total_time": 0.006571054458618164,
"tokens_per_second": 304.3651536591561,
"time_per_token": 3.285527229309082,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326831_non_optimized",
"timestamp": 1763326831.4181712,
"optimized": false,
"prompt_length": 95,
"generated_length": 5,
"total_time": 0.26477837562561035,
"tokens_per_second": 18.883717328449315,
"time_per_token": 52.95567512512207,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326831_optimized",
"timestamp": 1763326831.425718,
"optimized": true,
"prompt_length": 95,
"generated_length": 1,
"total_time": 0.00593876838684082,
"tokens_per_second": 168.38508169737847,
"time_per_token": 5.93876838684082,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326834_non_optimized",
"timestamp": 1763326835.0461798,
"optimized": false,
"prompt_length": 99,
"generated_length": 1,
"total_time": 0.25382065773010254,
"tokens_per_second": 3.9397896488919324,
"time_per_token": 253.82065773010254,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326834_optimized",
"timestamp": 1763326835.0514338,
"optimized": true,
"prompt_length": 99,
"generated_length": 1,
"total_time": 0.0036373138427734375,
"tokens_per_second": 274.9281594126901,
"time_per_token": 3.6373138427734375,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326838_non_optimized",
"timestamp": 1763326838.404944,
"optimized": false,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.0002460479736328125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326838_optimized",
"timestamp": 1763326838.4067247,
"optimized": true,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.00032591819763183594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326841_non_optimized",
"timestamp": 1763326841.7476485,
"optimized": false,
"prompt_length": 111,
"generated_length": 0,
"total_time": 0.00024366378784179688,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326841_optimized",
"timestamp": 1763326841.7494369,
"optimized": true,
"prompt_length": 111,
"generated_length": 0,
"total_time": 0.000324249267578125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326845_non_optimized",
"timestamp": 1763326845.076511,
"optimized": false,
"prompt_length": 104,
"generated_length": 0,
"total_time": 0.0002353191375732422,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326845_optimized",
"timestamp": 1763326845.078336,
"optimized": true,
"prompt_length": 104,
"generated_length": 0,
"total_time": 0.0003311634063720703,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326848_non_optimized",
"timestamp": 1763326848.650482,
"optimized": false,
"prompt_length": 97,
"generated_length": 3,
"total_time": 0.2599804401397705,
"tokens_per_second": 11.539329644903832,
"time_per_token": 86.66014671325684,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326848_optimized",
"timestamp": 1763326848.655401,
"optimized": true,
"prompt_length": 97,
"generated_length": 0,
"total_time": 0.0032651424407958984,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326851_non_optimized",
"timestamp": 1763326852.0037806,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00024008750915527344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326851_optimized",
"timestamp": 1763326852.0057652,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00032830238342285156,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326855_non_optimized",
"timestamp": 1763326855.3215888,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.0002357959747314453,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326855_optimized",
"timestamp": 1763326855.3233974,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.0003199577331542969,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326858_non_optimized",
"timestamp": 1763326858.6427326,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0002455711364746094,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326858_optimized",
"timestamp": 1763326858.6446369,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0003333091735839844,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326861_non_optimized",
"timestamp": 1763326862.2181683,
"optimized": false,
"prompt_length": 94,
"generated_length": 0,
"total_time": 0.26184773445129395,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326861_optimized",
"timestamp": 1763326862.2230783,
"optimized": true,
"prompt_length": 94,
"generated_length": 0,
"total_time": 0.0032091140747070312,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326865_non_optimized",
"timestamp": 1763326865.582198,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.000240325927734375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326865_optimized",
"timestamp": 1763326865.5841131,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.0003209114074707031,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326868_non_optimized",
"timestamp": 1763326868.9020677,
"optimized": false,
"prompt_length": 103,
"generated_length": 0,
"total_time": 0.0002429485321044922,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326868_optimized",
"timestamp": 1763326868.9039805,
"optimized": true,
"prompt_length": 103,
"generated_length": 0,
"total_time": 0.00033211708068847656,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326872_non_optimized",
"timestamp": 1763326872.2377539,
"optimized": false,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.00024199485778808594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326872_optimized",
"timestamp": 1763326872.2396657,
"optimized": true,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.00032520294189453125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326875_non_optimized",
"timestamp": 1763326875.572948,
"optimized": false,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.0002472400665283203,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326875_optimized",
"timestamp": 1763326875.5749235,
"optimized": true,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.000339508056640625,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326878_non_optimized",
"timestamp": 1763326878.917066,
"optimized": false,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.00023293495178222656,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326878_optimized",
"timestamp": 1763326878.9190118,
"optimized": true,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.00032639503479003906,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326882_non_optimized",
"timestamp": 1763326882.2492902,
"optimized": false,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.00023746490478515625,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326882_optimized",
"timestamp": 1763326882.251233,
"optimized": true,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.0003230571746826172,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326885_non_optimized",
"timestamp": 1763326885.5688546,
"optimized": false,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.0002415180206298828,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326885_optimized",
"timestamp": 1763326885.5708404,
"optimized": true,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.00032401084899902344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326888_non_optimized",
"timestamp": 1763326888.8873081,
"optimized": false,
"prompt_length": 114,
"generated_length": 0,
"total_time": 0.00024056434631347656,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326888_optimized",
"timestamp": 1763326888.8893402,
"optimized": true,
"prompt_length": 114,
"generated_length": 0,
"total_time": 0.0003287792205810547,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326892_non_optimized",
"timestamp": 1763326892.2040594,
"optimized": false,
"prompt_length": 113,
"generated_length": 0,
"total_time": 0.0002467632293701172,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326892_optimized",
"timestamp": 1763326892.2060897,
"optimized": true,
"prompt_length": 113,
"generated_length": 0,
"total_time": 0.00032711029052734375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326895_non_optimized",
"timestamp": 1763326895.5235405,
"optimized": false,
"prompt_length": 113,
"generated_length": 0,
"total_time": 0.00023317337036132812,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326895_optimized",
"timestamp": 1763326895.5255647,
"optimized": true,
"prompt_length": 113,
"generated_length": 0,
"total_time": 0.0003211498260498047,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326898_non_optimized",
"timestamp": 1763326898.837703,
"optimized": false,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.0002307891845703125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326898_optimized",
"timestamp": 1763326898.839746,
"optimized": true,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.00032401084899902344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326902_non_optimized",
"timestamp": 1763326902.1575205,
"optimized": false,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.0002338886260986328,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326902_optimized",
"timestamp": 1763326902.1595387,
"optimized": true,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.0003142356872558594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326905_non_optimized",
"timestamp": 1763326905.4874718,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00023674964904785156,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326905_optimized",
"timestamp": 1763326905.4895573,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00033164024353027344,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326908_non_optimized",
"timestamp": 1763326908.803102,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.00025081634521484375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326908_optimized",
"timestamp": 1763326908.8052208,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.00031828880310058594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326912_non_optimized",
"timestamp": 1763326912.1360228,
"optimized": false,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.00023627281188964844,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326912_optimized",
"timestamp": 1763326912.1381037,
"optimized": true,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.0003142356872558594,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326915_non_optimized",
"timestamp": 1763326915.4476118,
"optimized": false,
"prompt_length": 103,
"generated_length": 0,
"total_time": 0.0002300739288330078,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326915_optimized",
"timestamp": 1763326915.449725,
"optimized": true,
"prompt_length": 103,
"generated_length": 0,
"total_time": 0.0003192424774169922,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326918_non_optimized",
"timestamp": 1763326918.7690768,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00023889541625976562,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326918_optimized",
"timestamp": 1763326918.7712831,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00032782554626464844,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326922_non_optimized",
"timestamp": 1763326922.0759387,
"optimized": false,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.0002472400665283203,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326922_optimized",
"timestamp": 1763326922.078079,
"optimized": true,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.000324249267578125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326925_non_optimized",
"timestamp": 1763326925.3886678,
"optimized": false,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.00023698806762695312,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326925_optimized",
"timestamp": 1763326925.3908367,
"optimized": true,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.00032520294189453125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326928_non_optimized",
"timestamp": 1763326928.6967072,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0002300739288330078,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326928_optimized",
"timestamp": 1763326928.6988983,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0003209114074707031,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326932_non_optimized",
"timestamp": 1763326932.2863104,
"optimized": false,
"prompt_length": 97,
"generated_length": 3,
"total_time": 0.25337719917297363,
"tokens_per_second": 11.840055102795507,
"time_per_token": 84.45906639099121,
"memory_used_mb": 8.1259765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326932_optimized",
"timestamp": 1763326932.2974646,
"optimized": true,
"prompt_length": 97,
"generated_length": 3,
"total_time": 0.008779525756835938,
"tokens_per_second": 341.70410601781447,
"time_per_token": 2.926508585611979,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326935_non_optimized",
"timestamp": 1763326935.6391814,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00023484230041503906,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326935_optimized",
"timestamp": 1763326935.6413832,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00031876564025878906,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326938_non_optimized",
"timestamp": 1763326938.9412687,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00023102760314941406,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326938_optimized",
"timestamp": 1763326938.9434795,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.0003178119659423828,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326942_non_optimized",
"timestamp": 1763326942.2639933,
"optimized": false,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.00024819374084472656,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326942_optimized",
"timestamp": 1763326942.266253,
"optimized": true,
"prompt_length": 109,
"generated_length": 0,
"total_time": 0.00033283233642578125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326945_non_optimized",
"timestamp": 1763326945.5876389,
"optimized": false,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.00024247169494628906,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326945_optimized",
"timestamp": 1763326945.5899763,
"optimized": true,
"prompt_length": 112,
"generated_length": 0,
"total_time": 0.0003368854522705078,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326948_non_optimized",
"timestamp": 1763326948.9048371,
"optimized": false,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.0002422332763671875,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326948_optimized",
"timestamp": 1763326948.9071276,
"optimized": true,
"prompt_length": 107,
"generated_length": 0,
"total_time": 0.0003273487091064453,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326952_non_optimized",
"timestamp": 1763326952.2211714,
"optimized": false,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.00023603439331054688,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326952_optimized",
"timestamp": 1763326952.2234442,
"optimized": true,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.00032329559326171875,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326955_non_optimized",
"timestamp": 1763326955.5458589,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0002446174621582031,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326955_optimized",
"timestamp": 1763326955.5481744,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.00031685829162597656,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326958_non_optimized",
"timestamp": 1763326958.8506002,
"optimized": false,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.00022912025451660156,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326958_optimized",
"timestamp": 1763326958.85293,
"optimized": true,
"prompt_length": 102,
"generated_length": 0,
"total_time": 0.0003216266632080078,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326962_non_optimized",
"timestamp": 1763326962.1738048,
"optimized": false,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.00024080276489257812,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326962_optimized",
"timestamp": 1763326962.1762178,
"optimized": true,
"prompt_length": 106,
"generated_length": 0,
"total_time": 0.0003228187561035156,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326965_non_optimized",
"timestamp": 1763326965.4970171,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.00023508071899414062,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326965_optimized",
"timestamp": 1763326965.4993877,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0003216266632080078,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326968_non_optimized",
"timestamp": 1763326968.8236582,
"optimized": false,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0002491474151611328,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326968_optimized",
"timestamp": 1763326968.826099,
"optimized": true,
"prompt_length": 105,
"generated_length": 0,
"total_time": 0.0003414154052734375,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326972_non_optimized",
"timestamp": 1763326972.1689265,
"optimized": false,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.00025177001953125,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0009765625,
"gpu_utilization": null,
"device": "cuda"
},
{
"run_name": "run_1763326972_optimized",
"timestamp": 1763326972.171371,
"optimized": true,
"prompt_length": 110,
"generated_length": 0,
"total_time": 0.0003230571746826172,
"tokens_per_second": 0,
"time_per_token": 0,
"memory_used_mb": 0.0,
"gpu_utilization": null,
"device": "cuda"
}
]
}