7843 lines
228 KiB
JSON
7843 lines
228 KiB
JSON
{
|
|
"runs": [
|
|
{
|
|
"run_name": "run_1763326618_non_optimized",
|
|
"timestamp": 1763326618.9855335,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 5,
|
|
"total_time": 0.4859466552734375,
|
|
"tokens_per_second": 10.289195214619902,
|
|
"time_per_token": 97.1893310546875,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326618_optimized",
|
|
"timestamp": 1763326619.026237,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 14,
|
|
"total_time": 0.04017782211303711,
|
|
"tokens_per_second": 348.4509429259782,
|
|
"time_per_token": 2.869844436645508,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326622_non_optimized",
|
|
"timestamp": 1763326622.7140436,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.3553469181060791,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326622_optimized",
|
|
"timestamp": 1763326622.717709,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.003069162368774414,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326626_non_optimized",
|
|
"timestamp": 1763326626.4152546,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 14,
|
|
"total_time": 0.3536839485168457,
|
|
"tokens_per_second": 39.58336265671155,
|
|
"time_per_token": 25.263139179774694,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326626_optimized",
|
|
"timestamp": 1763326626.4510481,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 12,
|
|
"total_time": 0.03518319129943848,
|
|
"tokens_per_second": 341.0719595579017,
|
|
"time_per_token": 2.9319326082865396,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326629_non_optimized",
|
|
"timestamp": 1763326630.1408024,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 16,
|
|
"total_time": 0.35126209259033203,
|
|
"tokens_per_second": 45.55003325867101,
|
|
"time_per_token": 21.953880786895752,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326629_optimized",
|
|
"timestamp": 1763326630.1444962,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 0,
|
|
"total_time": 0.003066539764404297,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326633_non_optimized",
|
|
"timestamp": 1763326633.8381388,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 0,
|
|
"total_time": 0.35619688034057617,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326633_optimized",
|
|
"timestamp": 1763326633.8418753,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 0,
|
|
"total_time": 0.003074169158935547,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326637_non_optimized",
|
|
"timestamp": 1763326637.540628,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 8,
|
|
"total_time": 0.3398597240447998,
|
|
"tokens_per_second": 23.539123450078044,
|
|
"time_per_token": 42.482465505599976,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326637_optimized",
|
|
"timestamp": 1763326637.5471995,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 1,
|
|
"total_time": 0.005898714065551758,
|
|
"tokens_per_second": 169.52847500101046,
|
|
"time_per_token": 5.898714065551758,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326640_non_optimized",
|
|
"timestamp": 1763326641.2843578,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 30,
|
|
"total_time": 0.37198519706726074,
|
|
"tokens_per_second": 80.64837051721585,
|
|
"time_per_token": 12.399506568908691,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326640_optimized",
|
|
"timestamp": 1763326641.3555984,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 25,
|
|
"total_time": 0.0705265998840332,
|
|
"tokens_per_second": 354.4761840370508,
|
|
"time_per_token": 2.821063995361328,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326644_non_optimized",
|
|
"timestamp": 1763326645.0330086,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.333660364151001,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326644_optimized",
|
|
"timestamp": 1763326645.0367913,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030765533447265625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326648_non_optimized",
|
|
"timestamp": 1763326648.755835,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 17,
|
|
"total_time": 0.34771132469177246,
|
|
"tokens_per_second": 48.891131213714694,
|
|
"time_per_token": 20.453607334810144,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326648_optimized",
|
|
"timestamp": 1763326648.7840471,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 9,
|
|
"total_time": 0.027483701705932617,
|
|
"tokens_per_second": 327.46680546519195,
|
|
"time_per_token": 3.053744633992513,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326652_non_optimized",
|
|
"timestamp": 1763326652.4592273,
|
|
"optimized": false,
|
|
"prompt_length": 73,
|
|
"generated_length": 19,
|
|
"total_time": 0.31671810150146484,
|
|
"tokens_per_second": 59.990256035025276,
|
|
"time_per_token": 16.66937376323499,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326652_optimized",
|
|
"timestamp": 1763326652.4657354,
|
|
"optimized": true,
|
|
"prompt_length": 73,
|
|
"generated_length": 1,
|
|
"total_time": 0.005753517150878906,
|
|
"tokens_per_second": 173.80672965357203,
|
|
"time_per_token": 5.753517150878906,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326655_non_optimized",
|
|
"timestamp": 1763326656.2020977,
|
|
"optimized": false,
|
|
"prompt_length": 46,
|
|
"generated_length": 0,
|
|
"total_time": 0.392134428024292,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326655_optimized",
|
|
"timestamp": 1763326656.2059286,
|
|
"optimized": true,
|
|
"prompt_length": 46,
|
|
"generated_length": 0,
|
|
"total_time": 0.003073453903198242,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326659_non_optimized",
|
|
"timestamp": 1763326659.8722565,
|
|
"optimized": false,
|
|
"prompt_length": 70,
|
|
"generated_length": 17,
|
|
"total_time": 0.33252859115600586,
|
|
"tokens_per_second": 51.12342352548099,
|
|
"time_per_token": 19.56050536211799,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326659_optimized",
|
|
"timestamp": 1763326659.913114,
|
|
"optimized": true,
|
|
"prompt_length": 70,
|
|
"generated_length": 14,
|
|
"total_time": 0.040065765380859375,
|
|
"tokens_per_second": 349.42549747691135,
|
|
"time_per_token": 2.861840384347098,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326663_non_optimized",
|
|
"timestamp": 1763326663.6047928,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 14,
|
|
"total_time": 0.3415799140930176,
|
|
"tokens_per_second": 40.986016514389014,
|
|
"time_per_token": 24.3985652923584,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326663_optimized",
|
|
"timestamp": 1763326663.6473217,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 14,
|
|
"total_time": 0.041661977767944336,
|
|
"tokens_per_second": 336.037815534814,
|
|
"time_per_token": 2.9758555548531667,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326666_non_optimized",
|
|
"timestamp": 1763326667.383774,
|
|
"optimized": false,
|
|
"prompt_length": 50,
|
|
"generated_length": 0,
|
|
"total_time": 0.38038086891174316,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326666_optimized",
|
|
"timestamp": 1763326667.4496503,
|
|
"optimized": true,
|
|
"prompt_length": 50,
|
|
"generated_length": 23,
|
|
"total_time": 0.06507229804992676,
|
|
"tokens_per_second": 353.45301594164135,
|
|
"time_per_token": 2.829230349996816,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326670_non_optimized",
|
|
"timestamp": 1763326671.1408262,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.34095191955566406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326670_optimized",
|
|
"timestamp": 1763326671.1447349,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030736923217773438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326674_non_optimized",
|
|
"timestamp": 1763326674.9176548,
|
|
"optimized": false,
|
|
"prompt_length": 43,
|
|
"generated_length": 43,
|
|
"total_time": 0.4153327941894531,
|
|
"tokens_per_second": 103.53143455458913,
|
|
"time_per_token": 9.6589021904524,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326674_optimized",
|
|
"timestamp": 1763326675.01074,
|
|
"optimized": true,
|
|
"prompt_length": 43,
|
|
"generated_length": 32,
|
|
"total_time": 0.09222531318664551,
|
|
"tokens_per_second": 346.97632238167006,
|
|
"time_per_token": 2.882041037082672,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326678_non_optimized",
|
|
"timestamp": 1763326678.7116284,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 0,
|
|
"total_time": 0.34818243980407715,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326678_optimized",
|
|
"timestamp": 1763326678.7930346,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 29,
|
|
"total_time": 0.08052730560302734,
|
|
"tokens_per_second": 360.1262923530596,
|
|
"time_per_token": 2.776803641483702,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326682_non_optimized",
|
|
"timestamp": 1763326682.5197363,
|
|
"optimized": false,
|
|
"prompt_length": 58,
|
|
"generated_length": 0,
|
|
"total_time": 0.3616158962249756,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326682_optimized",
|
|
"timestamp": 1763326682.579515,
|
|
"optimized": true,
|
|
"prompt_length": 58,
|
|
"generated_length": 19,
|
|
"total_time": 0.05885767936706543,
|
|
"tokens_per_second": 322.8125913953667,
|
|
"time_per_token": 3.0977725982666016,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326685_non_optimized",
|
|
"timestamp": 1763326686.2714303,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.3391129970550537,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326685_optimized",
|
|
"timestamp": 1763326686.275417,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030994415283203125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326689_non_optimized",
|
|
"timestamp": 1763326689.9624639,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 28,
|
|
"total_time": 0.3353545665740967,
|
|
"tokens_per_second": 83.4937191583267,
|
|
"time_per_token": 11.976948806217738,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326689_optimized",
|
|
"timestamp": 1763326689.9768,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 3,
|
|
"total_time": 0.011225700378417969,
|
|
"tokens_per_second": 267.24390451108655,
|
|
"time_per_token": 3.7419001261393228,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326693_non_optimized",
|
|
"timestamp": 1763326693.711334,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 24,
|
|
"total_time": 0.3620493412017822,
|
|
"tokens_per_second": 66.28930719866715,
|
|
"time_per_token": 15.085389216740927,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326693_optimized",
|
|
"timestamp": 1763326693.7152996,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030515193939208984,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326697_non_optimized",
|
|
"timestamp": 1763326697.4116411,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 0,
|
|
"total_time": 0.3452737331390381,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326697_optimized",
|
|
"timestamp": 1763326697.4156432,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030455589294433594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326700_non_optimized",
|
|
"timestamp": 1763326701.1587586,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 19,
|
|
"total_time": 0.3794400691986084,
|
|
"tokens_per_second": 50.07378382606958,
|
|
"time_per_token": 19.970529957821494,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326700_optimized",
|
|
"timestamp": 1763326701.2194357,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 21,
|
|
"total_time": 0.05968618392944336,
|
|
"tokens_per_second": 351.84021858098123,
|
|
"time_per_token": 2.8421992347353977,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326704_non_optimized",
|
|
"timestamp": 1763326704.906457,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.3325045108795166,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326704_optimized",
|
|
"timestamp": 1763326704.9105146,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030672550201416016,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326708_non_optimized",
|
|
"timestamp": 1763326708.6155152,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 4,
|
|
"total_time": 0.3434574604034424,
|
|
"tokens_per_second": 11.646274899084734,
|
|
"time_per_token": 85.8643651008606,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326708_optimized",
|
|
"timestamp": 1763326708.6492803,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 11,
|
|
"total_time": 0.03272819519042969,
|
|
"tokens_per_second": 336.1016376245702,
|
|
"time_per_token": 2.9752904718572446,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326712_non_optimized",
|
|
"timestamp": 1763326712.3875144,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 5,
|
|
"total_time": 0.3697817325592041,
|
|
"tokens_per_second": 13.521490002753103,
|
|
"time_per_token": 73.95634651184082,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326712_optimized",
|
|
"timestamp": 1763326712.3916335,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030570030212402344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326715_non_optimized",
|
|
"timestamp": 1763326716.129369,
|
|
"optimized": false,
|
|
"prompt_length": 51,
|
|
"generated_length": 0,
|
|
"total_time": 0.38543081283569336,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326715_optimized",
|
|
"timestamp": 1763326716.133514,
|
|
"optimized": true,
|
|
"prompt_length": 51,
|
|
"generated_length": 0,
|
|
"total_time": 0.003108978271484375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326719_non_optimized",
|
|
"timestamp": 1763326719.845239,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 10,
|
|
"total_time": 0.35239410400390625,
|
|
"tokens_per_second": 28.377319275152093,
|
|
"time_per_token": 35.239410400390625,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326719_optimized",
|
|
"timestamp": 1763326719.8974886,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 18,
|
|
"total_time": 0.051172733306884766,
|
|
"tokens_per_second": 351.7498252839718,
|
|
"time_per_token": 2.842929628160265,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326723_non_optimized",
|
|
"timestamp": 1763326723.6197646,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.3644559383392334,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326723_optimized",
|
|
"timestamp": 1763326723.6239054,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030570030212402344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326726_non_optimized",
|
|
"timestamp": 1763326727.3191404,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.34804534912109375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326726_optimized",
|
|
"timestamp": 1763326727.3233125,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030825138092041016,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326730_non_optimized",
|
|
"timestamp": 1763326731.044284,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 16,
|
|
"total_time": 0.35839104652404785,
|
|
"tokens_per_second": 44.64397242950211,
|
|
"time_per_token": 22.39944040775299,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326730_optimized",
|
|
"timestamp": 1763326731.048486,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030989646911621094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326734_non_optimized",
|
|
"timestamp": 1763326734.8039234,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.3762962818145752,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326734_optimized",
|
|
"timestamp": 1763326734.84245,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 13,
|
|
"total_time": 0.03740096092224121,
|
|
"tokens_per_second": 347.58465235766965,
|
|
"time_per_token": 2.8769969940185547,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326738_non_optimized",
|
|
"timestamp": 1763326738.6017425,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 41,
|
|
"total_time": 0.370577335357666,
|
|
"tokens_per_second": 110.63817478321626,
|
|
"time_per_token": 9.038471594089415,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326738_optimized",
|
|
"timestamp": 1763326738.6901221,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 31,
|
|
"total_time": 0.08722376823425293,
|
|
"tokens_per_second": 355.4077131447096,
|
|
"time_per_token": 2.813669943040417,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326742_non_optimized",
|
|
"timestamp": 1763326742.419483,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 12,
|
|
"total_time": 0.3536853790283203,
|
|
"tokens_per_second": 33.928459335716944,
|
|
"time_per_token": 29.47378158569336,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326742_optimized",
|
|
"timestamp": 1763326742.4236658,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030400753021240234,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326745_non_optimized",
|
|
"timestamp": 1763326746.1228704,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 19,
|
|
"total_time": 0.3396615982055664,
|
|
"tokens_per_second": 55.93802802665087,
|
|
"time_per_token": 17.8769262213456,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326745_optimized",
|
|
"timestamp": 1763326746.1515133,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 9,
|
|
"total_time": 0.02747321128845215,
|
|
"tokens_per_second": 327.59184594423374,
|
|
"time_per_token": 3.052579032050239,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326749_non_optimized",
|
|
"timestamp": 1763326749.8801262,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 0,
|
|
"total_time": 0.35973238945007324,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326749_optimized",
|
|
"timestamp": 1763326749.9388814,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 20,
|
|
"total_time": 0.05757498741149902,
|
|
"tokens_per_second": 347.37306770136695,
|
|
"time_per_token": 2.878749370574951,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326753_non_optimized",
|
|
"timestamp": 1763326753.6588142,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 22,
|
|
"total_time": 0.3657264709472656,
|
|
"tokens_per_second": 60.154245720901606,
|
|
"time_per_token": 16.62393049760298,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326753_optimized",
|
|
"timestamp": 1763326753.6631608,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031464099884033203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326757_non_optimized",
|
|
"timestamp": 1763326757.3905053,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 33,
|
|
"total_time": 0.36070775985717773,
|
|
"tokens_per_second": 91.4868036469921,
|
|
"time_per_token": 10.930538177490234,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326757_optimized",
|
|
"timestamp": 1763326757.4786851,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 31,
|
|
"total_time": 0.08695292472839355,
|
|
"tokens_per_second": 356.5147474548062,
|
|
"time_per_token": 2.804933055754631,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326760_non_optimized",
|
|
"timestamp": 1763326761.2264946,
|
|
"optimized": false,
|
|
"prompt_length": 49,
|
|
"generated_length": 17,
|
|
"total_time": 0.38045310974121094,
|
|
"tokens_per_second": 44.683561691908935,
|
|
"time_per_token": 22.379594690659466,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326760_optimized",
|
|
"timestamp": 1763326761.3133712,
|
|
"optimized": true,
|
|
"prompt_length": 49,
|
|
"generated_length": 31,
|
|
"total_time": 0.08566641807556152,
|
|
"tokens_per_second": 361.8687543659949,
|
|
"time_per_token": 2.763432841147146,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326764_non_optimized",
|
|
"timestamp": 1763326765.0356681,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 37,
|
|
"total_time": 0.3631327152252197,
|
|
"tokens_per_second": 101.89112258049266,
|
|
"time_per_token": 9.814397708789722,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326764_optimized",
|
|
"timestamp": 1763326765.0750856,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 13,
|
|
"total_time": 0.03816509246826172,
|
|
"tokens_per_second": 340.62540293360655,
|
|
"time_per_token": 2.9357763437124396,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326768_non_optimized",
|
|
"timestamp": 1763326768.767545,
|
|
"optimized": false,
|
|
"prompt_length": 68,
|
|
"generated_length": 19,
|
|
"total_time": 0.331697940826416,
|
|
"tokens_per_second": 57.281030906197486,
|
|
"time_per_token": 17.457786359285052,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326768_optimized",
|
|
"timestamp": 1763326768.7959456,
|
|
"optimized": true,
|
|
"prompt_length": 68,
|
|
"generated_length": 9,
|
|
"total_time": 0.027106285095214844,
|
|
"tokens_per_second": 332.0263167153362,
|
|
"time_per_token": 3.011809455023872,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326772_non_optimized",
|
|
"timestamp": 1763326772.5260317,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 10,
|
|
"total_time": 0.36326122283935547,
|
|
"tokens_per_second": 27.52839932056906,
|
|
"time_per_token": 36.32612228393555,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326772_optimized",
|
|
"timestamp": 1763326772.5303962,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 0,
|
|
"total_time": 0.003071308135986328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326775_non_optimized",
|
|
"timestamp": 1763326776.254764,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 25,
|
|
"total_time": 0.3601036071777344,
|
|
"tokens_per_second": 69.42446424220596,
|
|
"time_per_token": 14.404144287109375,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326775_optimized",
|
|
"timestamp": 1763326776.2603817,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030698776245117188,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326779_non_optimized",
|
|
"timestamp": 1763326779.9732888,
|
|
"optimized": false,
|
|
"prompt_length": 62,
|
|
"generated_length": 0,
|
|
"total_time": 0.3485872745513916,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326779_optimized",
|
|
"timestamp": 1763326780.019289,
|
|
"optimized": true,
|
|
"prompt_length": 62,
|
|
"generated_length": 15,
|
|
"total_time": 0.0446467399597168,
|
|
"tokens_per_second": 335.9707789086948,
|
|
"time_per_token": 2.9764493306477866,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326783_non_optimized",
|
|
"timestamp": 1763326783.7825809,
|
|
"optimized": false,
|
|
"prompt_length": 48,
|
|
"generated_length": 37,
|
|
"total_time": 0.3960244655609131,
|
|
"tokens_per_second": 93.42857125656288,
|
|
"time_per_token": 10.703363934078732,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326783_optimized",
|
|
"timestamp": 1763326783.8163621,
|
|
"optimized": true,
|
|
"prompt_length": 48,
|
|
"generated_length": 11,
|
|
"total_time": 0.032434701919555664,
|
|
"tokens_per_second": 339.142934850523,
|
|
"time_per_token": 2.9486092654141514,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326787_non_optimized",
|
|
"timestamp": 1763326787.5348887,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.33835744857788086,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326787_optimized",
|
|
"timestamp": 1763326787.539357,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030851364135742188,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326790_non_optimized",
|
|
"timestamp": 1763326791.2828848,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.37782812118530273,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326790_optimized",
|
|
"timestamp": 1763326791.2873359,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.003094911575317383,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326794_non_optimized",
|
|
"timestamp": 1763326794.9969409,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 15,
|
|
"total_time": 0.3552377223968506,
|
|
"tokens_per_second": 42.225245390023325,
|
|
"time_per_token": 23.682514826456707,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326794_optimized",
|
|
"timestamp": 1763326795.0417602,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 15,
|
|
"total_time": 0.043425798416137695,
|
|
"tokens_per_second": 345.41679248494296,
|
|
"time_per_token": 2.895053227742513,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326798_non_optimized",
|
|
"timestamp": 1763326798.768397,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.3616046905517578,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326798_optimized",
|
|
"timestamp": 1763326798.7728815,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030786991119384766,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326802_non_optimized",
|
|
"timestamp": 1763326802.5161467,
|
|
"optimized": false,
|
|
"prompt_length": 55,
|
|
"generated_length": 8,
|
|
"total_time": 0.367128849029541,
|
|
"tokens_per_second": 21.790714680001297,
|
|
"time_per_token": 45.89110612869263,
|
|
"memory_used_mb": 8.12548828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326802_optimized",
|
|
"timestamp": 1763326802.5774758,
|
|
"optimized": true,
|
|
"prompt_length": 55,
|
|
"generated_length": 21,
|
|
"total_time": 0.05986332893371582,
|
|
"tokens_per_second": 350.79906804468607,
|
|
"time_per_token": 2.8506347111293246,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326805_non_optimized",
|
|
"timestamp": 1763326806.225911,
|
|
"optimized": false,
|
|
"prompt_length": 86,
|
|
"generated_length": 14,
|
|
"total_time": 0.2903730869293213,
|
|
"tokens_per_second": 48.21383464992984,
|
|
"time_per_token": 20.740934780665807,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326805_optimized",
|
|
"timestamp": 1763326806.23062,
|
|
"optimized": true,
|
|
"prompt_length": 86,
|
|
"generated_length": 0,
|
|
"total_time": 0.003251314163208008,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326809_non_optimized",
|
|
"timestamp": 1763326809.8743262,
|
|
"optimized": false,
|
|
"prompt_length": 87,
|
|
"generated_length": 0,
|
|
"total_time": 0.2792494297027588,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326809_optimized",
|
|
"timestamp": 1763326809.8790207,
|
|
"optimized": true,
|
|
"prompt_length": 87,
|
|
"generated_length": 0,
|
|
"total_time": 0.0032303333282470703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326813_non_optimized",
|
|
"timestamp": 1763326813.5236838,
|
|
"optimized": false,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.2737746238708496,
|
|
"tokens_per_second": 29.22111584663858,
|
|
"time_per_token": 34.2218279838562,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326813_optimized",
|
|
"timestamp": 1763326813.5475192,
|
|
"optimized": true,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.022355318069458008,
|
|
"tokens_per_second": 357.8566842638511,
|
|
"time_per_token": 2.794414758682251,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326816_non_optimized",
|
|
"timestamp": 1763326817.1884215,
|
|
"optimized": false,
|
|
"prompt_length": 91,
|
|
"generated_length": 9,
|
|
"total_time": 0.2681541442871094,
|
|
"tokens_per_second": 33.56278540436731,
|
|
"time_per_token": 29.794904920789932,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326816_optimized",
|
|
"timestamp": 1763326817.193147,
|
|
"optimized": true,
|
|
"prompt_length": 91,
|
|
"generated_length": 0,
|
|
"total_time": 0.0032160282135009766,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326820_non_optimized",
|
|
"timestamp": 1763326820.8178222,
|
|
"optimized": false,
|
|
"prompt_length": 96,
|
|
"generated_length": 0,
|
|
"total_time": 0.25312256813049316,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326820_optimized",
|
|
"timestamp": 1763326820.8225522,
|
|
"optimized": true,
|
|
"prompt_length": 96,
|
|
"generated_length": 0,
|
|
"total_time": 0.003218412399291992,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326824_non_optimized",
|
|
"timestamp": 1763326824.1973226,
|
|
"optimized": false,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023245811462402344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326824_optimized",
|
|
"timestamp": 1763326824.198983,
|
|
"optimized": true,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003190040588378906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326827_non_optimized",
|
|
"timestamp": 1763326827.7810934,
|
|
"optimized": false,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.24810361862182617,
|
|
"tokens_per_second": 8.061148044150517,
|
|
"time_per_token": 124.05180931091309,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326827_optimized",
|
|
"timestamp": 1763326827.7893782,
|
|
"optimized": true,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.006571054458618164,
|
|
"tokens_per_second": 304.3651536591561,
|
|
"time_per_token": 3.285527229309082,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326831_non_optimized",
|
|
"timestamp": 1763326831.4181712,
|
|
"optimized": false,
|
|
"prompt_length": 95,
|
|
"generated_length": 5,
|
|
"total_time": 0.26477837562561035,
|
|
"tokens_per_second": 18.883717328449315,
|
|
"time_per_token": 52.95567512512207,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326831_optimized",
|
|
"timestamp": 1763326831.425718,
|
|
"optimized": true,
|
|
"prompt_length": 95,
|
|
"generated_length": 1,
|
|
"total_time": 0.00593876838684082,
|
|
"tokens_per_second": 168.38508169737847,
|
|
"time_per_token": 5.93876838684082,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326834_non_optimized",
|
|
"timestamp": 1763326835.0461798,
|
|
"optimized": false,
|
|
"prompt_length": 99,
|
|
"generated_length": 1,
|
|
"total_time": 0.25382065773010254,
|
|
"tokens_per_second": 3.9397896488919324,
|
|
"time_per_token": 253.82065773010254,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326834_optimized",
|
|
"timestamp": 1763326835.0514338,
|
|
"optimized": true,
|
|
"prompt_length": 99,
|
|
"generated_length": 1,
|
|
"total_time": 0.0036373138427734375,
|
|
"tokens_per_second": 274.9281594126901,
|
|
"time_per_token": 3.6373138427734375,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326838_non_optimized",
|
|
"timestamp": 1763326838.404944,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002460479736328125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326838_optimized",
|
|
"timestamp": 1763326838.4067247,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032591819763183594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326841_non_optimized",
|
|
"timestamp": 1763326841.7476485,
|
|
"optimized": false,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024366378784179688,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326841_optimized",
|
|
"timestamp": 1763326841.7494369,
|
|
"optimized": true,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.000324249267578125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326845_non_optimized",
|
|
"timestamp": 1763326845.076511,
|
|
"optimized": false,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002353191375732422,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326845_optimized",
|
|
"timestamp": 1763326845.078336,
|
|
"optimized": true,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003311634063720703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326848_non_optimized",
|
|
"timestamp": 1763326848.650482,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.2599804401397705,
|
|
"tokens_per_second": 11.539329644903832,
|
|
"time_per_token": 86.66014671325684,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326848_optimized",
|
|
"timestamp": 1763326848.655401,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 0,
|
|
"total_time": 0.0032651424407958984,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326851_non_optimized",
|
|
"timestamp": 1763326852.0037806,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024008750915527344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326851_optimized",
|
|
"timestamp": 1763326852.0057652,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032830238342285156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326855_non_optimized",
|
|
"timestamp": 1763326855.3215888,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002357959747314453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326855_optimized",
|
|
"timestamp": 1763326855.3233974,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003199577331542969,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326858_non_optimized",
|
|
"timestamp": 1763326858.6427326,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002455711364746094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326858_optimized",
|
|
"timestamp": 1763326858.6446369,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003333091735839844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326861_non_optimized",
|
|
"timestamp": 1763326862.2181683,
|
|
"optimized": false,
|
|
"prompt_length": 94,
|
|
"generated_length": 0,
|
|
"total_time": 0.26184773445129395,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326861_optimized",
|
|
"timestamp": 1763326862.2230783,
|
|
"optimized": true,
|
|
"prompt_length": 94,
|
|
"generated_length": 0,
|
|
"total_time": 0.0032091140747070312,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326865_non_optimized",
|
|
"timestamp": 1763326865.582198,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.000240325927734375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326865_optimized",
|
|
"timestamp": 1763326865.5841131,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003209114074707031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326868_non_optimized",
|
|
"timestamp": 1763326868.9020677,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002429485321044922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326868_optimized",
|
|
"timestamp": 1763326868.9039805,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033211708068847656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326872_non_optimized",
|
|
"timestamp": 1763326872.2377539,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024199485778808594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326872_optimized",
|
|
"timestamp": 1763326872.2396657,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032520294189453125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326875_non_optimized",
|
|
"timestamp": 1763326875.572948,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002472400665283203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326875_optimized",
|
|
"timestamp": 1763326875.5749235,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.000339508056640625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326878_non_optimized",
|
|
"timestamp": 1763326878.917066,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023293495178222656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326878_optimized",
|
|
"timestamp": 1763326878.9190118,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032639503479003906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326882_non_optimized",
|
|
"timestamp": 1763326882.2492902,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023746490478515625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326882_optimized",
|
|
"timestamp": 1763326882.251233,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003230571746826172,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326885_non_optimized",
|
|
"timestamp": 1763326885.5688546,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002415180206298828,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326885_optimized",
|
|
"timestamp": 1763326885.5708404,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032401084899902344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326888_non_optimized",
|
|
"timestamp": 1763326888.8873081,
|
|
"optimized": false,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024056434631347656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326888_optimized",
|
|
"timestamp": 1763326888.8893402,
|
|
"optimized": true,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003287792205810547,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326892_non_optimized",
|
|
"timestamp": 1763326892.2040594,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002467632293701172,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326892_optimized",
|
|
"timestamp": 1763326892.2060897,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032711029052734375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326895_non_optimized",
|
|
"timestamp": 1763326895.5235405,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023317337036132812,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326895_optimized",
|
|
"timestamp": 1763326895.5255647,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003211498260498047,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326898_non_optimized",
|
|
"timestamp": 1763326898.837703,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002307891845703125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326898_optimized",
|
|
"timestamp": 1763326898.839746,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032401084899902344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326902_non_optimized",
|
|
"timestamp": 1763326902.1575205,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002338886260986328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326902_optimized",
|
|
"timestamp": 1763326902.1595387,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003142356872558594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326905_non_optimized",
|
|
"timestamp": 1763326905.4874718,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023674964904785156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326905_optimized",
|
|
"timestamp": 1763326905.4895573,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033164024353027344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326908_non_optimized",
|
|
"timestamp": 1763326908.803102,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025081634521484375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326908_optimized",
|
|
"timestamp": 1763326908.8052208,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00031828880310058594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326912_non_optimized",
|
|
"timestamp": 1763326912.1360228,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023627281188964844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326912_optimized",
|
|
"timestamp": 1763326912.1381037,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003142356872558594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326915_non_optimized",
|
|
"timestamp": 1763326915.4476118,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002300739288330078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326915_optimized",
|
|
"timestamp": 1763326915.449725,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003192424774169922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326918_non_optimized",
|
|
"timestamp": 1763326918.7690768,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023889541625976562,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326918_optimized",
|
|
"timestamp": 1763326918.7712831,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032782554626464844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326922_non_optimized",
|
|
"timestamp": 1763326922.0759387,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002472400665283203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326922_optimized",
|
|
"timestamp": 1763326922.078079,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.000324249267578125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326925_non_optimized",
|
|
"timestamp": 1763326925.3886678,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023698806762695312,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326925_optimized",
|
|
"timestamp": 1763326925.3908367,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032520294189453125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326928_non_optimized",
|
|
"timestamp": 1763326928.6967072,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002300739288330078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326928_optimized",
|
|
"timestamp": 1763326928.6988983,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003209114074707031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326932_non_optimized",
|
|
"timestamp": 1763326932.2863104,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.25337719917297363,
|
|
"tokens_per_second": 11.840055102795507,
|
|
"time_per_token": 84.45906639099121,
|
|
"memory_used_mb": 8.1259765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326932_optimized",
|
|
"timestamp": 1763326932.2974646,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.008779525756835938,
|
|
"tokens_per_second": 341.70410601781447,
|
|
"time_per_token": 2.926508585611979,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326935_non_optimized",
|
|
"timestamp": 1763326935.6391814,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023484230041503906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326935_optimized",
|
|
"timestamp": 1763326935.6413832,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00031876564025878906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326938_non_optimized",
|
|
"timestamp": 1763326938.9412687,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023102760314941406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326938_optimized",
|
|
"timestamp": 1763326938.9434795,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003178119659423828,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326942_non_optimized",
|
|
"timestamp": 1763326942.2639933,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024819374084472656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326942_optimized",
|
|
"timestamp": 1763326942.266253,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033283233642578125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326945_non_optimized",
|
|
"timestamp": 1763326945.5876389,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024247169494628906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326945_optimized",
|
|
"timestamp": 1763326945.5899763,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003368854522705078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326948_non_optimized",
|
|
"timestamp": 1763326948.9048371,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002422332763671875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326948_optimized",
|
|
"timestamp": 1763326948.9071276,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003273487091064453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326952_non_optimized",
|
|
"timestamp": 1763326952.2211714,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023603439331054688,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326952_optimized",
|
|
"timestamp": 1763326952.2234442,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032329559326171875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326955_non_optimized",
|
|
"timestamp": 1763326955.5458589,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002446174621582031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326955_optimized",
|
|
"timestamp": 1763326955.5481744,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00031685829162597656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326958_non_optimized",
|
|
"timestamp": 1763326958.8506002,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00022912025451660156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326958_optimized",
|
|
"timestamp": 1763326958.85293,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003216266632080078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326962_non_optimized",
|
|
"timestamp": 1763326962.1738048,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024080276489257812,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326962_optimized",
|
|
"timestamp": 1763326962.1762178,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003228187561035156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326965_non_optimized",
|
|
"timestamp": 1763326965.4970171,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023508071899414062,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326965_optimized",
|
|
"timestamp": 1763326965.4993877,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003216266632080078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326968_non_optimized",
|
|
"timestamp": 1763326968.8236582,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002491474151611328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326968_optimized",
|
|
"timestamp": 1763326968.826099,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003414154052734375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326972_non_optimized",
|
|
"timestamp": 1763326972.1689265,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025177001953125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0009765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763326972_optimized",
|
|
"timestamp": 1763326972.171371,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003230571746826172,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 0.0,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328146_non_optimized",
|
|
"timestamp": 1763328146.8811655,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 21,
|
|
"total_time": 0.38083600997924805,
|
|
"tokens_per_second": 55.14184438899121,
|
|
"time_per_token": 18.135048094249907,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328146_optimized",
|
|
"timestamp": 1763328146.9318924,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 17,
|
|
"total_time": 0.04855465888977051,
|
|
"tokens_per_second": 350.1208820886508,
|
|
"time_per_token": 2.856156405280618,
|
|
"memory_used_mb": 83.21142578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328150_non_optimized",
|
|
"timestamp": 1763328150.6292245,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 6,
|
|
"total_time": 0.3474395275115967,
|
|
"tokens_per_second": 17.269192261953368,
|
|
"time_per_token": 57.90658791859945,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328150_optimized",
|
|
"timestamp": 1763328150.6806788,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 17,
|
|
"total_time": 0.049269676208496094,
|
|
"tokens_per_second": 345.0398157288582,
|
|
"time_per_token": 2.8982162475585938,
|
|
"memory_used_mb": 83.58984375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328154_non_optimized",
|
|
"timestamp": 1763328154.400125,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.35491299629211426,
|
|
"tokens_per_second": 112.7036778531425,
|
|
"time_per_token": 8.872824907302856,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328154_optimized",
|
|
"timestamp": 1763328154.4400854,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 13,
|
|
"total_time": 0.03777933120727539,
|
|
"tokens_per_second": 344.1034974567393,
|
|
"time_per_token": 2.9061024005596456,
|
|
"memory_used_mb": 83.29736328125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328157_non_optimized",
|
|
"timestamp": 1763328158.1473851,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 0,
|
|
"total_time": 0.3587629795074463,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328157_optimized",
|
|
"timestamp": 1763328158.16346,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 4,
|
|
"total_time": 0.013820648193359375,
|
|
"tokens_per_second": 289.42202594534916,
|
|
"time_per_token": 3.4551620483398438,
|
|
"memory_used_mb": 83.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328161_non_optimized",
|
|
"timestamp": 1763328161.8730447,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 0,
|
|
"total_time": 0.3552854061126709,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328161_optimized",
|
|
"timestamp": 1763328161.8809595,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031197071075439453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.90087890625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328165_non_optimized",
|
|
"timestamp": 1763328165.5648606,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 6,
|
|
"total_time": 0.3446638584136963,
|
|
"tokens_per_second": 17.40826562905318,
|
|
"time_per_token": 57.443976402282715,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328165_optimized",
|
|
"timestamp": 1763328165.572956,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 1,
|
|
"total_time": 0.0058612823486328125,
|
|
"tokens_per_second": 170.61112918971688,
|
|
"time_per_token": 5.8612823486328125,
|
|
"memory_used_mb": 83.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328169_non_optimized",
|
|
"timestamp": 1763328169.4239416,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 12,
|
|
"total_time": 0.37118029594421387,
|
|
"tokens_per_second": 32.32930231243613,
|
|
"time_per_token": 30.93169132868449,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328169_optimized",
|
|
"timestamp": 1763328169.4591513,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 11,
|
|
"total_time": 0.03293347358703613,
|
|
"tokens_per_second": 334.0066747265317,
|
|
"time_per_token": 2.9939521442760118,
|
|
"memory_used_mb": 83.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328172_non_optimized",
|
|
"timestamp": 1763328173.1438408,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.3334786891937256,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328172_optimized",
|
|
"timestamp": 1763328173.149282,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.003131389617919922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.09716796875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328176_non_optimized",
|
|
"timestamp": 1763328176.8386126,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 11,
|
|
"total_time": 0.3452491760253906,
|
|
"tokens_per_second": 31.86104635103033,
|
|
"time_per_token": 31.386288729580965,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328176_optimized",
|
|
"timestamp": 1763328176.8440063,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030989646911621094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.984375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328180_non_optimized",
|
|
"timestamp": 1763328180.5212622,
|
|
"optimized": false,
|
|
"prompt_length": 73,
|
|
"generated_length": 15,
|
|
"total_time": 0.3206193447113037,
|
|
"tokens_per_second": 46.78445093045305,
|
|
"time_per_token": 21.374622980753582,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328180_optimized",
|
|
"timestamp": 1763328180.598697,
|
|
"optimized": true,
|
|
"prompt_length": 73,
|
|
"generated_length": 27,
|
|
"total_time": 0.07303571701049805,
|
|
"tokens_per_second": 369.6821377973062,
|
|
"time_per_token": 2.7050265559443725,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328183_non_optimized",
|
|
"timestamp": 1763328184.3314278,
|
|
"optimized": false,
|
|
"prompt_length": 46,
|
|
"generated_length": 36,
|
|
"total_time": 0.3898129463195801,
|
|
"tokens_per_second": 92.3519866127949,
|
|
"time_per_token": 10.828137397766113,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328183_optimized",
|
|
"timestamp": 1763328184.429176,
|
|
"optimized": true,
|
|
"prompt_length": 46,
|
|
"generated_length": 35,
|
|
"total_time": 0.09540414810180664,
|
|
"tokens_per_second": 366.8603587618767,
|
|
"time_per_token": 2.7258328029087613,
|
|
"memory_used_mb": 83.5302734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328187_non_optimized",
|
|
"timestamp": 1763328188.1023467,
|
|
"optimized": false,
|
|
"prompt_length": 70,
|
|
"generated_length": 3,
|
|
"total_time": 0.32543349266052246,
|
|
"tokens_per_second": 9.218473413642966,
|
|
"time_per_token": 108.47783088684082,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328187_optimized",
|
|
"timestamp": 1763328188.1267638,
|
|
"optimized": true,
|
|
"prompt_length": 70,
|
|
"generated_length": 7,
|
|
"total_time": 0.022072315216064453,
|
|
"tokens_per_second": 317.13936356369766,
|
|
"time_per_token": 3.1531878880092075,
|
|
"memory_used_mb": 83.4140625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328191_non_optimized",
|
|
"timestamp": 1763328191.8269985,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 25,
|
|
"total_time": 0.3369431495666504,
|
|
"tokens_per_second": 74.1964928865686,
|
|
"time_per_token": 13.477725982666016,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328191_optimized",
|
|
"timestamp": 1763328191.8957512,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 23,
|
|
"total_time": 0.06402778625488281,
|
|
"tokens_per_second": 359.21904137746134,
|
|
"time_per_token": 2.783816793690557,
|
|
"memory_used_mb": 83.7685546875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328195_non_optimized",
|
|
"timestamp": 1763328195.6528826,
|
|
"optimized": false,
|
|
"prompt_length": 50,
|
|
"generated_length": 0,
|
|
"total_time": 0.38687896728515625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328195_optimized",
|
|
"timestamp": 1763328195.7567282,
|
|
"optimized": true,
|
|
"prompt_length": 50,
|
|
"generated_length": 37,
|
|
"total_time": 0.10147953033447266,
|
|
"tokens_per_second": 364.60555028240094,
|
|
"time_per_token": 2.7426900090398014,
|
|
"memory_used_mb": 83.7080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328199_non_optimized",
|
|
"timestamp": 1763328199.4423645,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.3354356288909912,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328199_optimized",
|
|
"timestamp": 1763328199.4558053,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 3,
|
|
"total_time": 0.011044025421142578,
|
|
"tokens_per_second": 271.6400846250162,
|
|
"time_per_token": 3.681341807047526,
|
|
"memory_used_mb": 83.18359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328202_non_optimized",
|
|
"timestamp": 1763328203.2210126,
|
|
"optimized": false,
|
|
"prompt_length": 43,
|
|
"generated_length": 26,
|
|
"total_time": 0.40613532066345215,
|
|
"tokens_per_second": 64.01807150761248,
|
|
"time_per_token": 15.620589256286621,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328202_optimized",
|
|
"timestamp": 1763328203.314865,
|
|
"optimized": true,
|
|
"prompt_length": 43,
|
|
"generated_length": 33,
|
|
"total_time": 0.09137701988220215,
|
|
"tokens_per_second": 361.1411276329831,
|
|
"time_per_token": 2.7690006024909746,
|
|
"memory_used_mb": 83.38427734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328206_non_optimized",
|
|
"timestamp": 1763328207.059134,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 0,
|
|
"total_time": 0.3586292266845703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328206_optimized",
|
|
"timestamp": 1763328207.0646808,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 0,
|
|
"total_time": 0.003091096878051758,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.9287109375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328210_non_optimized",
|
|
"timestamp": 1763328210.8018575,
|
|
"optimized": false,
|
|
"prompt_length": 58,
|
|
"generated_length": 3,
|
|
"total_time": 0.3591897487640381,
|
|
"tokens_per_second": 8.35213145787962,
|
|
"time_per_token": 119.72991625467937,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328210_optimized",
|
|
"timestamp": 1763328210.857825,
|
|
"optimized": true,
|
|
"prompt_length": 58,
|
|
"generated_length": 19,
|
|
"total_time": 0.053505897521972656,
|
|
"tokens_per_second": 355.1010426878175,
|
|
"time_per_token": 2.8160998695775086,
|
|
"memory_used_mb": 83.4130859375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328214_non_optimized",
|
|
"timestamp": 1763328214.5682755,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.3383166790008545,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328214_optimized",
|
|
"timestamp": 1763328214.5738587,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 0,
|
|
"total_time": 0.003110647201538086,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328217_non_optimized",
|
|
"timestamp": 1763328218.2879,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 4,
|
|
"total_time": 0.3340427875518799,
|
|
"tokens_per_second": 11.974513891813226,
|
|
"time_per_token": 83.51069688796997,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328217_optimized",
|
|
"timestamp": 1763328218.3589163,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 24,
|
|
"total_time": 0.0684976577758789,
|
|
"tokens_per_second": 350.37694396101637,
|
|
"time_per_token": 2.8540690739949546,
|
|
"memory_used_mb": 83.828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328221_non_optimized",
|
|
"timestamp": 1763328222.0590568,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.3572876453399658,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328221_optimized",
|
|
"timestamp": 1763328222.0646763,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031256675720214844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.87353515625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328225_non_optimized",
|
|
"timestamp": 1763328225.7766004,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 0,
|
|
"total_time": 0.3473188877105713,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328225_optimized",
|
|
"timestamp": 1763328225.7822132,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031120777130126953,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.984375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328229_non_optimized",
|
|
"timestamp": 1763328229.5409186,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 18,
|
|
"total_time": 0.3730945587158203,
|
|
"tokens_per_second": 48.245142094688894,
|
|
"time_per_token": 20.72747548421224,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328229_optimized",
|
|
"timestamp": 1763328229.5943456,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 18,
|
|
"total_time": 0.05087685585021973,
|
|
"tokens_per_second": 353.79544783568343,
|
|
"time_per_token": 2.8264919916788735,
|
|
"memory_used_mb": 83.240234375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328232_non_optimized",
|
|
"timestamp": 1763328233.287513,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 0,
|
|
"total_time": 0.3368711471557617,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328232_optimized",
|
|
"timestamp": 1763328233.358895,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 24,
|
|
"total_time": 0.06883907318115234,
|
|
"tokens_per_second": 348.63920867794354,
|
|
"time_per_token": 2.8682947158813477,
|
|
"memory_used_mb": 83.828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328236_non_optimized",
|
|
"timestamp": 1763328237.07395,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 10,
|
|
"total_time": 0.34280872344970703,
|
|
"tokens_per_second": 29.170786260540087,
|
|
"time_per_token": 34.2808723449707,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328236_optimized",
|
|
"timestamp": 1763328237.1121686,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 12,
|
|
"total_time": 0.035607337951660156,
|
|
"tokens_per_second": 337.0091865977449,
|
|
"time_per_token": 2.967278162638346,
|
|
"memory_used_mb": 83.38427734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328240_non_optimized",
|
|
"timestamp": 1763328240.8759682,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 40,
|
|
"total_time": 0.37175869941711426,
|
|
"tokens_per_second": 107.59667510865668,
|
|
"time_per_token": 9.293967485427856,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328240_optimized",
|
|
"timestamp": 1763328240.8816338,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030775070190429688,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.73681640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328244_non_optimized",
|
|
"timestamp": 1763328244.644577,
|
|
"optimized": false,
|
|
"prompt_length": 51,
|
|
"generated_length": 32,
|
|
"total_time": 0.3875398635864258,
|
|
"tokens_per_second": 82.5721506527383,
|
|
"time_per_token": 12.110620737075806,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328244_optimized",
|
|
"timestamp": 1763328244.7303627,
|
|
"optimized": true,
|
|
"prompt_length": 51,
|
|
"generated_length": 30,
|
|
"total_time": 0.0831613540649414,
|
|
"tokens_per_second": 360.7444868751505,
|
|
"time_per_token": 2.772045135498047,
|
|
"memory_used_mb": 83.5302734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328248_non_optimized",
|
|
"timestamp": 1763328248.4432864,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 13,
|
|
"total_time": 0.3590657711029053,
|
|
"tokens_per_second": 36.2050661639767,
|
|
"time_per_token": 27.620443930992714,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328248_optimized",
|
|
"timestamp": 1763328248.5309665,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 30,
|
|
"total_time": 0.08289480209350586,
|
|
"tokens_per_second": 361.9044770281231,
|
|
"time_per_token": 2.7631600697835283,
|
|
"memory_used_mb": 83.79736328125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328251_non_optimized",
|
|
"timestamp": 1763328252.2412915,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.3664982318878174,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328251_optimized",
|
|
"timestamp": 1763328252.246967,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030715465545654297,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.791015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328255_non_optimized",
|
|
"timestamp": 1763328255.9404693,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 8,
|
|
"total_time": 0.3392612934112549,
|
|
"tokens_per_second": 23.580644639889247,
|
|
"time_per_token": 42.40766167640686,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328255_optimized",
|
|
"timestamp": 1763328255.9791915,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 12,
|
|
"total_time": 0.036064863204956055,
|
|
"tokens_per_second": 332.73382826393066,
|
|
"time_per_token": 3.0054052670796714,
|
|
"memory_used_mb": 83.4140625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328259_non_optimized",
|
|
"timestamp": 1763328259.683114,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 8,
|
|
"total_time": 0.36543822288513184,
|
|
"tokens_per_second": 21.891525021219906,
|
|
"time_per_token": 45.67977786064148,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328259_optimized",
|
|
"timestamp": 1763328259.728382,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 14,
|
|
"total_time": 0.04237031936645508,
|
|
"tokens_per_second": 330.41997816716747,
|
|
"time_per_token": 3.02645138331822,
|
|
"memory_used_mb": 83.29736328125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328263_non_optimized",
|
|
"timestamp": 1763328263.4533045,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 34,
|
|
"total_time": 0.37717175483703613,
|
|
"tokens_per_second": 90.14460803060481,
|
|
"time_per_token": 11.09328690697165,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328263_optimized",
|
|
"timestamp": 1763328263.459047,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030853748321533203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.73681640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328266_non_optimized",
|
|
"timestamp": 1763328267.1686065,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 11,
|
|
"total_time": 0.37029480934143066,
|
|
"tokens_per_second": 29.706060475337207,
|
|
"time_per_token": 33.6631644855846,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328266_optimized",
|
|
"timestamp": 1763328267.2656913,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 33,
|
|
"total_time": 0.09436678886413574,
|
|
"tokens_per_second": 349.6992999042453,
|
|
"time_per_token": 2.8595996625495683,
|
|
"memory_used_mb": 83.7080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328270_non_optimized",
|
|
"timestamp": 1763328270.970244,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 7,
|
|
"total_time": 0.3639066219329834,
|
|
"tokens_per_second": 19.235703826486322,
|
|
"time_per_token": 51.98666027614048,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328270_optimized",
|
|
"timestamp": 1763328270.976036,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.0030896663665771484,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.87353515625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328274_non_optimized",
|
|
"timestamp": 1763328274.6672146,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.3401796817779541,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328274_optimized",
|
|
"timestamp": 1763328274.673098,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.003112316131591797,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.04150390625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328278_non_optimized",
|
|
"timestamp": 1763328278.3698728,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 30,
|
|
"total_time": 0.35071659088134766,
|
|
"tokens_per_second": 85.5391526377759,
|
|
"time_per_token": 11.690553029378254,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328278_optimized",
|
|
"timestamp": 1763328278.4178205,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 15,
|
|
"total_time": 0.04522132873535156,
|
|
"tokens_per_second": 331.7018853599899,
|
|
"time_per_token": 3.0147552490234375,
|
|
"memory_used_mb": 83.35498046875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328281_non_optimized",
|
|
"timestamp": 1763328282.1500068,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 12,
|
|
"total_time": 0.3714306354522705,
|
|
"tokens_per_second": 32.30751277526762,
|
|
"time_per_token": 30.952552954355877,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328281_optimized",
|
|
"timestamp": 1763328282.2164667,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 22,
|
|
"total_time": 0.061430931091308594,
|
|
"tokens_per_second": 358.1257781572615,
|
|
"time_per_token": 2.792315049604936,
|
|
"memory_used_mb": 83.4423828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328285_non_optimized",
|
|
"timestamp": 1763328285.9228153,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.35640788078308105,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328285_optimized",
|
|
"timestamp": 1763328285.9286857,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 0,
|
|
"total_time": 0.003099203109741211,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.87353515625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328289_non_optimized",
|
|
"timestamp": 1763328289.66304,
|
|
"optimized": false,
|
|
"prompt_length": 49,
|
|
"generated_length": 39,
|
|
"total_time": 0.38460493087768555,
|
|
"tokens_per_second": 101.40275609831696,
|
|
"time_per_token": 9.86166489429963,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328289_optimized",
|
|
"timestamp": 1763328289.7524455,
|
|
"optimized": true,
|
|
"prompt_length": 49,
|
|
"generated_length": 31,
|
|
"total_time": 0.08660292625427246,
|
|
"tokens_per_second": 357.95557195124974,
|
|
"time_per_token": 2.7936427823958856,
|
|
"memory_used_mb": 83.50048828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328293_non_optimized",
|
|
"timestamp": 1763328293.461601,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 17,
|
|
"total_time": 0.3626689910888672,
|
|
"tokens_per_second": 46.87469956821971,
|
|
"time_per_token": 21.33347006405101,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328293_optimized",
|
|
"timestamp": 1763328293.507706,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 15,
|
|
"total_time": 0.043273210525512695,
|
|
"tokens_per_second": 346.63478438135326,
|
|
"time_per_token": 2.8848807017008467,
|
|
"memory_used_mb": 83.26806640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328296_non_optimized",
|
|
"timestamp": 1763328297.203139,
|
|
"optimized": false,
|
|
"prompt_length": 68,
|
|
"generated_length": 0,
|
|
"total_time": 0.33896493911743164,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328296_optimized",
|
|
"timestamp": 1763328297.2200859,
|
|
"optimized": true,
|
|
"prompt_length": 68,
|
|
"generated_length": 4,
|
|
"total_time": 0.014091014862060547,
|
|
"tokens_per_second": 283.86883692599235,
|
|
"time_per_token": 3.5227537155151367,
|
|
"memory_used_mb": 83.26904296875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328300_non_optimized",
|
|
"timestamp": 1763328300.9405568,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 18,
|
|
"total_time": 0.361710786819458,
|
|
"tokens_per_second": 49.76351454230864,
|
|
"time_per_token": 20.095043712192112,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328300_optimized",
|
|
"timestamp": 1763328300.9465425,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031290054321289062,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 82.81884765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328304_non_optimized",
|
|
"timestamp": 1763328304.6751106,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 26,
|
|
"total_time": 0.36957812309265137,
|
|
"tokens_per_second": 70.35048444542788,
|
|
"time_per_token": 14.214543195871206,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328304_optimized",
|
|
"timestamp": 1763328304.73792,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 21,
|
|
"total_time": 0.059926509857177734,
|
|
"tokens_per_second": 350.4292182216033,
|
|
"time_per_token": 2.8536433265322727,
|
|
"memory_used_mb": 83.4423828125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328308_non_optimized",
|
|
"timestamp": 1763328308.454106,
|
|
"optimized": false,
|
|
"prompt_length": 62,
|
|
"generated_length": 0,
|
|
"total_time": 0.3475470542907715,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328308_optimized",
|
|
"timestamp": 1763328308.494272,
|
|
"optimized": true,
|
|
"prompt_length": 62,
|
|
"generated_length": 12,
|
|
"total_time": 0.03523826599121094,
|
|
"tokens_per_second": 340.5388903924222,
|
|
"time_per_token": 2.9365221659342446,
|
|
"memory_used_mb": 83.326171875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328311_non_optimized",
|
|
"timestamp": 1763328312.2665422,
|
|
"optimized": false,
|
|
"prompt_length": 48,
|
|
"generated_length": 23,
|
|
"total_time": 0.3990597724914551,
|
|
"tokens_per_second": 57.63547615036164,
|
|
"time_per_token": 17.350424890932832,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328311_optimized",
|
|
"timestamp": 1763328312.3281333,
|
|
"optimized": true,
|
|
"prompt_length": 48,
|
|
"generated_length": 21,
|
|
"total_time": 0.05866241455078125,
|
|
"tokens_per_second": 357.9804916113929,
|
|
"time_per_token": 2.7934483119419644,
|
|
"memory_used_mb": 83.1826171875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328315_non_optimized",
|
|
"timestamp": 1763328316.0431576,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.34833741188049316,
|
|
"tokens_per_second": 100.47729243624202,
|
|
"time_per_token": 9.952497482299805,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328315_optimized",
|
|
"timestamp": 1763328316.0491786,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 0,
|
|
"total_time": 0.0031168460845947266,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.04150390625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328319_non_optimized",
|
|
"timestamp": 1763328319.7818987,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 0,
|
|
"total_time": 0.3795814514160156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328319_optimized",
|
|
"timestamp": 1763328319.8439722,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 21,
|
|
"total_time": 0.059159040451049805,
|
|
"tokens_per_second": 354.97533157888375,
|
|
"time_per_token": 2.817097164335705,
|
|
"memory_used_mb": 83.35498046875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328323_non_optimized",
|
|
"timestamp": 1763328323.5640802,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 6,
|
|
"total_time": 0.34952616691589355,
|
|
"tokens_per_second": 17.16609675590835,
|
|
"time_per_token": 58.254361152648926,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328323_optimized",
|
|
"timestamp": 1763328323.6400027,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 26,
|
|
"total_time": 0.07297134399414062,
|
|
"tokens_per_second": 356.3042500914841,
|
|
"time_per_token": 2.8065901536207933,
|
|
"memory_used_mb": 83.677734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328326_non_optimized",
|
|
"timestamp": 1763328327.3651006,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 20,
|
|
"total_time": 0.3710308074951172,
|
|
"tokens_per_second": 53.90387966708992,
|
|
"time_per_token": 18.55154037475586,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328326_optimized",
|
|
"timestamp": 1763328327.4108949,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 15,
|
|
"total_time": 0.04281282424926758,
|
|
"tokens_per_second": 350.36230996268864,
|
|
"time_per_token": 2.8541882832845054,
|
|
"memory_used_mb": 83.240234375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328330_non_optimized",
|
|
"timestamp": 1763328331.1276593,
|
|
"optimized": false,
|
|
"prompt_length": 55,
|
|
"generated_length": 0,
|
|
"total_time": 0.3657987117767334,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.0693359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328330_optimized",
|
|
"timestamp": 1763328331.2517033,
|
|
"optimized": true,
|
|
"prompt_length": 55,
|
|
"generated_length": 45,
|
|
"total_time": 0.12110424041748047,
|
|
"tokens_per_second": 371.58071298636867,
|
|
"time_per_token": 2.6912053426106772,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328334_non_optimized",
|
|
"timestamp": 1763328334.8939357,
|
|
"optimized": false,
|
|
"prompt_length": 86,
|
|
"generated_length": 14,
|
|
"total_time": 0.28943705558776855,
|
|
"tokens_per_second": 48.369756842536205,
|
|
"time_per_token": 20.674075399126327,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328334_optimized",
|
|
"timestamp": 1763328334.9002066,
|
|
"optimized": true,
|
|
"prompt_length": 86,
|
|
"generated_length": 0,
|
|
"total_time": 0.003253459930419922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.64208984375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328338_non_optimized",
|
|
"timestamp": 1763328338.600254,
|
|
"optimized": false,
|
|
"prompt_length": 87,
|
|
"generated_length": 13,
|
|
"total_time": 0.28672194480895996,
|
|
"tokens_per_second": 45.340094245879136,
|
|
"time_per_token": 22.055534216073845,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328338_optimized",
|
|
"timestamp": 1763328338.6394484,
|
|
"optimized": true,
|
|
"prompt_length": 87,
|
|
"generated_length": 13,
|
|
"total_time": 0.03615999221801758,
|
|
"tokens_per_second": 359.51335170704044,
|
|
"time_per_token": 2.781537862924429,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328341_non_optimized",
|
|
"timestamp": 1763328342.2768755,
|
|
"optimized": false,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.27281713485717773,
|
|
"tokens_per_second": 29.323671345599582,
|
|
"time_per_token": 34.10214185714722,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328341_optimized",
|
|
"timestamp": 1763328342.3042974,
|
|
"optimized": true,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.021982908248901367,
|
|
"tokens_per_second": 363.91909156968865,
|
|
"time_per_token": 2.747863531112671,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328345_non_optimized",
|
|
"timestamp": 1763328345.9446712,
|
|
"optimized": false,
|
|
"prompt_length": 91,
|
|
"generated_length": 6,
|
|
"total_time": 0.27774763107299805,
|
|
"tokens_per_second": 21.60234446220379,
|
|
"time_per_token": 46.29127184549967,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328345_optimized",
|
|
"timestamp": 1763328345.961895,
|
|
"optimized": true,
|
|
"prompt_length": 91,
|
|
"generated_length": 4,
|
|
"total_time": 0.014170408248901367,
|
|
"tokens_per_second": 282.27838815512746,
|
|
"time_per_token": 3.542602062225342,
|
|
"memory_used_mb": 83.94921875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328349_non_optimized",
|
|
"timestamp": 1763328349.593344,
|
|
"optimized": false,
|
|
"prompt_length": 96,
|
|
"generated_length": 0,
|
|
"total_time": 0.25559139251708984,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328349_optimized",
|
|
"timestamp": 1763328349.5997064,
|
|
"optimized": true,
|
|
"prompt_length": 96,
|
|
"generated_length": 0,
|
|
"total_time": 0.0032455921173095703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 83.9384765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328352_non_optimized",
|
|
"timestamp": 1763328352.9822614,
|
|
"optimized": false,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023055076599121094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328352_optimized",
|
|
"timestamp": 1763328352.9855807,
|
|
"optimized": true,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003197193145751953,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328356_non_optimized",
|
|
"timestamp": 1763328356.5787084,
|
|
"optimized": false,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.24971413612365723,
|
|
"tokens_per_second": 8.00915811594106,
|
|
"time_per_token": 124.85706806182861,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328356_optimized",
|
|
"timestamp": 1763328356.5879197,
|
|
"optimized": true,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.006028890609741211,
|
|
"tokens_per_second": 331.73599082532525,
|
|
"time_per_token": 3.0144453048706055,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328359_non_optimized",
|
|
"timestamp": 1763328360.2265294,
|
|
"optimized": false,
|
|
"prompt_length": 95,
|
|
"generated_length": 5,
|
|
"total_time": 0.26633787155151367,
|
|
"tokens_per_second": 18.77314694629497,
|
|
"time_per_token": 53.267574310302734,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328359_optimized",
|
|
"timestamp": 1763328360.2355754,
|
|
"optimized": true,
|
|
"prompt_length": 95,
|
|
"generated_length": 1,
|
|
"total_time": 0.005945444107055664,
|
|
"tokens_per_second": 168.19601395516702,
|
|
"time_per_token": 5.945444107055664,
|
|
"memory_used_mb": 83.97900390625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328363_non_optimized",
|
|
"timestamp": 1763328363.877551,
|
|
"optimized": false,
|
|
"prompt_length": 99,
|
|
"generated_length": 1,
|
|
"total_time": 0.2541792392730713,
|
|
"tokens_per_second": 3.934231618836794,
|
|
"time_per_token": 254.1792392730713,
|
|
"memory_used_mb": 84.0283203125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328363_optimized",
|
|
"timestamp": 1763328363.8867087,
|
|
"optimized": true,
|
|
"prompt_length": 99,
|
|
"generated_length": 0,
|
|
"total_time": 0.0036110877990722656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.029296875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328367_non_optimized",
|
|
"timestamp": 1763328367.2638996,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025200843811035156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328367_optimized",
|
|
"timestamp": 1763328367.2672691,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003268718719482422,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328370_non_optimized",
|
|
"timestamp": 1763328370.609966,
|
|
"optimized": false,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025463104248046875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328370_optimized",
|
|
"timestamp": 1763328370.613386,
|
|
"optimized": true,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032639503479003906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328373_non_optimized",
|
|
"timestamp": 1763328373.9479587,
|
|
"optimized": false,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.00026726722717285156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328373_optimized",
|
|
"timestamp": 1763328373.9513137,
|
|
"optimized": true,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033593177795410156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328377_non_optimized",
|
|
"timestamp": 1763328377.5480406,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.2508835792541504,
|
|
"tokens_per_second": 11.95773756464522,
|
|
"time_per_token": 83.62785975138347,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328377_optimized",
|
|
"timestamp": 1763328377.5599065,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.008674144744873047,
|
|
"tokens_per_second": 345.85542301137923,
|
|
"time_per_token": 2.891381581624349,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328380_non_optimized",
|
|
"timestamp": 1763328380.9351366,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002307891845703125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328380_optimized",
|
|
"timestamp": 1763328380.9385371,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003266334533691406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328384_non_optimized",
|
|
"timestamp": 1763328384.280055,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002384185791015625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328384_optimized",
|
|
"timestamp": 1763328384.2835102,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032138824462890625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328387_non_optimized",
|
|
"timestamp": 1763328387.6202579,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023627281188964844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328387_optimized",
|
|
"timestamp": 1763328387.6237223,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032711029052734375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328390_non_optimized",
|
|
"timestamp": 1763328391.2143693,
|
|
"optimized": false,
|
|
"prompt_length": 94,
|
|
"generated_length": 0,
|
|
"total_time": 0.26361989974975586,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328390_optimized",
|
|
"timestamp": 1763328391.2352936,
|
|
"optimized": true,
|
|
"prompt_length": 94,
|
|
"generated_length": 6,
|
|
"total_time": 0.017615556716918945,
|
|
"tokens_per_second": 340.6080259863301,
|
|
"time_per_token": 2.9359261194864907,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328394_non_optimized",
|
|
"timestamp": 1763328394.615491,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023412704467773438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328394_optimized",
|
|
"timestamp": 1763328394.6189466,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003268718719482422,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328397_non_optimized",
|
|
"timestamp": 1763328397.9617507,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023031234741210938,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328397_optimized",
|
|
"timestamp": 1763328397.965208,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003170967102050781,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328401_non_optimized",
|
|
"timestamp": 1763328401.3134701,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002567768096923828,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328401_optimized",
|
|
"timestamp": 1763328401.3170755,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00034499168395996094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328404_non_optimized",
|
|
"timestamp": 1763328404.659879,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002472400665283203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328404_optimized",
|
|
"timestamp": 1763328404.6633892,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003345012664794922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328407_non_optimized",
|
|
"timestamp": 1763328407.9927475,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025534629821777344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328407_optimized",
|
|
"timestamp": 1763328407.9964025,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003371238708496094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328411_non_optimized",
|
|
"timestamp": 1763328411.3210385,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023627281188964844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328411_optimized",
|
|
"timestamp": 1763328411.3245878,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003235340118408203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328414_non_optimized",
|
|
"timestamp": 1763328414.661873,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024056434631347656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328414_optimized",
|
|
"timestamp": 1763328414.6654513,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032258033752441406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328417_non_optimized",
|
|
"timestamp": 1763328417.996759,
|
|
"optimized": false,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023698806762695312,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328417_optimized",
|
|
"timestamp": 1763328418.000359,
|
|
"optimized": true,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033736228942871094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328421_non_optimized",
|
|
"timestamp": 1763328421.3330092,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002493858337402344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328421_optimized",
|
|
"timestamp": 1763328421.3366413,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003311634063720703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328424_non_optimized",
|
|
"timestamp": 1763328424.6846545,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002396106719970703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328424_optimized",
|
|
"timestamp": 1763328424.6882706,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003235340118408203,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328428_non_optimized",
|
|
"timestamp": 1763328428.0342734,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023412704467773438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328428_optimized",
|
|
"timestamp": 1763328428.0378458,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003237724304199219,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328431_non_optimized",
|
|
"timestamp": 1763328431.3756785,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023365020751953125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328431_optimized",
|
|
"timestamp": 1763328431.3793805,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003218650817871094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328434_non_optimized",
|
|
"timestamp": 1763328434.742513,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023174285888671875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328434_optimized",
|
|
"timestamp": 1763328434.7461731,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00031876564025878906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328438_non_optimized",
|
|
"timestamp": 1763328438.0807612,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002357959747314453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328438_optimized",
|
|
"timestamp": 1763328438.0844612,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003268718719482422,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328441_non_optimized",
|
|
"timestamp": 1763328441.415201,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023245811462402344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328441_optimized",
|
|
"timestamp": 1763328441.4189343,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00034737586975097656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328444_non_optimized",
|
|
"timestamp": 1763328444.7733834,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023651123046875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328444_optimized",
|
|
"timestamp": 1763328444.7771332,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003209114074707031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328448_non_optimized",
|
|
"timestamp": 1763328448.1191192,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023126602172851562,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328448_optimized",
|
|
"timestamp": 1763328448.1228065,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003218650817871094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328451_non_optimized",
|
|
"timestamp": 1763328451.461982,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024175643920898438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328451_optimized",
|
|
"timestamp": 1763328451.4657404,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033092498779296875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328454_non_optimized",
|
|
"timestamp": 1763328454.8150704,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024199485778808594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328454_optimized",
|
|
"timestamp": 1763328454.818905,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003361701965332031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328458_non_optimized",
|
|
"timestamp": 1763328458.150419,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002446174621582031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328458_optimized",
|
|
"timestamp": 1763328458.154374,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003304481506347656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328461_non_optimized",
|
|
"timestamp": 1763328461.7283993,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.2511739730834961,
|
|
"tokens_per_second": 11.943912672045563,
|
|
"time_per_token": 83.7246576944987,
|
|
"memory_used_mb": 84.06982421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328461_optimized",
|
|
"timestamp": 1763328461.7408895,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.008848905563354492,
|
|
"tokens_per_second": 339.0249764246262,
|
|
"time_per_token": 2.9496351877848306,
|
|
"memory_used_mb": 84.07080078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328465_non_optimized",
|
|
"timestamp": 1763328465.1261904,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002315044403076172,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328465_optimized",
|
|
"timestamp": 1763328465.1299899,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00033164024353027344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328468_non_optimized",
|
|
"timestamp": 1763328468.4668736,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002491474151611328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328468_optimized",
|
|
"timestamp": 1763328468.4707043,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00032591819763183594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328471_non_optimized",
|
|
"timestamp": 1763328471.8006985,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00022912025451660156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328471_optimized",
|
|
"timestamp": 1763328471.8047862,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003237724304199219,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328475_non_optimized",
|
|
"timestamp": 1763328475.1509132,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002551078796386719,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328475_optimized",
|
|
"timestamp": 1763328475.1548111,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003528594970703125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328478_non_optimized",
|
|
"timestamp": 1763328478.5046594,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023484230041503906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328478_optimized",
|
|
"timestamp": 1763328478.5085387,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003216266632080078,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328481_non_optimized",
|
|
"timestamp": 1763328481.850465,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.000232696533203125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328481_optimized",
|
|
"timestamp": 1763328481.854363,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003254413604736328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328485_non_optimized",
|
|
"timestamp": 1763328485.2010415,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002512931823730469,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328485_optimized",
|
|
"timestamp": 1763328485.2049637,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003311634063720703,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328488_non_optimized",
|
|
"timestamp": 1763328488.538209,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025153160095214844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328488_optimized",
|
|
"timestamp": 1763328488.5422387,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003306865692138672,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328491_non_optimized",
|
|
"timestamp": 1763328491.8936143,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002357959747314453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328491_optimized",
|
|
"timestamp": 1763328491.8975422,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003228187561035156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328495_non_optimized",
|
|
"timestamp": 1763328495.2653213,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024080276489257812,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328495_optimized",
|
|
"timestamp": 1763328495.2692711,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003209114074707031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328498_non_optimized",
|
|
"timestamp": 1763328498.633191,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002338886260986328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328498_optimized",
|
|
"timestamp": 1763328498.6372032,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003268718719482422,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328501_non_optimized",
|
|
"timestamp": 1763328501.9915178,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023627281188964844,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763328501_optimized",
|
|
"timestamp": 1763328501.9954562,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0003197193145751953,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.2470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526557_non_optimized",
|
|
"timestamp": 1763526558.0903924,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.35442304611206055,
|
|
"tokens_per_second": 132.60988673163106,
|
|
"time_per_token": 7.540915874724693,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526557_optimized",
|
|
"timestamp": 1763526558.221367,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.12709331512451172,
|
|
"tokens_per_second": 369.8070189919485,
|
|
"time_per_token": 2.7041130877555686,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526561_non_optimized",
|
|
"timestamp": 1763526561.392078,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.3155982494354248,
|
|
"tokens_per_second": 107.73190301537717,
|
|
"time_per_token": 9.282301453983083,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526561_optimized",
|
|
"timestamp": 1763526561.485392,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.08941459655761719,
|
|
"tokens_per_second": 380.25111457155657,
|
|
"time_per_token": 2.629841075224035,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526564_non_optimized",
|
|
"timestamp": 1763526564.6852732,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.3313627243041992,
|
|
"tokens_per_second": 120.71363815586875,
|
|
"time_per_token": 8.28406810760498,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526564_optimized",
|
|
"timestamp": 1763526564.794894,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.10575437545776367,
|
|
"tokens_per_second": 378.2349413616012,
|
|
"time_per_token": 2.643859386444092,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526567_non_optimized",
|
|
"timestamp": 1763526567.9853783,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 39,
|
|
"total_time": 0.33505964279174805,
|
|
"tokens_per_second": 116.39718730387337,
|
|
"time_per_token": 8.591272892096104,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526567_optimized",
|
|
"timestamp": 1763526568.0917747,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 39,
|
|
"total_time": 0.10251092910766602,
|
|
"tokens_per_second": 380.4472395234928,
|
|
"time_per_token": 2.628485361735026,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526570_non_optimized",
|
|
"timestamp": 1763526571.282682,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.33055973052978516,
|
|
"tokens_per_second": 121.00687502344086,
|
|
"time_per_token": 8.263993263244629,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526570_optimized",
|
|
"timestamp": 1763526571.3913667,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.10458731651306152,
|
|
"tokens_per_second": 382.4555532506138,
|
|
"time_per_token": 2.614682912826538,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526574_non_optimized",
|
|
"timestamp": 1763526574.5597281,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 36,
|
|
"total_time": 0.3207573890686035,
|
|
"tokens_per_second": 112.23435913521645,
|
|
"time_per_token": 8.909927474127876,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526574_optimized",
|
|
"timestamp": 1763526574.657765,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 36,
|
|
"total_time": 0.09418869018554688,
|
|
"tokens_per_second": 382.211494066664,
|
|
"time_per_token": 2.61635250515408,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526577_non_optimized",
|
|
"timestamp": 1763526577.870434,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.34548068046569824,
|
|
"tokens_per_second": 133.1478215742579,
|
|
"time_per_token": 7.510449575341266,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526577_optimized",
|
|
"timestamp": 1763526577.996214,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.12188839912414551,
|
|
"tokens_per_second": 377.39440611692817,
|
|
"time_per_token": 2.649747807046641,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526580_non_optimized",
|
|
"timestamp": 1763526581.1706474,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.31485915184020996,
|
|
"tokens_per_second": 104.80876864188276,
|
|
"time_per_token": 9.5411864194003,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526580_optimized",
|
|
"timestamp": 1763526581.2631214,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.08859968185424805,
|
|
"tokens_per_second": 372.4618340536148,
|
|
"time_per_token": 2.684838844068123,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526584_non_optimized",
|
|
"timestamp": 1763526584.4379666,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 37,
|
|
"total_time": 0.32149577140808105,
|
|
"tokens_per_second": 115.08705025247488,
|
|
"time_per_token": 8.689074902921108,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526584_optimized",
|
|
"timestamp": 1763526584.538549,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 37,
|
|
"total_time": 0.09666585922241211,
|
|
"tokens_per_second": 382.7618178499726,
|
|
"time_per_token": 2.612590789794922,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526587_non_optimized",
|
|
"timestamp": 1763526587.7008932,
|
|
"optimized": false,
|
|
"prompt_length": 73,
|
|
"generated_length": 27,
|
|
"total_time": 0.30018019676208496,
|
|
"tokens_per_second": 89.94597342275547,
|
|
"time_per_token": 11.117785065262407,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526587_optimized",
|
|
"timestamp": 1763526587.775923,
|
|
"optimized": true,
|
|
"prompt_length": 73,
|
|
"generated_length": 27,
|
|
"total_time": 0.0710904598236084,
|
|
"tokens_per_second": 379.79779659595874,
|
|
"time_per_token": 2.6329799934669778,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526590_non_optimized",
|
|
"timestamp": 1763526590.9904318,
|
|
"optimized": false,
|
|
"prompt_length": 46,
|
|
"generated_length": 54,
|
|
"total_time": 0.36146068572998047,
|
|
"tokens_per_second": 149.3938404143328,
|
|
"time_per_token": 6.693716402407046,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526590_optimized",
|
|
"timestamp": 1763526591.1326938,
|
|
"optimized": true,
|
|
"prompt_length": 46,
|
|
"generated_length": 54,
|
|
"total_time": 0.1380624771118164,
|
|
"tokens_per_second": 391.1272717225373,
|
|
"time_per_token": 2.556712539107711,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526593_non_optimized",
|
|
"timestamp": 1763526594.285255,
|
|
"optimized": false,
|
|
"prompt_length": 70,
|
|
"generated_length": 30,
|
|
"total_time": 0.3071746826171875,
|
|
"tokens_per_second": 97.66429884258109,
|
|
"time_per_token": 10.239156087239582,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526593_optimized",
|
|
"timestamp": 1763526594.3690982,
|
|
"optimized": true,
|
|
"prompt_length": 70,
|
|
"generated_length": 30,
|
|
"total_time": 0.07981586456298828,
|
|
"tokens_per_second": 375.8651261156847,
|
|
"time_per_token": 2.6605288187662763,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526597_non_optimized",
|
|
"timestamp": 1763526597.541682,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.3228602409362793,
|
|
"tokens_per_second": 105.30872398967932,
|
|
"time_per_token": 9.495889439302333,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526597_optimized",
|
|
"timestamp": 1763526597.6348116,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.08911824226379395,
|
|
"tokens_per_second": 381.515603722956,
|
|
"time_per_token": 2.621124772464528,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526600_non_optimized",
|
|
"timestamp": 1763526600.8545513,
|
|
"optimized": false,
|
|
"prompt_length": 50,
|
|
"generated_length": 50,
|
|
"total_time": 0.3591768741607666,
|
|
"tokens_per_second": 139.20718063162423,
|
|
"time_per_token": 7.183537483215332,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526600_optimized",
|
|
"timestamp": 1763526600.9873838,
|
|
"optimized": true,
|
|
"prompt_length": 50,
|
|
"generated_length": 50,
|
|
"total_time": 0.12882018089294434,
|
|
"tokens_per_second": 388.13794277740044,
|
|
"time_per_token": 2.5764036178588867,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526603_non_optimized",
|
|
"timestamp": 1763526604.1675096,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.323777437210083,
|
|
"tokens_per_second": 105.01040558282972,
|
|
"time_per_token": 9.52286580029656,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526603_optimized",
|
|
"timestamp": 1763526604.2624831,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.09089446067810059,
|
|
"tokens_per_second": 374.060198458185,
|
|
"time_per_token": 2.6733664905323704,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526607_non_optimized",
|
|
"timestamp": 1763526607.4869204,
|
|
"optimized": false,
|
|
"prompt_length": 43,
|
|
"generated_length": 57,
|
|
"total_time": 0.37198734283447266,
|
|
"tokens_per_second": 153.23102008168036,
|
|
"time_per_token": 6.526093733938117,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526607_optimized",
|
|
"timestamp": 1763526607.6380951,
|
|
"optimized": true,
|
|
"prompt_length": 43,
|
|
"generated_length": 57,
|
|
"total_time": 0.14709210395812988,
|
|
"tokens_per_second": 387.51230328600906,
|
|
"time_per_token": 2.580563227335612,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526610_non_optimized",
|
|
"timestamp": 1763526610.8189526,
|
|
"optimized": false,
|
|
"prompt_length": 61,
|
|
"generated_length": 39,
|
|
"total_time": 0.325711727142334,
|
|
"tokens_per_second": 119.73778267724835,
|
|
"time_per_token": 8.351582747239332,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526610_optimized",
|
|
"timestamp": 1763526610.9252608,
|
|
"optimized": true,
|
|
"prompt_length": 61,
|
|
"generated_length": 39,
|
|
"total_time": 0.10221171379089355,
|
|
"tokens_per_second": 381.5609635485308,
|
|
"time_per_token": 2.620813174125476,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526613_non_optimized",
|
|
"timestamp": 1763526614.11627,
|
|
"optimized": false,
|
|
"prompt_length": 58,
|
|
"generated_length": 42,
|
|
"total_time": 0.3369486331939697,
|
|
"tokens_per_second": 124.64807944723742,
|
|
"time_per_token": 8.022586504618326,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526613_optimized",
|
|
"timestamp": 1763526614.2310555,
|
|
"optimized": true,
|
|
"prompt_length": 58,
|
|
"generated_length": 42,
|
|
"total_time": 0.11066627502441406,
|
|
"tokens_per_second": 379.51941538408505,
|
|
"time_per_token": 2.634911310105097,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526617_non_optimized",
|
|
"timestamp": 1763526617.3865545,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.3230869770050049,
|
|
"tokens_per_second": 105.23482040402178,
|
|
"time_per_token": 9.502558147206026,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526617_optimized",
|
|
"timestamp": 1763526617.4830964,
|
|
"optimized": true,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.09008550643920898,
|
|
"tokens_per_second": 377.41920253224856,
|
|
"time_per_token": 2.6495737188002644,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526620_non_optimized",
|
|
"timestamp": 1763526620.6447241,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.32021570205688477,
|
|
"tokens_per_second": 103.05553346705562,
|
|
"time_per_token": 9.703506122935902,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526620_optimized",
|
|
"timestamp": 1763526620.7375038,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.08865785598754883,
|
|
"tokens_per_second": 372.21743783917515,
|
|
"time_per_token": 2.6866016965923887,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526623_non_optimized",
|
|
"timestamp": 1763526623.9323657,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.34384703636169434,
|
|
"tokens_per_second": 119.2390675627982,
|
|
"time_per_token": 8.386513081992545,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526623_optimized",
|
|
"timestamp": 1763526624.045949,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.10931515693664551,
|
|
"tokens_per_second": 375.06235319006936,
|
|
"time_per_token": 2.666223339918183,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526626_non_optimized",
|
|
"timestamp": 1763526627.2393768,
|
|
"optimized": false,
|
|
"prompt_length": 63,
|
|
"generated_length": 37,
|
|
"total_time": 0.3268465995788574,
|
|
"tokens_per_second": 113.20295223408958,
|
|
"time_per_token": 8.83369188050966,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526626_optimized",
|
|
"timestamp": 1763526627.3409727,
|
|
"optimized": true,
|
|
"prompt_length": 63,
|
|
"generated_length": 37,
|
|
"total_time": 0.09740710258483887,
|
|
"tokens_per_second": 379.84909742874277,
|
|
"time_per_token": 2.632624394184834,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526630_non_optimized",
|
|
"timestamp": 1763526630.5474806,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.35274362564086914,
|
|
"tokens_per_second": 133.2412454360013,
|
|
"time_per_token": 7.505183524273812,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526630_optimized",
|
|
"timestamp": 1763526630.6753845,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.12363100051879883,
|
|
"tokens_per_second": 380.1635496175846,
|
|
"time_per_token": 2.630446819548911,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526633_non_optimized",
|
|
"timestamp": 1763526633.845649,
|
|
"optimized": false,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.3140745162963867,
|
|
"tokens_per_second": 105.07060677555408,
|
|
"time_per_token": 9.51740958473899,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526633_optimized",
|
|
"timestamp": 1763526633.9381201,
|
|
"optimized": true,
|
|
"prompt_length": 67,
|
|
"generated_length": 33,
|
|
"total_time": 0.08824992179870605,
|
|
"tokens_per_second": 373.9380084128738,
|
|
"time_per_token": 2.674240054506244,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526636_non_optimized",
|
|
"timestamp": 1763526637.118738,
|
|
"optimized": false,
|
|
"prompt_length": 64,
|
|
"generated_length": 36,
|
|
"total_time": 0.32010531425476074,
|
|
"tokens_per_second": 112.4629876383397,
|
|
"time_per_token": 8.891814284854464,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526636_optimized",
|
|
"timestamp": 1763526637.2179773,
|
|
"optimized": true,
|
|
"prompt_length": 64,
|
|
"generated_length": 36,
|
|
"total_time": 0.09497523307800293,
|
|
"tokens_per_second": 379.04618744587117,
|
|
"time_per_token": 2.6382009188334146,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526640_non_optimized",
|
|
"timestamp": 1763526640.4208992,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.34542250633239746,
|
|
"tokens_per_second": 133.17024558826677,
|
|
"time_per_token": 7.509184920269511,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526640_optimized",
|
|
"timestamp": 1763526640.5462801,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.12082409858703613,
|
|
"tokens_per_second": 380.718751788276,
|
|
"time_per_token": 2.6266108388486114,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526643_non_optimized",
|
|
"timestamp": 1763526643.7587085,
|
|
"optimized": false,
|
|
"prompt_length": 51,
|
|
"generated_length": 49,
|
|
"total_time": 0.35083961486816406,
|
|
"tokens_per_second": 139.66495778537683,
|
|
"time_per_token": 7.159992140166613,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526643_optimized",
|
|
"timestamp": 1763526643.8911211,
|
|
"optimized": true,
|
|
"prompt_length": 51,
|
|
"generated_length": 49,
|
|
"total_time": 0.12817978858947754,
|
|
"tokens_per_second": 382.2755563822367,
|
|
"time_per_token": 2.61591405284648,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526646_non_optimized",
|
|
"timestamp": 1763526647.0730722,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.33283185958862305,
|
|
"tokens_per_second": 120.18080255129306,
|
|
"time_per_token": 8.320796489715576,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526646_optimized",
|
|
"timestamp": 1763526647.1849632,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.10762190818786621,
|
|
"tokens_per_second": 371.6715367114238,
|
|
"time_per_token": 2.6905477046966553,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526650_non_optimized",
|
|
"timestamp": 1763526650.3852422,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.34725308418273926,
|
|
"tokens_per_second": 126.70873781741659,
|
|
"time_per_token": 7.89211554960771,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526650_optimized",
|
|
"timestamp": 1763526650.5039096,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.11433672904968262,
|
|
"tokens_per_second": 384.8282206925889,
|
|
"time_per_token": 2.598562023856423,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526653_non_optimized",
|
|
"timestamp": 1763526653.6856203,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.31678080558776855,
|
|
"tokens_per_second": 110.48649218206107,
|
|
"time_per_token": 9.05088015965053,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526653_optimized",
|
|
"timestamp": 1763526653.7829595,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.09303498268127441,
|
|
"tokens_per_second": 376.2025744649772,
|
|
"time_per_token": 2.658142362322126,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526656_non_optimized",
|
|
"timestamp": 1763526656.986791,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.33247804641723633,
|
|
"tokens_per_second": 123.31641274307752,
|
|
"time_per_token": 8.109220644322837,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526656_optimized",
|
|
"timestamp": 1763526657.097655,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.10654258728027344,
|
|
"tokens_per_second": 384.8226427254337,
|
|
"time_per_token": 2.598599689762767,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526659_non_optimized",
|
|
"timestamp": 1763526660.297603,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.3501875400543213,
|
|
"tokens_per_second": 131.35818593906697,
|
|
"time_per_token": 7.61277260987655,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526659_optimized",
|
|
"timestamp": 1763526660.426392,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.12442255020141602,
|
|
"tokens_per_second": 369.7079020284901,
|
|
"time_per_token": 2.7048380478568697,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526663_non_optimized",
|
|
"timestamp": 1763526663.6286657,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.3505382537841797,
|
|
"tokens_per_second": 131.22676199648498,
|
|
"time_per_token": 7.62039682139521,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526663_optimized",
|
|
"timestamp": 1763526663.7522638,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.11925172805786133,
|
|
"tokens_per_second": 385.7386450423649,
|
|
"time_per_token": 2.5924288708230723,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526666_non_optimized",
|
|
"timestamp": 1763526666.9662895,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.33091211318969727,
|
|
"tokens_per_second": 123.8999672897937,
|
|
"time_per_token": 8.071027150968225,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526666_optimized",
|
|
"timestamp": 1763526667.077263,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.10660624504089355,
|
|
"tokens_per_second": 384.5928536763736,
|
|
"time_per_token": 2.6001523180705743,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526669_non_optimized",
|
|
"timestamp": 1763526670.2709506,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.32552552223205566,
|
|
"tokens_per_second": 107.51845127230834,
|
|
"time_per_token": 9.300729206630162,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526669_optimized",
|
|
"timestamp": 1763526670.3674176,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.09210538864135742,
|
|
"tokens_per_second": 379.9994822917907,
|
|
"time_per_token": 2.631582532610212,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526673_non_optimized",
|
|
"timestamp": 1763526673.5689995,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.32962512969970703,
|
|
"tokens_per_second": 121.34997121257273,
|
|
"time_per_token": 8.240628242492676,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526673_optimized",
|
|
"timestamp": 1763526673.678643,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.10524392127990723,
|
|
"tokens_per_second": 380.0694568726284,
|
|
"time_per_token": 2.6310980319976807,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526676_non_optimized",
|
|
"timestamp": 1763526676.8867915,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.3409693241119385,
|
|
"tokens_per_second": 129.04386667216733,
|
|
"time_per_token": 7.749302820725875,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526676_optimized",
|
|
"timestamp": 1763526677.0066595,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.11539912223815918,
|
|
"tokens_per_second": 381.2853958212384,
|
|
"time_per_token": 2.622707323594527,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526679_non_optimized",
|
|
"timestamp": 1763526680.210214,
|
|
"optimized": false,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.34114503860473633,
|
|
"tokens_per_second": 120.18348608465084,
|
|
"time_per_token": 8.320610697676496,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526679_optimized",
|
|
"timestamp": 1763526680.3233387,
|
|
"optimized": true,
|
|
"prompt_length": 59,
|
|
"generated_length": 41,
|
|
"total_time": 0.10867524147033691,
|
|
"tokens_per_second": 377.27084334283273,
|
|
"time_per_token": 2.6506156456179735,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526683_non_optimized",
|
|
"timestamp": 1763526683.5293164,
|
|
"optimized": false,
|
|
"prompt_length": 49,
|
|
"generated_length": 51,
|
|
"total_time": 0.3626677989959717,
|
|
"tokens_per_second": 140.6245609375606,
|
|
"time_per_token": 7.111133313646503,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526683_optimized",
|
|
"timestamp": 1763526683.6654575,
|
|
"optimized": true,
|
|
"prompt_length": 49,
|
|
"generated_length": 51,
|
|
"total_time": 0.13167476654052734,
|
|
"tokens_per_second": 387.31794511519433,
|
|
"time_per_token": 2.581858167461321,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526686_non_optimized",
|
|
"timestamp": 1763526686.8576732,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.3366522789001465,
|
|
"tokens_per_second": 127.7282308632585,
|
|
"time_per_token": 7.829122765119686,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526686_optimized",
|
|
"timestamp": 1763526686.973754,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.11158990859985352,
|
|
"tokens_per_second": 385.33950371975163,
|
|
"time_per_token": 2.5951141534849658,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526689_non_optimized",
|
|
"timestamp": 1763526690.151102,
|
|
"optimized": false,
|
|
"prompt_length": 68,
|
|
"generated_length": 32,
|
|
"total_time": 0.3112297058105469,
|
|
"tokens_per_second": 102.81794893794354,
|
|
"time_per_token": 9.72592830657959,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526689_optimized",
|
|
"timestamp": 1763526690.239994,
|
|
"optimized": true,
|
|
"prompt_length": 68,
|
|
"generated_length": 32,
|
|
"total_time": 0.08438873291015625,
|
|
"tokens_per_second": 379.19754090950187,
|
|
"time_per_token": 2.637147903442383,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526693_non_optimized",
|
|
"timestamp": 1763526693.4361937,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.33553242683410645,
|
|
"tokens_per_second": 128.15452862700513,
|
|
"time_per_token": 7.803079693816429,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526693_optimized",
|
|
"timestamp": 1763526693.5529184,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.11221909523010254,
|
|
"tokens_per_second": 383.1789938408391,
|
|
"time_per_token": 2.6097464007000593,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526696_non_optimized",
|
|
"timestamp": 1763526696.7601304,
|
|
"optimized": false,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.34420084953308105,
|
|
"tokens_per_second": 124.92705947219716,
|
|
"time_per_token": 8.004670919373979,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526696_optimized",
|
|
"timestamp": 1763526696.8780057,
|
|
"optimized": true,
|
|
"prompt_length": 57,
|
|
"generated_length": 43,
|
|
"total_time": 0.11339879035949707,
|
|
"tokens_per_second": 379.192757380227,
|
|
"time_per_token": 2.6371811711510946,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526699_non_optimized",
|
|
"timestamp": 1763526700.0553536,
|
|
"optimized": false,
|
|
"prompt_length": 62,
|
|
"generated_length": 38,
|
|
"total_time": 0.32698941230773926,
|
|
"tokens_per_second": 116.21171380386193,
|
|
"time_per_token": 8.604984534414191,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526699_optimized",
|
|
"timestamp": 1763526700.160437,
|
|
"optimized": true,
|
|
"prompt_length": 62,
|
|
"generated_length": 38,
|
|
"total_time": 0.10051822662353516,
|
|
"tokens_per_second": 378.04089145264277,
|
|
"time_per_token": 2.6452164900930306,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526702_non_optimized",
|
|
"timestamp": 1763526703.3607254,
|
|
"optimized": false,
|
|
"prompt_length": 48,
|
|
"generated_length": 52,
|
|
"total_time": 0.3581371307373047,
|
|
"tokens_per_second": 145.19577987612305,
|
|
"time_per_token": 6.887252514178936,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526702_optimized",
|
|
"timestamp": 1763526703.501407,
|
|
"optimized": true,
|
|
"prompt_length": 48,
|
|
"generated_length": 52,
|
|
"total_time": 0.1360619068145752,
|
|
"tokens_per_second": 382.17897439042554,
|
|
"time_per_token": 2.6165751310495233,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526706_non_optimized",
|
|
"timestamp": 1763526706.6655996,
|
|
"optimized": false,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.32056474685668945,
|
|
"tokens_per_second": 109.18231135267965,
|
|
"time_per_token": 9.158992767333984,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526706_optimized",
|
|
"timestamp": 1763526706.7639794,
|
|
"optimized": true,
|
|
"prompt_length": 65,
|
|
"generated_length": 35,
|
|
"total_time": 0.09380245208740234,
|
|
"tokens_per_second": 373.1245742636668,
|
|
"time_per_token": 2.680070059640067,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526709_non_optimized",
|
|
"timestamp": 1763526709.9760077,
|
|
"optimized": false,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.3535304069519043,
|
|
"tokens_per_second": 130.11610626821704,
|
|
"time_per_token": 7.685443629389224,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526709_optimized",
|
|
"timestamp": 1763526710.1015396,
|
|
"optimized": true,
|
|
"prompt_length": 54,
|
|
"generated_length": 46,
|
|
"total_time": 0.12090945243835449,
|
|
"tokens_per_second": 380.44999023920843,
|
|
"time_per_token": 2.6284663573555327,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526712_non_optimized",
|
|
"timestamp": 1763526713.2750318,
|
|
"optimized": false,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.3372828960418701,
|
|
"tokens_per_second": 118.59480711715196,
|
|
"time_per_token": 8.432072401046753,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526712_optimized",
|
|
"timestamp": 1763526713.3839555,
|
|
"optimized": true,
|
|
"prompt_length": 60,
|
|
"generated_length": 40,
|
|
"total_time": 0.1043252944946289,
|
|
"tokens_per_second": 383.41612351795817,
|
|
"time_per_token": 2.6081323623657227,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526716_non_optimized",
|
|
"timestamp": 1763526716.5846882,
|
|
"optimized": false,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.3465569019317627,
|
|
"tokens_per_second": 126.96327718402686,
|
|
"time_per_token": 7.87629322572188,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526716_optimized",
|
|
"timestamp": 1763526716.704748,
|
|
"optimized": true,
|
|
"prompt_length": 56,
|
|
"generated_length": 44,
|
|
"total_time": 0.11548757553100586,
|
|
"tokens_per_second": 380.9933648506369,
|
|
"time_per_token": 2.6247176257046787,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526719_non_optimized",
|
|
"timestamp": 1763526719.9162998,
|
|
"optimized": false,
|
|
"prompt_length": 55,
|
|
"generated_length": 45,
|
|
"total_time": 0.34871912002563477,
|
|
"tokens_per_second": 129.04368420260982,
|
|
"time_per_token": 7.74931377834744,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526719_optimized",
|
|
"timestamp": 1763526720.0379987,
|
|
"optimized": true,
|
|
"prompt_length": 55,
|
|
"generated_length": 45,
|
|
"total_time": 0.11701393127441406,
|
|
"tokens_per_second": 384.5695936364081,
|
|
"time_per_token": 2.6003095838758683,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526722_non_optimized",
|
|
"timestamp": 1763526723.1653128,
|
|
"optimized": false,
|
|
"prompt_length": 86,
|
|
"generated_length": 14,
|
|
"total_time": 0.2764289379119873,
|
|
"tokens_per_second": 50.64592768669351,
|
|
"time_per_token": 19.74492413657052,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526722_optimized",
|
|
"timestamp": 1763526723.2101393,
|
|
"optimized": true,
|
|
"prompt_length": 86,
|
|
"generated_length": 14,
|
|
"total_time": 0.04014396667480469,
|
|
"tokens_per_second": 348.74480923647076,
|
|
"time_per_token": 2.867426191057478,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526726_non_optimized",
|
|
"timestamp": 1763526726.321796,
|
|
"optimized": false,
|
|
"prompt_length": 87,
|
|
"generated_length": 13,
|
|
"total_time": 0.2667272090911865,
|
|
"tokens_per_second": 48.73893460024045,
|
|
"time_per_token": 20.517477622398964,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526726_optimized",
|
|
"timestamp": 1763526726.3628628,
|
|
"optimized": true,
|
|
"prompt_length": 87,
|
|
"generated_length": 13,
|
|
"total_time": 0.03642010688781738,
|
|
"tokens_per_second": 356.9456849767932,
|
|
"time_per_token": 2.80154668367826,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526729_non_optimized",
|
|
"timestamp": 1763526729.4611804,
|
|
"optimized": false,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.2541630268096924,
|
|
"tokens_per_second": 31.47586059395687,
|
|
"time_per_token": 31.770378351211548,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526729_optimized",
|
|
"timestamp": 1763526729.489691,
|
|
"optimized": true,
|
|
"prompt_length": 92,
|
|
"generated_length": 8,
|
|
"total_time": 0.02371382713317871,
|
|
"tokens_per_second": 337.3559212973669,
|
|
"time_per_token": 2.964228391647339,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526732_non_optimized",
|
|
"timestamp": 1763526732.6022036,
|
|
"optimized": false,
|
|
"prompt_length": 91,
|
|
"generated_length": 9,
|
|
"total_time": 0.25944042205810547,
|
|
"tokens_per_second": 34.69004532371721,
|
|
"time_per_token": 28.82671356201172,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526732_optimized",
|
|
"timestamp": 1763526732.6337376,
|
|
"optimized": true,
|
|
"prompt_length": 91,
|
|
"generated_length": 9,
|
|
"total_time": 0.02675771713256836,
|
|
"tokens_per_second": 336.3515637530072,
|
|
"time_per_token": 2.9730796813964844,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526735_non_optimized",
|
|
"timestamp": 1763526735.746593,
|
|
"optimized": false,
|
|
"prompt_length": 96,
|
|
"generated_length": 4,
|
|
"total_time": 0.24565601348876953,
|
|
"tokens_per_second": 16.28293133635365,
|
|
"time_per_token": 61.41400337219238,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526735_optimized",
|
|
"timestamp": 1763526735.7645717,
|
|
"optimized": true,
|
|
"prompt_length": 96,
|
|
"generated_length": 4,
|
|
"total_time": 0.013253450393676758,
|
|
"tokens_per_second": 301.80819946392273,
|
|
"time_per_token": 3.3133625984191895,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526738_non_optimized",
|
|
"timestamp": 1763526738.6402805,
|
|
"optimized": false,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002372264862060547,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526738_optimized",
|
|
"timestamp": 1763526738.7505527,
|
|
"optimized": true,
|
|
"prompt_length": 100,
|
|
"generated_length": 0,
|
|
"total_time": 0.10556960105895996,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.09814453125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526741_non_optimized",
|
|
"timestamp": 1763526741.855399,
|
|
"optimized": false,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.23850727081298828,
|
|
"tokens_per_second": 8.385488598241453,
|
|
"time_per_token": 119.25363540649414,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526741_optimized",
|
|
"timestamp": 1763526741.8686676,
|
|
"optimized": true,
|
|
"prompt_length": 98,
|
|
"generated_length": 2,
|
|
"total_time": 0.008454322814941406,
|
|
"tokens_per_second": 236.56536943034405,
|
|
"time_per_token": 4.227161407470703,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526744_non_optimized",
|
|
"timestamp": 1763526744.9965777,
|
|
"optimized": false,
|
|
"prompt_length": 95,
|
|
"generated_length": 5,
|
|
"total_time": 0.2477104663848877,
|
|
"tokens_per_second": 20.184855621849653,
|
|
"time_per_token": 49.54209327697754,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526744_optimized",
|
|
"timestamp": 1763526745.0179746,
|
|
"optimized": true,
|
|
"prompt_length": 95,
|
|
"generated_length": 5,
|
|
"total_time": 0.016021251678466797,
|
|
"tokens_per_second": 312.0854787344862,
|
|
"time_per_token": 3.2042503356933594,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526747_non_optimized",
|
|
"timestamp": 1763526748.1259825,
|
|
"optimized": false,
|
|
"prompt_length": 99,
|
|
"generated_length": 1,
|
|
"total_time": 0.2374560832977295,
|
|
"tokens_per_second": 4.211305038440183,
|
|
"time_per_token": 237.4560832977295,
|
|
"memory_used_mb": 83.72900390625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526747_optimized",
|
|
"timestamp": 1763526748.1369371,
|
|
"optimized": true,
|
|
"prompt_length": 99,
|
|
"generated_length": 1,
|
|
"total_time": 0.006153583526611328,
|
|
"tokens_per_second": 162.50693529639673,
|
|
"time_per_token": 6.153583526611328,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526750_non_optimized",
|
|
"timestamp": 1763526750.990292,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002415180206298828,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526750_optimized",
|
|
"timestamp": 1763526751.1003447,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.10532331466674805,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.431640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526753_non_optimized",
|
|
"timestamp": 1763526753.9551547,
|
|
"optimized": false,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002372264862060547,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526753_optimized",
|
|
"timestamp": 1763526754.065468,
|
|
"optimized": true,
|
|
"prompt_length": 111,
|
|
"generated_length": 0,
|
|
"total_time": 0.10554146766662598,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.62255859375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526756_non_optimized",
|
|
"timestamp": 1763526756.9100564,
|
|
"optimized": false,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.000232696533203125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526756_optimized",
|
|
"timestamp": 1763526757.0201974,
|
|
"optimized": true,
|
|
"prompt_length": 104,
|
|
"generated_length": 0,
|
|
"total_time": 0.10536527633666992,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.28857421875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526759_non_optimized",
|
|
"timestamp": 1763526760.134444,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.24916291236877441,
|
|
"tokens_per_second": 12.04031519570553,
|
|
"time_per_token": 83.0543041229248,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526759_optimized",
|
|
"timestamp": 1763526760.1502607,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.01096487045288086,
|
|
"tokens_per_second": 273.6010437051533,
|
|
"time_per_token": 3.654956817626953,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526763_non_optimized",
|
|
"timestamp": 1763526763.0106947,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002446174621582031,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526763_optimized",
|
|
"timestamp": 1763526763.1213396,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.1058340072631836,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526765_non_optimized",
|
|
"timestamp": 1763526765.988826,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002529621124267578,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526765_optimized",
|
|
"timestamp": 1763526766.099505,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10576152801513672,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526768_non_optimized",
|
|
"timestamp": 1763526768.9637494,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024008750915527344,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526768_optimized",
|
|
"timestamp": 1763526769.0741549,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.10558032989501953,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526771_non_optimized",
|
|
"timestamp": 1763526772.1941426,
|
|
"optimized": false,
|
|
"prompt_length": 94,
|
|
"generated_length": 6,
|
|
"total_time": 0.25710487365722656,
|
|
"tokens_per_second": 23.33678049214745,
|
|
"time_per_token": 42.85081227620443,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526771_optimized",
|
|
"timestamp": 1763526772.217405,
|
|
"optimized": true,
|
|
"prompt_length": 94,
|
|
"generated_length": 6,
|
|
"total_time": 0.01833653450012207,
|
|
"tokens_per_second": 327.215592453419,
|
|
"time_per_token": 3.0560890833536782,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526775_non_optimized",
|
|
"timestamp": 1763526775.0632038,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023651123046875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526775_optimized",
|
|
"timestamp": 1763526775.1732247,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10519266128540039,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526778_non_optimized",
|
|
"timestamp": 1763526778.0269144,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025081634521484375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526778_optimized",
|
|
"timestamp": 1763526778.1373425,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.1055910587310791,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.24072265625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526780_non_optimized",
|
|
"timestamp": 1763526780.9818347,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024700164794921875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526780_optimized",
|
|
"timestamp": 1763526781.0923955,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.10562825202941895,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.431640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526783_non_optimized",
|
|
"timestamp": 1763526783.948955,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024271011352539062,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526783_optimized",
|
|
"timestamp": 1763526784.0592883,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.10540580749511719,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.57470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526786_non_optimized",
|
|
"timestamp": 1763526786.926905,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002503395080566406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526786_optimized",
|
|
"timestamp": 1763526787.0370946,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.10536813735961914,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.3837890625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526789_non_optimized",
|
|
"timestamp": 1763526789.8929183,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025582313537597656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526789_optimized",
|
|
"timestamp": 1763526790.0033913,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.10552310943603516,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.52685546875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526792_non_optimized",
|
|
"timestamp": 1763526792.8616316,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002377033233642578,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526792_optimized",
|
|
"timestamp": 1763526792.972686,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.10608291625976562,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.57470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526795_non_optimized",
|
|
"timestamp": 1763526795.8468068,
|
|
"optimized": false,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024056434631347656,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526795_optimized",
|
|
"timestamp": 1763526795.9571438,
|
|
"optimized": true,
|
|
"prompt_length": 114,
|
|
"generated_length": 0,
|
|
"total_time": 0.1053767204284668,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.765625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526798_non_optimized",
|
|
"timestamp": 1763526798.8149438,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024628639221191406,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526798_optimized",
|
|
"timestamp": 1763526798.9252326,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.10529732704162598,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.7177734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526801_non_optimized",
|
|
"timestamp": 1763526801.7890985,
|
|
"optimized": false,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002460479736328125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526801_optimized",
|
|
"timestamp": 1763526801.899515,
|
|
"optimized": true,
|
|
"prompt_length": 113,
|
|
"generated_length": 0,
|
|
"total_time": 0.1053619384765625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.7177734375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526804_non_optimized",
|
|
"timestamp": 1763526804.7603977,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023674964904785156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526804_optimized",
|
|
"timestamp": 1763526804.8709013,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.10542559623718262,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.669921875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526807_non_optimized",
|
|
"timestamp": 1763526807.7322128,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002338886260986328,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526807_optimized",
|
|
"timestamp": 1763526807.842512,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.10524344444274902,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.52685546875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526810_non_optimized",
|
|
"timestamp": 1763526810.7003376,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023674964904785156,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526810_optimized",
|
|
"timestamp": 1763526810.8109784,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10563492774963379,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526813_non_optimized",
|
|
"timestamp": 1763526813.6818757,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002524852752685547,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526813_optimized",
|
|
"timestamp": 1763526813.7921646,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.10520672798156738,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526816_non_optimized",
|
|
"timestamp": 1763526816.6591551,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023221969604492188,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526816_optimized",
|
|
"timestamp": 1763526816.7695994,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.10536456108093262,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.52685546875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526819_non_optimized",
|
|
"timestamp": 1763526819.6383152,
|
|
"optimized": false,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023484230041503906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526819_optimized",
|
|
"timestamp": 1763526819.7488954,
|
|
"optimized": true,
|
|
"prompt_length": 103,
|
|
"generated_length": 0,
|
|
"total_time": 0.10547161102294922,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.24072265625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526822_non_optimized",
|
|
"timestamp": 1763526822.6187825,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002391338348388672,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526822_optimized",
|
|
"timestamp": 1763526822.7292917,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10544276237487793,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526825_non_optimized",
|
|
"timestamp": 1763526825.583301,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023412704467773438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526825_optimized",
|
|
"timestamp": 1763526825.6933794,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.10494589805603027,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.431640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526828_non_optimized",
|
|
"timestamp": 1763526828.5638628,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024127960205078125,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526828_optimized",
|
|
"timestamp": 1763526828.6741154,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.1051325798034668,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.669921875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526831_non_optimized",
|
|
"timestamp": 1763526831.545799,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023818016052246094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526831_optimized",
|
|
"timestamp": 1763526831.6566033,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.10563468933105469,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526834_non_optimized",
|
|
"timestamp": 1763526834.7725186,
|
|
"optimized": false,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.2505476474761963,
|
|
"tokens_per_second": 11.973770379484487,
|
|
"time_per_token": 83.51588249206543,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526834_optimized",
|
|
"timestamp": 1763526834.7885733,
|
|
"optimized": true,
|
|
"prompt_length": 97,
|
|
"generated_length": 3,
|
|
"total_time": 0.01080775260925293,
|
|
"tokens_per_second": 277.5785224239483,
|
|
"time_per_token": 3.60258420308431,
|
|
"memory_used_mb": 86.14013671875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526837_non_optimized",
|
|
"timestamp": 1763526837.7080257,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00025272369384765625,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526837_optimized",
|
|
"timestamp": 1763526837.8196452,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10632824897766113,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526840_non_optimized",
|
|
"timestamp": 1763526840.7012856,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023818016052246094,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526840_optimized",
|
|
"timestamp": 1763526840.8124819,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.10594463348388672,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526843_non_optimized",
|
|
"timestamp": 1763526843.7108738,
|
|
"optimized": false,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002484321594238281,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526843_optimized",
|
|
"timestamp": 1763526843.8216114,
|
|
"optimized": true,
|
|
"prompt_length": 109,
|
|
"generated_length": 0,
|
|
"total_time": 0.10553407669067383,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.52685546875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526846_non_optimized",
|
|
"timestamp": 1763526846.7072785,
|
|
"optimized": false,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023412704467773438,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526846_optimized",
|
|
"timestamp": 1763526846.8180177,
|
|
"optimized": true,
|
|
"prompt_length": 112,
|
|
"generated_length": 0,
|
|
"total_time": 0.10541939735412598,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.669921875,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526849_non_optimized",
|
|
"timestamp": 1763526849.695143,
|
|
"optimized": false,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002357959747314453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526849_optimized",
|
|
"timestamp": 1763526849.806518,
|
|
"optimized": true,
|
|
"prompt_length": 107,
|
|
"generated_length": 0,
|
|
"total_time": 0.10609865188598633,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.431640625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526852_non_optimized",
|
|
"timestamp": 1763526852.698653,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002410411834716797,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526852_optimized",
|
|
"timestamp": 1763526852.8100939,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.10619211196899414,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.3837890625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526855_non_optimized",
|
|
"timestamp": 1763526855.6878016,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023484230041503906,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526855_optimized",
|
|
"timestamp": 1763526855.798402,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.10535454750061035,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526858_non_optimized",
|
|
"timestamp": 1763526858.673868,
|
|
"optimized": false,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.00024962425231933594,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526858_optimized",
|
|
"timestamp": 1763526858.7845938,
|
|
"optimized": true,
|
|
"prompt_length": 102,
|
|
"generated_length": 0,
|
|
"total_time": 0.1053924560546875,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.193359375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526861_non_optimized",
|
|
"timestamp": 1763526861.6461842,
|
|
"optimized": false,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002357959747314453,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526861_optimized",
|
|
"timestamp": 1763526861.7578971,
|
|
"optimized": true,
|
|
"prompt_length": 106,
|
|
"generated_length": 0,
|
|
"total_time": 0.10634636878967285,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.3837890625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526864_non_optimized",
|
|
"timestamp": 1763526864.629814,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002498626708984375,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526864_optimized",
|
|
"timestamp": 1763526864.7408218,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.1056220531463623,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526867_non_optimized",
|
|
"timestamp": 1763526867.6077204,
|
|
"optimized": false,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.0002434253692626953,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526867_optimized",
|
|
"timestamp": 1763526867.7184992,
|
|
"optimized": true,
|
|
"prompt_length": 105,
|
|
"generated_length": 0,
|
|
"total_time": 0.10547971725463867,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.33642578125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526870_non_optimized",
|
|
"timestamp": 1763526870.5837529,
|
|
"optimized": false,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.00023603439331054688,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 73.24609375,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763526870_optimized",
|
|
"timestamp": 1763526870.6944253,
|
|
"optimized": true,
|
|
"prompt_length": 110,
|
|
"generated_length": 0,
|
|
"total_time": 0.10529303550720215,
|
|
"tokens_per_second": 0,
|
|
"time_per_token": 0,
|
|
"memory_used_mb": 86.57470703125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763936322_non_optimized",
|
|
"timestamp": 1763936322.5285127,
|
|
"optimized": false,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.34784841537475586,
|
|
"tokens_per_second": 135.11632631519785,
|
|
"time_per_token": 7.401030114356508,
|
|
"memory_used_mb": 83.77001953125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763936322_optimized",
|
|
"timestamp": 1763936322.6566308,
|
|
"optimized": true,
|
|
"prompt_length": 53,
|
|
"generated_length": 47,
|
|
"total_time": 0.12265253067016602,
|
|
"tokens_per_second": 383.1963331143258,
|
|
"time_per_token": 2.609628312131192,
|
|
"memory_used_mb": 86.13916015625,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
},
|
|
{
|
|
"run_name": "run_1763936325_non_optimized",
|
|
"timestamp": 1763936325.8276925,
|
|
"optimized": false,
|
|
"prompt_length": 66,
|
|
"generated_length": 34,
|
|
"total_time": 0.3239288330078125,
|
|
"tokens_per_second": 104.96132648735221,
|
|
"time_per_token": 9.527318617876839,
|
|
"memory_used_mb": 83.7705078125,
|
|
"gpu_utilization": null,
|
|
"device": "cuda"
|
|
}
|
|
]
|
|
} |