<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
  <url>
    <loc>https://blog.squeezebits.com</loc>
    <lastmod>2026-03-27</lastmod>
    <priority>1.00</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/bizinsight</loc>
    <lastmod>2025-07-06</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/fits-on-chips</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/intel-gaudi</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/owlite</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/product</loc>
    <lastmod>2024-10-02</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/research</loc>
    <lastmod>2025-02-17</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/tech</loc>
    <lastmod>2024-10-02</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/vllm-vs-trt-llm</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/category/yetter</loc>
    <lastmod>2025-10-31</lastmod>
    <priority>0.90</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai-production</loc>
    <lastmod>2026-03-31</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai</loc>
    <lastmod>2026-03-27</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/introducing-atom-max-npu</loc>
    <lastmod>2026-01-02</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/77516</loc>
    <lastmod>2025-12-23</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/intel-gaudi-gemm-attention-performance</loc>
    <lastmod>2026-02-24</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/yetter-genai-api-service</loc>
    <lastmod>2025-10-27</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/guided-decoding-performance-vllm-sglang</loc>
    <lastmod>2026-01-20</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/disaggregated-inference-on-apple-silicon-npu-prefill-and-gpu-decode-67176</loc>
    <lastmod>2025-10-02</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vocabulary-trimming-methods</loc>
    <lastmod>2026-02-25</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/gralora-boosting-fine-tuning-accuracy</loc>
    <lastmod>2025-09-28</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/owlite-qualcomm-on-device-ai</loc>
    <lastmod>2025-09-16</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/bringing-npus-into-production</loc>
    <lastmod>2025-09-10</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/how-to-quantize-transformerbased-model-for-tensorrt-deployment-55802</loc>
    <lastmod>2025-07-08</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/how-to-quantize-yolo-models-with-owlite-54076</loc>
    <lastmod>2025-05-20</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/owlite-no-more-compromising-on-ai-performance-after-quantization-51779</loc>
    <lastmod>2025-05-07</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/intel-gaudi-5-flux1-on-gaudi2-50213</loc>
    <lastmod>2026-03-03</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/tensorrtllm-goes-open-source-48780</loc>
    <lastmod>2025-03-26</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/when-should-i-use-fits-on-chips-46717</loc>
    <lastmod>2025-05-07</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/fits-on-chips-saving-llm-costs-became-easier-than-ever-38187</loc>
    <lastmod>2025-03-19</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/sleb-streamlining-llms-through-redundancy-verification-and-elimination-of-transformer-blocks-f2bb262342d6</loc>
    <lastmod>2025-07-21</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/the-missing-piece-of-tensorrtllm-42462</loc>
    <lastmod>2025-06-23</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/the-rise-and-fall-of-onnx-feat-pytorch-20-42184</loc>
    <lastmod>2025-08-04</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-13-visionlanguage-models-40761</loc>
    <lastmod>2025-05-06</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/intel-gaudi-4-fp8-quantization--40269</loc>
    <lastmod>2025-04-16</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/intel-gaudi-3-performance-evaluation-with-synapseai-v119-39839</loc>
    <lastmod>2025-05-02</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-12-automatic-prefix-caching-38189</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-11-speculative-decoding-37301</loc>
    <lastmod>2025-05-06</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/37065</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/36821</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-9-parallelism-strategies-36310</loc>
    <lastmod>2025-02-05</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/intel-gaudi-1-introduction-35414</loc>
    <lastmod>2025-07-11</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-8-kv-cache-quantization-35079</loc>
    <lastmod>2025-07-22</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-7-weightactivation-quantization-34461</loc>
    <lastmod>2025-07-22</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-6-weightonly-quantization-33728</loc>
    <lastmod>2025-07-22</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-5-dynamic-sequence-lengths--33410</loc>
    <lastmod>2025-03-09</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-4-which-scheduler-wins--33083</loc>
    <lastmod>2025-03-25</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-3-understanding-sampling-methods-and-their-performance-impact-31921</loc>
    <lastmod>2025-05-09</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-2-towards-optimal-batching-for-llm-serving-31349</loc>
    <lastmod>2025-03-09</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/vllm-vs-tensorrtllm-1-an-overall-evaluation-30703</loc>
    <lastmod>2025-10-18</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/how-much-can-we-save-through-compression-b675c60611b4</loc>
    <lastmod>2025-01-24</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/breaking-down-tokenizers-in-llms-5699a8122574</loc>
    <lastmod>2025-01-24</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/accuracy-degradation-in-ai-compression-myth-or-truth-c7a94ec0bc92</loc>
    <lastmod>2025-01-24</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/are-you-getting-everything-out-of-your-gpus-1f030a4a460f</loc>
    <lastmod>2025-09-28</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/things-to-check-if-your-business-utilizes-ai-53be650a1248</loc>
    <lastmod>2025-01-24</lastmod>
    <priority>0.80</priority>
  </url>
  <url>
    <loc>https://blog.squeezebits.com/4-types-of-ai-compression-methods-you-should-know-5d07759c60a7</loc>
    <lastmod>2025-01-24</lastmod>
    <priority>0.80</priority>
  </url>
</urlset>