<?xml version="1.0" encoding="UTF-8"?>
    <rss version="2.0">
      <channel>
        <title>The official SqueezeBits Tech blog</title>
        <link>https://blog.squeezebits.com</link>
        <description>The official SqueezeBits Tech blog covering AI model compression and optimization, LLM and Edge AI systems, and deep technical insights into products like Yetter, Fits on Chips, and OwLite, along with research, engineering, and development updates.</description>
        <pubDate>Mon, 10 Apr 2023 03:50:02 GMT</pubDate>
        <generator>inblog</generator>
        <lastBuildDate>Tue, 14 Apr 2026 05:08:30 GMT</lastBuildDate>
        <image>
          <url>https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Fog_image%2F2026-01-07T04%3A48%3A26.787Z-d4f56888-0576-4c57-bdc3-f6f129134bb1&amp;w=1920&amp;q=85</url>
          <title>og image</title>
          <link>https://blog.squeezebits.com</link>
        </image>

        <item>
              <title>Recap: 2nd vLLM Korea Meetup 2026</title>
              <link>https://blog.squeezebits.com/vllm-korea-meetup-2026-en</link>
              <description>Check out highlights from the 2nd vLLM Korea Meetup — open-source use cases and real-world production examples that showcase vLLM&apos;s technical maturity!</description>
              <pubDate>Tue, 14 Apr 2026 08:54:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-korea-meetup-2026-en</guid>
              <category>Event</category>
              <author>Goeun Kang</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A35%3A04.590Z-92b90155-1c97-40a9-bc51-6de1335133f6&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Our Experience Running a Booth at GTC 2026</title>
              <link>https://blog.squeezebits.com/gtc-conference-booth-review-en</link>
              <description>Sharing insights from GTC 2026, the largest AI industry conference for developers! If you’ve ever wondered what it’s like for an AI startup to run a booth at such a massive event, you won’t want to miss this!</description>
              <pubDate>Fri, 27 Mar 2026 09:11:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/gtc-conference-booth-review-en</guid>
              <category>Event</category>
              <author>Goeun Kang</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T02%3A26%3A46.857Z-04b185f9-c2e4-49d5-ba46-ef3dbd8e65d5&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Reliable &amp; Scalable Synthetic Data for Physical AI (Part 2): Making Cosmos 3.1x Faster for Production</title>
              <link>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai-production</link>
              <description>Explore why Physical AI deployment needs synthetic data at scale with Squeezebits&apos; research and discover how to overcome inference bottlenecks to accelerate Roboost Agent.</description>
              <pubDate>Wed, 11 Mar 2026 06:49:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai-production</guid>
              <category>Research</category>
              <author>Jongho Lee</author><author>Daehyun Ahn</author><author>Yeonjoon Jung</author><author>Semin Kim</author><author>Seungryeol Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-03-11T06%3A53%3A10.542Z-04b9d117-b760-4a0a-97dc-8ce5548abb53&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Reliable &amp; Scalable Synthetic Data for Physical AI (Part 1): Taming NVIDIA Cosmos with RoBoost Agent</title>
              <link>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai</link>
              <description>Scaling Physical AI requires reliable synthetic data. Learn how RoBoost Agent integrates NVIDIA Cosmos to transform world models into trustworthy data engines for robotics and autonomous driving.</description>
              <pubDate>Wed, 25 Feb 2026 08:22:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/reliable-synthetic-data-physical-ai</guid>
              <category>Research</category>
              <author>Daehyun Ahn</author><author>Jongho Lee</author><author>Yeonjoon Jung</author><author>Semin Kim</author><author>Seungryeol Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-02-25T08%3A14%3A59.756Z-aeb0efd2-32e6-4730-8049-95a6d47c8089&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Intel® Gaudi® Hands-on Workshop | A Recap of the Gaudi Workshop with SqueezeBits x Lablup</title>
              <link>https://blog.squeezebits.com/intel-gaudi-hands-on-workshop-en</link>
              <description>A recap of the Intel® Gaudi® hands-on workshop co-hosted by SqueezeBits and Lablup. AI model compression, fine-tuning, and vLLM serving on Gaudi® hardware with Backend.AI.</description>
              <pubDate>Wed, 07 Jan 2026 07:34:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-hands-on-workshop-en</guid>
              <category>Event</category>
              <author>Goeun Kang</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A28%3A06.078Z-a8cf30d5-3291-43c0-a24e-974d48256eab&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Introducing Rebellions’ ATOM™-MAX</title>
              <link>https://blog.squeezebits.com/introducing-atom-max-npu</link>
              <description>Introducing ATOM™-Max, Rebellions’ next-generation NPU designed for high-performance AI inference. Learn how its runtime, profiling tools, and PyTorch-native integrations enable developers to run and serve models efficiently without sacrificing usability.</description>
              <pubDate>Wed, 24 Dec 2025 08:30:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/introducing-atom-max-npu</guid>
              <category>Tech Insight</category>
              <author>Huijong Jeong</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-12-24T08%3A39%3A11.341Z-cc950dc1-fc4c-41aa-bbae-4fc95e72ce6a&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>vLLM Hands-on Workshop with Rebellions &amp; SqueezeBits: A Recap</title>
              <link>https://blog.squeezebits.com/vllm-hands-on-workshop-with-rebellions-squeezebits-en</link>
              <description>Rebellions and SqueezeBits Co-Host a vLLM Hands-on Workshop: Workshop Highlights, PyTorch Best Practices, Performance Optimization, and Developer First-Hand Tips</description>
              <pubDate>Wed, 10 Dec 2025 03:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-hands-on-workshop-with-rebellions-squeezebits-en</guid>
              <category>Event</category>
              <author>Goeun Kang</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T05%3A55%3A02.696Z-183f050a-8ef5-4d0b-bb97-410f90d3d115&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Winning both speed and quality: How Yetter deals with diffusion models</title>
              <link>https://blog.squeezebits.com/77516</link>
              <description>Explore how the Yetter Inference Engine overcomes the limitations of step caching and model distillation for diffusion models. We analyze latency, diversity, quality, and negative-prompt handling to reveal what truly matters for scalable, real-time image generation.</description>
              <pubDate>Fri, 31 Oct 2025 08:54:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/77516</guid>
              <category>Product</category>
              <author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-10-31T08%3A48%3A57.939Z-937496ee-e715-4dfa-a00a-9a58fb6375c9&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #6. GEMM, Attention, vLLM on Gaudi</title>
              <link>https://blog.squeezebits.com/intel-gaudi-gemm-attention-performance</link>
              <description>Explore how Intel’s new Gaudi-3 compares to Gaudi-2, NVIDIA A100, and H100. We analyze real-world GEMM efficiency, attention performance, and LLM serving results to uncover what truly matters for AI inference and training workloads.</description>
              <pubDate>Tue, 28 Oct 2025 00:30:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-gemm-attention-performance</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-10-28T00%3A40%3A59.101Z-7c388dfa-db55-4858-bde2-09e87d8815c7&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Yetter, the GenAI API service: AI Optimization, Out of the Box</title>
              <link>https://blog.squeezebits.com/yetter-genai-api-service</link>
              <description>Meet &apos;Yetter&apos;: the generative AI API service built for speed, efficiency, and scalability. Powered by our optimization inference engine, it delivers reliable image, video, and future LLM services at a fraction of the cost.</description>
              <pubDate>Thu, 02 Oct 2025 04:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/yetter-genai-api-service</guid>
              <category>Product</category>
              <author>Seungryeol Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-10-02T04%3A01%3A25.656Z-6bcf52b5-812b-40b4-9287-1e24df7e73e5&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Guided Decoding Performance on vLLM and SGLang</title>
              <link>https://blog.squeezebits.com/guided-decoding-performance-vllm-sglang</link>
              <description>The guide to LLM guided decoding! This deep-dive benchmark compares XGrammar and LLGuidance on vLLM and SGLang to help you find the optimal setup for generating structured output based on your use case.</description>
              <pubDate>Tue, 16 Sep 2025 05:59:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/guided-decoding-performance-vllm-sglang</guid>
              <category>Tech Insight</category>
              <author>Eunik Park</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-09-16T06%3A10%3A31.126Z-3e0936b7-e8fc-40af-b1a3-484370bc0747&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Disaggregated Inference on Apple Silicon: NPU prefill and GPU decode</title>
              <link>https://blog.squeezebits.com/disaggregated-inference-on-apple-silicon-npu-prefill-and-gpu-decode-67176</link>
              <description>In this article, we introduce how to run LLMs efficiently on Apple Silicon with disaggregated inference technique.</description>
              <pubDate>Tue, 26 Aug 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/disaggregated-inference-on-apple-silicon-npu-prefill-and-gpu-decode-67176</guid>
              <category>Tech Insight</category>
              <author>Jiwoong Choi</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-08-26T15%3A46%3A52.445Z-eaf2879b-69d2-446c-b434-77251dc987f6&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Efficient AI Study] AI Model Compression Community Study and Meetup</title>
              <link>https://blog.squeezebits.com/efficient-ai-study-meetup-by-squeezebits-en</link>
              <description>Efficient AI Study &amp; Meetup recap: SqueezeBits&apos; community study on AI model compression, featuring paper reviews, participant interviews, and networking from the offline meetup.</description>
              <pubDate>Wed, 20 Aug 2025 08:27:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/efficient-ai-study-meetup-by-squeezebits-en</guid>
              <category>Event</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A27%3A46.209Z-306bfc41-c7e5-4279-847f-779a2b84007e&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Vocabulary Trimming: An Easy and Effective Method for SLM Acceleration</title>
              <link>https://blog.squeezebits.com/vocabulary-trimming-methods</link>
              <description>Trimming large multilingual vocabularies in Small Language Models (SLM) is a simple, low-risk way to boost efficiency to its limit. It accelerates the model inference significantly while keeping accuracy almost unchanged.</description>
              <pubDate>Mon, 04 Aug 2025 09:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vocabulary-trimming-methods</guid>
              <category>Research</category>
              <author>Semin Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-08-05T01%3A00%3A28.786Z-1d00e7cc-75fd-4c6d-a343-f157c9167947&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>GraLoRA: Boosting Fine-Tuning Accuracy Without Extra Cost</title>
              <link>https://blog.squeezebits.com/gralora-boosting-fine-tuning-accuracy</link>
              <description>LoRA excels at efficient fine-tuning but suffers at higher ranks due to gradient entanglement. We introduce GraLoRA, which addresses these issues through finer-grained, block-wise updates, significantly enhancing performance and expressivity without overhead. GraLoRA outperforms LoRA across tasks, achieving up to +8.5% improvement in HumanEval+ Pass@1.</description>
              <pubDate>Mon, 21 Jul 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/gralora-boosting-fine-tuning-accuracy</guid>
              <category>Research</category>
              <author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-07-21T07%3A14%3A44.333Z-db180523-412d-4c51-a47a-467f1c9ba0b1&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>OwLite Meets Qualcomm Neural Network: Unlocking On-Device AI Performance</title>
              <link>https://blog.squeezebits.com/owlite-qualcomm-on-device-ai</link>
              <description>At SqueezeBits we have been empowering developers to efficiently deploy complex AI models while minimizing performance trade-offs with OwLite toolkit. With OwLite v2.5, we&apos;re excited to announce official support for Qualcomm Neural Network (QNN) through seamless integration with Qualcomm AI Hub. </description>
              <pubDate>Thu, 03 Jul 2025 01:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/owlite-qualcomm-on-device-ai</guid>
              <category>Product</category>
              <author>Eunik Park</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-06-30T04%3A06%3A16.245Z-327b6baa-af4b-4941-9321-5f120d2d312a&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Bringing NPUs into Production: Our Journey with Intel Gaudi</title>
              <link>https://blog.squeezebits.com/bringing-npus-into-production</link>
              <description>SqueezeBits has partnered with Intel to make Gaudi NPUs more usable in practice. We optimized LLMs and diffusion models for Gaudi-2 and created Yetter, a generative AI API service.</description>
              <pubDate>Tue, 01 Jul 2025 07:52:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/bringing-npus-into-production</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Finblog.ai%2Fapi%2Fog-custom%3Ftitle%3DBringing%2BNPUs%2Binto%2BProduction%26tag%3DTemplate%2B1%26description%3DOur%2BJourney%2Bwith%2BIntel%2BGaudi%26template%3D3%26backgroundImage%3Dhttps%253A%252F%252Fsource.inblog.dev%252Fog_image%252F2025-06-30T17%253A56%253A04.091Z-2dedabaa-7c9a-4412-81d5-9f99850dfdff%26bgStartColor%3D%2523ffffff%26bgEndColor%3D%2523ffffff%26textColor%3D%2523000000%26tagColor%3D%2523000000%26descriptionColor%3D%2523000000%26logoUrl%3Dhttps%253A%252F%252Fsource.inblog.dev%252Flogo%252F2024-10-02T08%253A34%253A10.113Z-b9d982b5-d304-4ef1-8139-af0f65b3989e%26blogTitle%3D&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Japan IT Week Spring 2025] What We Saw on the Global AI Frontline in Tokyo</title>
              <link>https://blog.squeezebits.com/tokyo-japan-itwwek-2025-global-ai-expo-experience-en</link>
              <description>SqueezeBits at Japan IT Week Spring 2025 in Tokyo: AI model compression demos, OwLite and Fits on Chips introductions, Japan market entry experiences, and team stories from the frontline.</description>
              <pubDate>Tue, 10 Jun 2025 07:50:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/tokyo-japan-itwwek-2025-global-ai-expo-experience-en</guid>
              <category>Event</category>
              <author>Naeun Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A28%3A01.294Z-7d60768f-d82c-4186-b18b-ec38af71618d&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>How to Quantize Transformer-based model for TensorRT Deployment</title>
              <link>https://blog.squeezebits.com/how-to-quantize-transformerbased-model-for-tensorrt-deployment-55802</link>
              <description>This article describes the experimental results of quantized Vision Transformer model and its variants with OwLite.</description>
              <pubDate>Tue, 20 May 2025 05:02:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/how-to-quantize-transformerbased-model-for-tensorrt-deployment-55802</guid>
              <category>Product</category>
              <author>Daehyun Ahn</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-06-20T08%3A17%3A35.860Z-7185f8e9-8950-425d-b94f-ed11fba94753&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>How to Quantize YOLO models with OwLite</title>
              <link>https://blog.squeezebits.com/how-to-quantize-yolo-models-with-owlite-54076</link>
              <description>This article describes the experimental results of quantized YOLO models with OwLite.</description>
              <pubDate>Wed, 07 May 2025 00:47:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/how-to-quantize-yolo-models-with-owlite-54076</guid>
              <category>Product</category>
              <author>Daehyun Ahn</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-05-07T01%3A47%3A41.830Z-e9da258f-c1ea-49a1-8c60-4e15a04f46fb&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>OwLite: No More Compromising on AI Performance After Quantization</title>
              <link>https://blog.squeezebits.com/owlite-no-more-compromising-on-ai-performance-after-quantization-51779</link>
              <description>Discover how OwLite simplifies AI model optimization with seamless integration and secure architecture. </description>
              <pubDate>Fri, 11 Apr 2025 08:54:13 GMT</pubDate>
              <guid>https://blog.squeezebits.com/owlite-no-more-compromising-on-ai-performance-after-quantization-51779</guid>
              <category>Product</category>
              <author>Seungryeol Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-04-11T08%3A53%3A56.595Z-0a72ca66-9ad0-40d4-85d7-f4bfd0a92abc&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #5. FLUX.1 on Gaudi-2</title>
              <link>https://blog.squeezebits.com/intel-gaudi-5-flux1-on-gaudi2-50213</link>
              <description>This article discusses inference efficiency when running the FLUX.1 models on Intel Gaudi-2 hardware.</description>
              <pubDate>Wed, 02 Apr 2025 09:20:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-5-flux1-on-gaudi2-50213</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-04-02T09%3A22%3A19.254Z-91eb807a-ea7f-4432-90f7-3f55d7cef8f2&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Field Notes from the Global AI Market: Our Overseas Event Recap</title>
              <link>https://blog.squeezebits.com/global-ai-events-recap-squeezebits-en</link>
              <description>From Edge AI to NVIDIA GTC: Squeezebits team members share firsthand stories from global AI events, including networking insights, technical trends, and conference experiences.</description>
              <pubDate>Wed, 02 Apr 2025 08:03:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/global-ai-events-recap-squeezebits-en</guid>
              <category>Event</category>
              <author>Naeun Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A27%3A54.320Z-e59d9960-ed3e-4c22-9dfd-50c9150a0a17&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>TensorRT-LLM Goes Open Source!</title>
              <link>https://blog.squeezebits.com/tensorrtllm-goes-open-source-48780</link>
              <description>With TensorRT-LLM now open source, we can finally take a deep dive into the secret sauce behind its impressive performance.</description>
              <pubDate>Tue, 25 Mar 2025 16:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/tensorrtllm-goes-open-source-48780</guid>
              <category>Tech Insight</category>
              <author>Huijong Jeong</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-03-26T04%3A44%3A23.630Z-8a7b7c86-b440-44c8-b278-3b193ec2d7fb&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>When Should I Use Fits on Chips?</title>
              <link>https://blog.squeezebits.com/when-should-i-use-fits-on-chips-46717</link>
              <description>This article describes when to use Fits on Chips toolkit with specific use cases.</description>
              <pubDate>Mon, 10 Mar 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/when-should-i-use-fits-on-chips-46717</guid>
              <category>Product</category>
              <author>Daehyun Ahn</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-03-10T15%3A39%3A25.097Z-4d47fef5-7df4-40d6-a956-cb0fd7960ea4&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Fits on Chips: Saving LLM Costs Became Easier Than Ever</title>
              <link>https://blog.squeezebits.com/fits-on-chips-saving-llm-costs-became-easier-than-ever-38187</link>
              <description>This article introduces Fits on Chips, an LLMOps toolkit for performance evaluation.</description>
              <pubDate>Wed, 26 Feb 2025 15:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/fits-on-chips-saving-llm-costs-became-easier-than-ever-38187</guid>
              <category>Product</category>
              <author>Seungryeol Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-02-26T18%3A50%3A23.103Z-d35a1d25-f942-4246-a953-8efbeaccdb4e&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>SLEB: Streamlining LLMs through Redundancy Verification and Elimination of Transformer Blocks </title>
              <link>https://blog.squeezebits.com/sleb-streamlining-llms-through-redundancy-verification-and-elimination-of-transformer-blocks-f2bb262342d6</link>
              <description>A brief review of the research paper from our team, published at ICML 2024.</description>
              <pubDate>Mon, 17 Feb 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/sleb-streamlining-llms-through-redundancy-verification-and-elimination-of-transformer-blocks-f2bb262342d6</guid>
              <category>Research</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A15.921Z-52b66932-6faa-4ee6-a0d8-b29587f8ec5e&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>The Missing Piece of TensorRT-LLM</title>
              <link>https://blog.squeezebits.com/the-missing-piece-of-tensorrtllm-42462</link>
              <description>This article is about an open-source library for direct conversion of PyTorch models to TensorRT-LLM.</description>
              <pubDate>Mon, 10 Feb 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/the-missing-piece-of-tensorrtllm-42462</guid>
              <category>Tech Insight</category>
              <author>Jiwoong Choi</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-02-10T13%3A06%3A48.862Z-39b185fa-9c67-4f94-94aa-91cad3b2458b&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>The Rise and Fall of ONNX (feat. PyTorch 2.0)</title>
              <link>https://blog.squeezebits.com/the-rise-and-fall-of-onnx-feat-pytorch-20-42184</link>
              <description>This article explores the rise and fall of ONNX, from its early success as a unifying standard for AI frameworks to its gradual shift into a niche tool in the era of PyTorch 2.0.</description>
              <pubDate>Thu, 06 Feb 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/the-rise-and-fall-of-onnx-feat-pytorch-20-42184</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-02-06T14%3A35%3A39.621Z-866e24c1-f21d-4fd3-9e07-2e72f37a2fe7&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #13. Vision-Language Models</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-13-visionlanguage-models-40761</link>
              <description>This article provides a comparative analysis of serving vision-language models on vLLM and TensorRT-LLM.</description>
              <pubDate>Mon, 20 Jan 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-13-visionlanguage-models-40761</guid>
              <category>Tech Insight</category>
              <author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-20T14%3A26%3A08.405Z-716640e7-f4fc-4948-be2c-aa244cb85a2a&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #4. FP8 Quantization </title>
              <link>https://blog.squeezebits.com/intel-gaudi-4-fp8-quantization--40269</link>
              <description>In this blog series, we thoroughly evaluate Intel&apos;s AI accelerator, the Gaudi series, focusing on its performance, features, and usability.</description>
              <pubDate>Mon, 13 Jan 2025 11:14:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-4-fp8-quantization--40269</guid>
              <category>Tech Insight</category>
              <author>Minkyu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-13T11%3A16%3A29.693Z-ad7a8277-6cce-44a1-86eb-5b6d8bf4284e&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #3. Performance Evaluation with SynapseAI v1.19</title>
              <link>https://blog.squeezebits.com/intel-gaudi-3-performance-evaluation-with-synapseai-v119-39839</link>
              <description>In this blog series, we thoroughly evaluate Intel&apos;s AI accelerator, the Gaudi series, focusing on its performance, features, and usability.</description>
              <pubDate>Mon, 06 Jan 2025 00:00:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-3-performance-evaluation-with-synapseai-v119-39839</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-06T16%3A03%3A00.909Z-f6fb3b9b-22cb-4591-b1c4-4c628892c222&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #12. Automatic Prefix Caching</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-12-automatic-prefix-caching-38189</link>
              <description>This article provides a comparative analysis of automatic prefix caching.</description>
              <pubDate>Mon, 23 Dec 2024 02:15:41 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-12-automatic-prefix-caching-38189</guid>
              <category>Tech Insight</category>
              <author>Daehyun Ahn</author><author>Yeonjoon Jung</author><author>Taesu Kim</author><author>Huijong Jeong</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-12-19T06%3A27%3A31.511Z-7394b669-8562-44c0-bc5a-08168258b4c3&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #11. Speculative Decoding</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-11-speculative-decoding-37301</link>
              <description>This article provides a comparative analysis of speculative decoding.</description>
              <pubDate>Mon, 09 Dec 2024 03:26:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-11-speculative-decoding-37301</guid>
              <category>Tech Insight</category>
              <author>Daehyun Ahn</author><author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-12-09T03%3A28%3A05.388Z-f600565a-1d3b-4116-b715-750d8af76460&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #10 Serving Multiple LoRAs at Once</title>
              <link>https://blog.squeezebits.com/37065</link>
              <description>This article provides a comparative analysis of multi-LoRA serving capabilities of vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Thu, 05 Dec 2024 02:30:58 GMT</pubDate>
              <guid>https://blog.squeezebits.com/37065</guid>
              <category>Tech Insight</category>
              <author>Jongho Lee</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-12-05T02%3A32%3A20.467Z-0205406d-c969-4cc0-b85e-65bda3d0ebfc&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #2. Graph Compiler and Overall Performance Evaluation</title>
              <link>https://blog.squeezebits.com/36821</link>
              <description>In this blog series, we thoroughly evaluate Intel&apos;s AI accelerator, the Gaudi series, focusing on its performance, features, and usability.</description>
              <pubDate>Mon, 02 Dec 2024 16:08:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/36821</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-12-02T16%3A09%3A51.142Z-cf006513-0d87-49a8-89b0-d60985515c05&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #9. Parallelism Strategies</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-9-parallelism-strategies-36310</link>
              <description>This article provides a comparative analysis of different parallelism strategies on vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Tue, 26 Nov 2024 04:35:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-9-parallelism-strategies-36310</guid>
              <category>Tech Insight</category>
              <author>Changjun Lee</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-11-26T04%3A36%3A01.422Z-d0ee47a1-9e1f-4464-bff5-feff8000b590&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[Intel Gaudi] #1. Introduction</title>
              <link>https://blog.squeezebits.com/intel-gaudi-1-introduction-35414</link>
              <description>In this blog series, we thoroughly evaluate Intel&apos;s AI accelerator, the Gaudi series, focusing on its performance, features, and usability.</description>
              <pubDate>Thu, 21 Nov 2024 04:28:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/intel-gaudi-1-introduction-35414</guid>
              <category>Tech Insight</category>
              <author>Taesu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-12-03T05%3A11%3A01.342Z-67c80d3a-83e5-4363-92f6-5b7a59e4020f&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #8. KV Cache Quantization</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-8-kv-cache-quantization-35079</link>
              <description>This article provides a comparative analysis of the effects of KV cache quantization on vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Mon, 18 Nov 2024 03:01:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-8-kv-cache-quantization-35079</guid>
              <category>Tech Insight</category>
              <author>Jiwon Song</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-11-18T03%3A00%3A37.332Z-dbd37732-a3e8-44e9-9c4f-bc8a0c6456df&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #7. Weight-Activation Quantization</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-7-weightactivation-quantization-34461</link>
              <description>This article provides a comparative analysis of the effects of weight-activation quantization on vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Mon, 11 Nov 2024 02:38:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-7-weightactivation-quantization-34461</guid>
              <category>Tech Insight</category>
              <author>Eunik Park</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-11-11T14%3A39%3A43.503Z-0000c964-a3ea-43d6-8657-5c6c97b383dd&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #6. Weight-Only Quantization</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-6-weightonly-quantization-33728</link>
              <description>This article provides a comparative analysis of the effects of weight-only quantization on vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Fri, 01 Nov 2024 04:51:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-6-weightonly-quantization-33728</guid>
              <category>Tech Insight</category>
              <author>Jiwon Song</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-11-01T07%3A29%3A34.703Z-f341cfea-dcfd-49bc-a0e5-5f1bddff62cc&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #5. Dynamic Sequence Lengths </title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-5-dynamic-sequence-lengths--33410</link>
              <description>This article provides a comparative analysis of vLLM and TensorRT-LLM frameworks, focusing on performance with fixed and dynamic datasets.</description>
              <pubDate>Wed, 30 Oct 2024 02:19:10 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-5-dynamic-sequence-lengths--33410</guid>
              <category>Tech Insight</category>
              <author>Minkyu Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-10-30T11%3A17%3A47.005Z-761ef927-ccec-4b32-bc89-3439dd124f69&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #4. Which Scheduler Wins? 🔥</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-4-which-scheduler-wins--33083</link>
              <description>This article provides a comparative analysis of schedulers in vLLM and TensorRT-LLM frameworks.</description>
              <pubDate>Thu, 24 Oct 2024 03:22:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-4-which-scheduler-wins--33083</guid>
              <category>Tech Insight</category>
              <author>Huijong Jeong</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-10-24T03%3A23%3A25.110Z-9ea75506-2e99-4e54-b07f-7bd1e1fd63c2&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #3. Understanding Sampling Methods and Their Performance Impact</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-3-understanding-sampling-methods-and-their-performance-impact-31921</link>
              <description>This article provides a comparative analysis of vLLM and TensorRT-LLM frameworks with various sampling methods. </description>
              <pubDate>Fri, 18 Oct 2024 03:54:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-3-understanding-sampling-methods-and-their-performance-impact-31921</guid>
              <category>Tech Insight</category>
              <author>Daehyun Ahn</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-10-18T03%3A54%3A59.980Z-8534a5d1-9a0c-4481-b4fe-c1ce214c7bac&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #2. Towards Optimal Batching for LLM Serving</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-2-towards-optimal-batching-for-llm-serving-31349</link>
              <description>This article provides a comparative analysis of vLLM and TensorRT-LLM frameworks, focusing on batching configurations and thoroughly examining the effects of maximum batch size and maximum number of tokens.</description>
              <pubDate>Fri, 11 Oct 2024 04:56:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-2-towards-optimal-batching-for-llm-serving-31349</guid>
              <category>Tech Insight</category>
              <author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-10-11T03%3A54%3A47.941Z-88ec91d2-5dea-4a74-9c16-3ea691d51c22&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>[vLLM vs TensorRT-LLM] #1. An Overall Evaluation</title>
              <link>https://blog.squeezebits.com/vllm-vs-tensorrtllm-1-an-overall-evaluation-30703</link>
              <description>This article provides a comparative analysis of vLLM and TensorRT-LLM frameworks for serving LLMs, evaluating their performance based on key metrics like throughput, TTFT, and TPOT to offer insights for practitioners in optimizing LLM deployment strategies.</description>
              <pubDate>Tue, 01 Oct 2024 05:39:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/vllm-vs-tensorrtllm-1-an-overall-evaluation-30703</guid>
              <category>Tech Insight</category>
              <author>Yeonjoon Jung</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2024-10-04T05%3A38%3A14.780Z-77f7d769-cbea-4e91-9273-e52f3035c3f1&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>How much can we save through compression? </title>
              <link>https://blog.squeezebits.com/how-much-can-we-save-through-compression-b675c60611b4</link>
              <description>Estimating the cost savings from model compression.</description>
              <pubDate>Wed, 26 Jun 2024 07:30:29 GMT</pubDate>
              <guid>https://blog.squeezebits.com/how-much-can-we-save-through-compression-b675c60611b4</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A17.710Z-928589e1-c448-486f-bead-d1334e692949&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Experiencing AI Model Compression Firsthand: Our IT Exhibition Story</title>
              <link>https://blog.squeezebits.com/ai-lightweight-experience-it-exhibition-en</link>
              <description>SqueezeBits&apos; IT exhibition recap: from AI model compression demos to hands-on OwLite experiences, booth visitor reactions, and more. Read our on-the-ground event story!</description>
              <pubDate>Mon, 27 May 2024 08:44:00 GMT</pubDate>
              <guid>https://blog.squeezebits.com/ai-lightweight-experience-it-exhibition-en</guid>
              <category>Event</category>
              <author>Naeun Kim</author>
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2026-04-16T06%3A27%3A40.042Z-9b8150da-542a-4ec5-a554-f5936dc01f44&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>‘Breaking Down’ Tokenizers in LLMs </title>
              <link>https://blog.squeezebits.com/breaking-down-tokenizers-in-llms-5699a8122574</link>
              <description>An introduction to tokenizers and their implications in language models.</description>
              <pubDate>Thu, 16 May 2024 08:23:54 GMT</pubDate>
              <guid>https://blog.squeezebits.com/breaking-down-tokenizers-in-llms-5699a8122574</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A14.085Z-e77cfeb7-47aa-42aa-81fd-0e976351c91a&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Accuracy Degradation in AI Compression: Myth or Truth? </title>
              <link>https://blog.squeezebits.com/accuracy-degradation-in-ai-compression-myth-or-truth-c7a94ec0bc92</link>
              <description>Clarifying the misunderstandings in AI model compression</description>
              <pubDate>Wed, 24 Apr 2024 05:19:41 GMT</pubDate>
              <guid>https://blog.squeezebits.com/accuracy-degradation-in-ai-compression-myth-or-truth-c7a94ec0bc92</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A22.246Z-376a45e3-0169-482c-8650-35a84844ff43&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Are you getting everything out of your GPUs?</title>
              <link>https://blog.squeezebits.com/are-you-getting-everything-out-of-your-gpus-1f030a4a460f</link>
              <description>The Blackwell GPU from GTC 2024 was astonishing. Analysis of the Nvidia GPU evolution &amp; what it means for GPU users.</description>
              <pubDate>Tue, 23 Apr 2024 02:20:50 GMT</pubDate>
              <guid>https://blog.squeezebits.com/are-you-getting-everything-out-of-your-gpus-1f030a4a460f</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A32.058Z-e085c876-ece4-412b-8e10-2e7e7d494a4d&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>Things to check if your business utilizes AI </title>
              <link>https://blog.squeezebits.com/things-to-check-if-your-business-utilizes-ai-53be650a1248</link>
              <description>Do I need to COMPRESS my AI model? : the short answer is “YES” — and here’s why.</description>
              <pubDate>Fri, 19 Apr 2024 04:41:25 GMT</pubDate>
              <guid>https://blog.squeezebits.com/things-to-check-if-your-business-utilizes-ai-53be650a1248</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A26%3A00.249Z-e77ecde3-fc36-4025-a8df-6a72abe1b208&amp;w=1920&amp;q=85" type="image/webp"/>
            </item><item>
              <title>AI Compression for Acceleration: 4 Key Methods</title>
              <link>https://blog.squeezebits.com/4-types-of-ai-compression-methods-you-should-know-5d07759c60a7</link>
              <description>AI model compression for acceleration is essential. The question is HOW? Here are 4 key methodologies.</description>
              <pubDate>Mon, 15 Apr 2024 08:55:19 GMT</pubDate>
              <guid>https://blog.squeezebits.com/4-types-of-ai-compression-methods-you-should-know-5d07759c60a7</guid>
              <category>Tech Insight</category>
              
              <enclosure url="https://image.inblog.dev?url=https%3A%2F%2Fsource.inblog.dev%2Ffeatured_image%2F2025-01-24T07%3A25%3A21.295Z-f352b4e3-158f-465f-b9b8-f128babae2fb&amp;w=1920&amp;q=85" type="image/webp"/>
            </item>
      </channel>
    </rss>