<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!-- RSS 2.0 feed for posts tagged "Quantization"; the atom namespace is used only for the self link. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Quantization on Luke Salamone&#39;s Blog</title>
		<link>https://blog.lukesalamone.com/tags/quantization/</link>
		<description>Recent content in Quantization on Luke Salamone&#39;s Blog</description>
		<generator>Hugo</generator>
		<language>en-us</language>
		<lastBuildDate>Fri, 19 Jun 2026 22:24:12 -0700</lastBuildDate>
		<!-- Self-reference: the URL this feed document is served from. -->
		<atom:link href="https://blog.lukesalamone.com/tags/quantization/index.xml" rel="self" type="application/rss+xml" />
		<item>
			<title>Semantic Search in Under 3MB</title>
			<link>https://blog.lukesalamone.com/posts/creating-tiny-semantic-search/</link>
			<pubDate>Fri, 19 Jun 2026 22:24:12 -0700</pubDate>
			<guid>https://blog.lukesalamone.com/posts/creating-tiny-semantic-search/</guid>
			<!--
				The description is entity-escaped HTML kept on one line, because any
				whitespace added inside it would become part of the content.
				Site-internal URLs in it are absolute: feed readers render this HTML
				outside the site and may not resolve relative references.
				The figure and figcaption elements are explicitly closed so the
				fragment is balanced HTML.
			-->
			<description>&lt;p&gt;This project is a continuation of my &lt;a href=&#34;https://blog.lukesalamone.com/posts/autoresearch/&#34;&gt;previous autoresearch project&lt;/a&gt;, which optimized a reranking model to be under 10MB. Digging deeper by hand, I was able to take the size reduction much further, while outperforming reranking models which are 30x larger on this task. In the end I was able to reduce the payload from 11.4 MB to 2.79 MB gzipped.&lt;/p&gt;&#xA;&lt;p&gt;You can see it in action on my &lt;a href=&#34;https://lukesalamone.com&#34;&gt;resume page&lt;/a&gt;.&lt;/p&gt;&#xA;&lt;figure&gt;&lt;img src=&#34;https://blog.lukesalamone.com/img/tiny_reranker.png&#34;&#xA;&#x9;&#x9;&#x9;alt=&#34;Each square represents 1 kB. The majority of overall size reduction came from removing the ORT dependency. However, other changes enabled much better representation quality than the baseline.&#34;&gt;&lt;figcaption&gt;&#xA;&#x9;&#x9;&#x9;&lt;p&gt;Each square represents 1 kB. The majority of overall size reduction came from removing the ORT dependency. However, other changes enabled much better representation quality than the baseline.&lt;/p&gt;&#xA;&lt;/figcaption&gt;&lt;/figure&gt;</description>
		</item>
	</channel>
</rss>
