initial boilerplate

This commit is contained in:
2026-05-03 16:43:53 +03:00
parent bea266e066
commit 2e63e0e95b
18 changed files with 1878 additions and 1 deletions
+143
View File
@@ -0,0 +1,143 @@
package inflation
import (
"strings"
"unicode"
)
// Matcher resolves raw receipt item text to a canonical item name.
//
// Receipts spell the same product many ways: "1 gal whole milk" and
// "MILK WHL GAL" are both meant to end up as "milk_whole_1gal". This is
// the "normalization" problem.
//
// The current strategy is deliberately simple (exact alias lookup plus
// token overlap). Embeddings or a fuzzy search library are candidates
// for replacing it later.
type Matcher struct {
	// aliasMap is keyed by lowercase alias text; each value is the
	// canonical item name that alias stands for.
	aliasMap map[string]string
}
// knownAliases is the built-in seed table of alias -> canonical name.
// Keys are written lowercase. In production these are meant to be loaded
// at startup from the aliases column of the canonical_items table instead.
var knownAliases = map[string]string{
	// milk_whole_1gal
	"whole milk gallon": "milk_whole_1gal",
	"1 gal whole milk":  "milk_whole_1gal",
	"milk whl gal":      "milk_whole_1gal",
	"whole milk 1gal":   "milk_whole_1gal",
	"milk whole":        "milk_whole_1gal",

	// eggs_large_dozen
	"large eggs 12ct":    "eggs_large_dozen",
	"eggs large dozen":   "eggs_large_dozen",
	"grade a large eggs": "eggs_large_dozen",
	"eggs lg 12":         "eggs_large_dozen",
	"large eggs":         "eggs_large_dozen",

	// bread_white_loaf
	"white bread":      "bread_white_loaf",
	"sandwich bread":   "bread_white_loaf",
	"bread loaf":       "bread_white_loaf",
	"white bread loaf": "bread_white_loaf",

	// ground_beef_1lb
	"ground beef lb":    "ground_beef_1lb",
	"80/20 ground beef": "ground_beef_1lb",
	"hamburger meat":    "ground_beef_1lb",
	"ground beef":       "ground_beef_1lb",

	// olive_oil_16oz
	"olive oil 16oz":         "olive_oil_16oz",
	"extra virgin olive oil": "olive_oil_16oz",
	"evoo 16oz":              "olive_oil_16oz",
	"olive oil":              "olive_oil_16oz",

	// butter_salted_1lb
	"butter salted pound":    "butter_salted_1lb",
	"salted butter 4 sticks": "butter_salted_1lb",
	"salted butter":          "butter_salted_1lb",

	// chicken_breast_1lb
	"boneless chicken breast": "chicken_breast_1lb",
	"chicken breast lb":       "chicken_breast_1lb",
	"chicken breast":          "chicken_breast_1lb",

	// orange_juice_52oz
	"oj 52oz":             "orange_juice_52oz",
	"orange juice carton": "orange_juice_52oz",
	"orange juice":        "orange_juice_52oz",
}
// NewMatcher returns a Matcher backed by the package-level knownAliases
// seed table. The map is shared with the package variable, not copied.
func NewMatcher() *Matcher {
	m := new(Matcher)
	m.aliasMap = knownAliases
	return m
}
// Match tries to find a canonical name for a raw receipt string.
//
// It returns the empty string when nothing matches; unmatched items are
// stored with canonical_name = NULL and can be reviewed/matched later.
//
// Three strategies are tried in order, stopping at the first hit:
//  1. exact lookup of the normalized text in the alias table;
//  2. substring match, where the longest alias contained in the
//     normalized text wins;
//  3. token overlap, where the alias sharing the most words wins,
//     provided it shares at least 2 words and more than half of its own.
//
// Go does not define map iteration order, so steps 2 and 3 break ties
// explicitly (alias length or token count first, then alphabetical order
// of the alias). The same input therefore always yields the same result.
func (m *Matcher) Match(raw string) string {
	normalized := normalize(raw)

	// 1. Exact alias match (fastest).
	if canonical, ok := m.aliasMap[normalized]; ok {
		return canonical
	}

	// 2. Substring match: among all aliases contained in the text, the
	// longest (most specific) one wins; equal lengths fall back to
	// alphabetical order so the choice never depends on map ordering.
	var (
		subFound     bool
		subAlias     string
		subCanonical string
	)
	for alias, canonical := range m.aliasMap {
		if !strings.Contains(normalized, alias) {
			continue
		}
		if !subFound ||
			len(alias) > len(subAlias) ||
			(len(alias) == len(subAlias) && alias < subAlias) {
			subFound = true
			subAlias, subCanonical = alias, canonical
		}
	}
	if subFound {
		return subCanonical
	}

	// 3. Token overlap: split both into words and count shared tokens.
	// This catches "MILK WHOLE 1 GAL" matching "whole milk gallon".
	rawTokens := tokenize(normalized)
	var (
		bestScore int
		bestLen   int
		bestAlias string
		bestMatch string
	)
	for alias, canonical := range m.aliasMap {
		aliasTokens := tokenize(alias)
		score := tokenOverlap(rawTokens, aliasTokens)

		// Require matching at least 2 tokens and >50% of alias tokens.
		// The score check comes first, so an alias with no tokens is
		// rejected before the ratio is computed.
		if score < 2 || float64(score)/float64(len(aliasTokens)) <= 0.5 {
			continue
		}

		better := score > bestScore
		if score == bestScore {
			// Same number of shared words: prefer the alias that is
			// covered more completely (fewer tokens), then the
			// alphabetically smaller alias.
			n := len(aliasTokens)
			better = n < bestLen || (n == bestLen && alias < bestAlias)
		}
		if better {
			bestScore = score
			bestLen = len(aliasTokens)
			bestAlias = alias
			bestMatch = canonical
		}
	}
	return bestMatch
}
// normalize prepares receipt text for alias lookup. The input is
// lowercased, every rune that is not a letter, a digit or '/' acts as a
// separator, and the remaining words are joined by single spaces with no
// leading or trailing space.
func normalize(s string) string {
	isSeparator := func(r rune) bool {
		switch {
		case r == '/', unicode.IsLetter(r), unicode.IsDigit(r):
			return false
		default:
			return true
		}
	}
	words := strings.FieldsFunc(strings.ToLower(s), isSeparator)
	return strings.Join(words, " ")
}
// tokenize splits a normalized string into its unique words.
//
// Words are separated by whitespace. Each distinct word appears exactly
// once in the result, in order of first appearance, so a word repeated in
// the input ("milk milk") cannot be counted twice when scoring overlap.
// An empty or all-whitespace input yields an empty result.
func tokenize(s string) []string {
	fields := strings.Fields(s)
	seen := make(map[string]struct{}, len(fields))

	// Filter in place: unique never grows past the read position, so
	// reusing the backing array of fields is safe.
	unique := fields[:0]
	for _, f := range fields {
		if _, dup := seen[f]; dup {
			continue
		}
		seen[f] = struct{}{}
		unique = append(unique, f)
	}
	return unique
}
// tokenOverlap reports how many elements of a are also present in b.
// Each element of a is checked on its own, so a word that occurs several
// times in a is counted once per occurrence.
func tokenOverlap(a, b []string) int {
	present := make(map[string]bool, len(b))
	for i := range b {
		present[b[i]] = true
	}

	shared := 0
	for i := range a {
		if present[a[i]] {
			shared++
		}
	}
	return shared
}
+75
View File
@@ -0,0 +1,75 @@
package inflation
import (
"testing"
)
// This is your first Go test file!
// Run with: go test ./internal/inflation/...
// Go's testing package is built-in — no extra library needed.

// TestMatcher_ExactMatch checks that input identical to a seeded alias
// resolves to that alias's canonical name. Each case runs as a subtest
// named after the raw input.
func TestMatcher_ExactMatch(t *testing.T) {
	type exactCase struct {
		raw      string
		expected string
	}
	cases := []exactCase{
		{raw: "whole milk gallon", expected: "milk_whole_1gal"},
		{raw: "large eggs 12ct", expected: "eggs_large_dozen"},
		{raw: "white bread", expected: "bread_white_loaf"},
		{raw: "ground beef", expected: "ground_beef_1lb"},
	}

	matcher := NewMatcher()
	for _, tc := range cases {
		t.Run(tc.raw, func(t *testing.T) {
			if got := matcher.Match(tc.raw); got != tc.expected {
				t.Errorf("Match(%q) = %q, want %q", tc.raw, got, tc.expected)
			}
		})
	}
}
// TestMatcher_CaseInsensitive checks that an all-uppercase input resolves
// to the same canonical name as its lowercase alias.
func TestMatcher_CaseInsensitive(t *testing.T) {
	const want = "milk_whole_1gal"
	if got := NewMatcher().Match("WHOLE MILK GALLON"); got != want {
		t.Errorf("expected milk_whole_1gal, got %q", got)
	}
}
// TestMatcher_TokenOverlap checks the token-overlap fallback.
//
// "MILK WHL 1 GAL" normalizes to "milk whl 1 gal", which is neither equal
// to a seeded alias nor contains one, so only token overlap can resolve
// it. The aliases sharing the most words with it ("milk whl gal" and
// "1 gal whole milk", three words each) both map to milk_whole_1gal, so
// the expected result does not depend on tie-breaking and a mismatch is
// reported as a real failure rather than just logged.
func TestMatcher_TokenOverlap(t *testing.T) {
	m := NewMatcher()

	const raw = "MILK WHL 1 GAL"
	const want = "milk_whole_1gal"
	if got := m.Match(raw); got != want {
		t.Errorf("Match(%q) = %q, want %q", raw, got, want)
	}
}
// TestMatcher_NoMatch checks that an item with no related alias resolves
// to the empty string.
func TestMatcher_NoMatch(t *testing.T) {
	matcher := NewMatcher()
	if got := matcher.Match("toilet paper mega roll 12ct"); got != "" {
		t.Errorf("expected no match, got %q", got)
	}
}
// TestNormalize checks lowercasing, punctuation stripping, whitespace
// collapsing, and that '/' survives normalization.
func TestNormalize(t *testing.T) {
	type normCase struct {
		input    string
		expected string
	}
	cases := []normCase{
		{input: "Whole Milk, 1 Gal.", expected: "whole milk 1 gal"},
		{input: "EGGS (LARGE) 12CT", expected: "eggs large 12ct"},
		{input: "80/20 Ground Beef", expected: "80/20 ground beef"},
	}

	for i := range cases {
		tc := cases[i]
		got := normalize(tc.input)
		if got == tc.expected {
			continue
		}
		t.Errorf("normalize(%q) = %q, want %q", tc.input, got, tc.expected)
	}
}
+44
View File
@@ -0,0 +1,44 @@
// Package inflation contains the background worker that refreshes
// the price_snapshots table used by the dashboard charts.
package inflation
import (
"context"
"log"
"time"
"github.com/yourname/deflated/internal/db"
)
// StartRefreshWorker refreshes price snapshots once immediately and then
// again every interval until ctx is cancelled. It blocks for its whole
// lifetime, so run it in its own goroutine; call it from main() after
// connecting to the database.
//
// A non-positive interval disables the periodic refresh: the initial
// refresh still runs, a message is logged, and the function returns.
// Without this guard time.NewTicker would panic, and a panic inside the
// worker goroutine would take down the whole process.
//
// Example:
//
//	go inflation.StartRefreshWorker(ctx, queries, 1*time.Hour)
func StartRefreshWorker(ctx context.Context, q *db.Queries, interval time.Duration) {
	// Run once immediately on startup so the charts aren't empty.
	runRefresh(ctx, q)

	if interval <= 0 {
		log.Printf("inflation refresh worker: non-positive interval %s, periodic refresh disabled", interval)
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			runRefresh(ctx, q)
		case <-ctx.Done():
			log.Println("inflation refresh worker stopped")
			return
		}
	}
}
// runRefresh performs a single price-snapshot refresh through q. It logs
// the error if the refresh fails, otherwise the elapsed time rounded to
// the millisecond. Nothing is returned to the caller.
func runRefresh(ctx context.Context, q *db.Queries) {
	began := time.Now()

	err := q.RefreshPriceSnapshots(ctx)
	if err != nil {
		log.Printf("error refreshing price snapshots: %v", err)
		return
	}

	elapsed := time.Since(began).Round(time.Millisecond)
	log.Printf("price snapshots refreshed in %s", elapsed)
}