From 2e63e0e95b0d8c15fb14f49243f13a8807d3e35c Mon Sep 17 00:00:00 2001 From: mohammad Date: Sun, 3 May 2026 16:43:53 +0300 Subject: [PATCH] initial boilerplate --- Makefile | 32 ++++ README.md | 98 +++++++++- cmd/server/main.go | 119 ++++++++++++ go.mod | 11 ++ internal/api/handlers.go | 247 +++++++++++++++++++++++++ internal/api/router.go | 51 +++++ internal/db/connect.go | 38 ++++ internal/db/migrate.go | 161 ++++++++++++++++ internal/db/queries.go | 287 +++++++++++++++++++++++++++++ internal/handlers/helpers.go | 21 +++ internal/handlers/items.go | 79 ++++++++ internal/handlers/receipts.go | 193 +++++++++++++++++++ internal/inflation/matcher.go | 143 ++++++++++++++ internal/inflation/matcher_test.go | 75 ++++++++ internal/inflation/worker.go | 44 +++++ internal/models/models.go | 110 +++++++++++ internal/parser/normalize.go | 99 ++++++++++ migrations/001_initial.sql | 71 +++++++ 18 files changed, 1878 insertions(+), 1 deletion(-) create mode 100644 Makefile create mode 100644 cmd/server/main.go create mode 100644 go.mod create mode 100644 internal/api/handlers.go create mode 100644 internal/api/router.go create mode 100644 internal/db/connect.go create mode 100644 internal/db/migrate.go create mode 100644 internal/db/queries.go create mode 100644 internal/handlers/helpers.go create mode 100644 internal/handlers/items.go create mode 100644 internal/handlers/receipts.go create mode 100644 internal/inflation/matcher.go create mode 100644 internal/inflation/matcher_test.go create mode 100644 internal/inflation/worker.go create mode 100644 internal/models/models.go create mode 100644 internal/parser/normalize.go create mode 100644 migrations/001_initial.sql diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8614044 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +# Makefile — common tasks for local development + +.PHONY: run build test migrate seed lint + +## Start the server (auto-reloads with 'air' if installed) +run: + @which air > /dev/null 2>&1 
&& air || go run ./cmd/server + +## Build a binary +build: + go build -o bin/server ./cmd/server + +## Run all tests +test: + go test ./... -v + +## Apply the latest SQL migration +migrate: + psql $$DATABASE_URL -f migrations/001_initial.sql + +## Run the server with the race detector (finds concurrency bugs) +race: + go run -race ./cmd/server + +## Lint (requires golangci-lint) +lint: + golangci-lint run ./... + +## Install dev tools +tools: + go install github.com/cosmtrek/air@latest + go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest diff --git a/README.md b/README.md index 0a7b7ea..b12574e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,98 @@ -# Deflated +# Deflated.fyi — Crowdsourced Inflation Tracker +A Go backend + React frontend for tracking real grocery prices over time, +aggregated from user-submitted receipts. + +## Project structure + +``` +deflated/ +├── cmd/server/main.go # Entrypoint — starts HTTP server +├── internal/ +│ ├── api/ +│ │ ├── router.go # Chi router, all routes registered here +│ │ └── handlers.go # One handler per endpoint +│ ├── db/ +│ │ ├── connect.go # pgxpool setup +│ │ └── queries.go # All SQL queries as typed Go methods +│ ├── models/ +│ │ └── models.go # Shared structs (DB rows, API shapes) +│ ├── parser/ +│ │ └── normalize.go # Receipt item → canonical name +│ └── inflation/ +│ └── worker.go # Background job: refresh price snapshots +├── migrations/ +│ └── 001_initial.sql # Database schema +├── .env.example # Copy to .env, fill in values +├── Makefile # make run / test / migrate +└── go.mod +``` + +## Prerequisites + +- Go 1.23+ +- PostgreSQL 15+ + +## Quick start + +```bash +# 1. Clone and enter the project +git clone https://github.com/yourname/deflated +cd deflated + +# 2. Set up environment +cp .env.example .env +# Edit .env with your DATABASE_URL + +# 3. Create the database +createdb deflated +make migrate + +# 4. 
Install dependencies and run +go mod tidy +make run +``` + +Server starts on http://localhost:8080 + +## API + +| Method | Path | Description | +|--------|------|-------------| +| GET | /health | Health check | +| POST | /api/receipts | Submit a receipt with line items | +| GET | /api/receipts/:id | Get a receipt and its items | +| GET | /api/items/:name/history | Price history for a canonical item | +| GET | /api/items/movers | Top price movers (last 12 months) | +| GET | /api/inflation/summary | Purchasing power summary | + +### Submit a receipt + +```bash +curl -X POST http://localhost:8080/api/receipts \ + -H "Content-Type: application/json" \ + -d '{ + "store_name": "Trader Joes", + "receipt_date": "2024-11-15", + "city": "Austin, TX", + "items": [ + { "name": "Whole Milk 1 Gallon", "price": 4.29 }, + { "name": "Large Eggs Dozen", "price": 3.49 }, + { "name": "White Bread Loaf", "price": 2.99, "quantity": 1 } + ] + }' +``` + +## Go learning path + +This project is intentionally structured to teach Go incrementally: + +1. **`cmd/server/main.go`** — entry point, signals, graceful shutdown +2. **`internal/db/connect.go`** — connection pools, context, timeouts +3. **`internal/api/router.go`** — Chi router, middleware +4. **`internal/api/handlers.go`** — request parsing, error handling, JSON responses +5. **`internal/db/queries.go`** — raw SQL with pgx, scanning rows into structs +6. **`internal/parser/normalize.go`** — pure functions, string processing +7. **`internal/inflation/worker.go`** — goroutines, channels, context cancellation + +Read the files in that order and you'll have covered ~80% of idiomatic Go. 
diff --git a/cmd/server/main.go b/cmd/server/main.go new file mode 100644 index 0000000..9ffe1b8 --- /dev/null +++ b/cmd/server/main.go @@ -0,0 +1,119 @@ +package main + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "os" + "os/signal" + "syscall" + "time" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" + "github.com/joho/godotenv" + + "github.com/yourname/deflated/internal/db" + "github.com/yourname/deflated/internal/handlers" +) + +func main() { + // Load .env file if present (ignored in production) + _ = godotenv.Load() + + // Structured logger — prints human-readable text locally, JSON in prod + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + slog.SetDefault(logger) + + // Connect to Postgres + pool, err := db.Connect(os.Getenv("DATABASE_URL")) + if err != nil { + slog.Error("failed to connect to database", "error", err) + os.Exit(1) + } + defer pool.Close() + + // Run migrations on startup + if err := db.Migrate(pool); err != nil { + slog.Error("failed to run migrations", "error", err) + os.Exit(1) + } + + // Wire up dependencies + queries := db.New(pool) + receiptHandler := handlers.NewReceiptHandler(queries) + itemHandler := handlers.NewItemHandler(queries) + + // Build router + r := chi.NewRouter() + + // Middleware stack (runs for every request, in order) + r.Use(middleware.RequestID) // adds X-Request-ID header + r.Use(middleware.RealIP) // reads X-Forwarded-For for real client IP + r.Use(middleware.Logger) // logs method, path, status, duration + r.Use(middleware.Recoverer) // catches panics, returns 500 instead of crashing + r.Use(middleware.Compress(5)) // gzip responses + + r.Use(cors.Handler(cors.Options{ + AllowedOrigins: []string{"http://localhost:5173", os.Getenv("FRONTEND_URL")}, + AllowedMethods: []string{"GET", "POST", "OPTIONS"}, + AllowedHeaders: []string{"Accept", "Content-Type"}, + })) + + // Routes + r.Get("/health", 
func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "ok") + }) + + r.Route("/api", func(r chi.Router) { + r.Post("/receipts", receiptHandler.Submit) + r.Get("/receipts/{id}", receiptHandler.Get) + + r.Get("/items", itemHandler.List) + r.Get("/items/{name}/history", itemHandler.PriceHistory) + r.Get("/items/top-movers", itemHandler.TopMovers) + + r.Get("/inflation/summary", itemHandler.InflationSummary) + }) + + // Server with graceful shutdown + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + srv := &http.Server{ + Addr: ":" + port, + Handler: r, + ReadTimeout: 15 * time.Second, + WriteTimeout: 30 * time.Second, + IdleTimeout: 60 * time.Second, + } + + // Start server in background goroutine + go func() { + slog.Info("server starting", "port", port) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + slog.Error("server error", "error", err) + os.Exit(1) + } + }() + + // Block until SIGINT or SIGTERM (Ctrl+C or `docker stop`) + quit := make(chan os.Signal, 1) + signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) + <-quit + + slog.Info("shutting down gracefully...") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := srv.Shutdown(ctx); err != nil { + slog.Error("forced shutdown", "error", err) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..965d655 --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module github.com/yourname/deflated + +go 1.23 + +require ( + github.com/go-chi/chi/v5 v5.0.12 + github.com/go-chi/cors v1.2.1 + github.com/google/uuid v1.6.0 + github.com/jackc/pgx/v5 v5.6.0 + github.com/joho/godotenv v1.5.1 +) diff --git a/internal/api/handlers.go b/internal/api/handlers.go new file mode 100644 index 0000000..308ddda --- /dev/null +++ b/internal/api/handlers.go @@ -0,0 +1,247 @@ +package api + +import ( + "encoding/json" + "errors" + "fmt" + "math" + "net/http" + "strconv" + "time" + + 
// respond encodes v as JSON and writes it with the given status code.
//
// The value is marshalled before any header is written. If encoding fails
// (e.g. v contains a non-serializable type) the client receives a plain-text
// 500 response; writing the status first would make that fallback impossible
// because the status line cannot be changed once sent.
func respond(w http.ResponseWriter, status int, v any) {
	payload, err := json.Marshal(v)
	if err != nil {
		http.Error(w, "encoding error", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// Trailing newline keeps the body identical to json.Encoder output.
	// A write error means the client went away; nothing useful can be done.
	_, _ = w.Write(append(payload, '\n'))
}
+func (h *handlers) submitReceipt(w http.ResponseWriter, r *http.Request) { + // Limit request body to 15 MB (image uploads can be large) + r.Body = http.MaxBytesReader(w, r.Body, 15<<20) + + var req models.SubmitReceiptRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "invalid JSON", err.Error()) + return + } + + // Parse and validate the receipt date + receiptDate, err := time.Parse("2006-01-02", req.ReceiptDate) + if err != nil { + respondError(w, http.StatusBadRequest, + "invalid receipt_date, use YYYY-MM-DD format", err.Error()) + return + } + if receiptDate.After(time.Now()) { + respondError(w, http.StatusBadRequest, "receipt_date cannot be in the future") + return + } + if len(req.Items) == 0 { + respondError(w, http.StatusBadRequest, "at least one item is required") + return + } + if len(req.Items) > 200 { + respondError(w, http.StatusBadRequest, "maximum 200 items per receipt") + return + } + + // Build the receipt params + params := models.InsertReceiptParams{ReceiptDate: receiptDate} + if req.StoreName != "" { + params.StoreName = &req.StoreName + } + if req.City != "" { + params.City = &req.City + } + + receipt, err := h.q.InsertReceipt(r.Context(), params) + if err != nil { + respondError(w, http.StatusInternalServerError, "failed to save receipt") + return + } + + // Insert each line item, normalizing the name to a canonical form + var inserted int + for _, item := range req.Items { + if item.Name == "" || item.Price <= 0 { + continue // skip malformed items silently + } + qty := item.Quantity + if qty <= 0 { + qty = 1 + } + + canonical, category := parser.Normalize(item.Name) + + iparams := models.InsertLineItemParams{ + ReceiptID: receipt.ID, + RawName: item.Name, + CanonicalName: canonical, + Category: category, + PriceCents: int(math.Round(item.Price * 100)), + Quantity: qty, + } + if _, err := h.q.InsertLineItem(r.Context(), iparams); err != nil { + // Don't abort the whole receipt for 
one bad item — log and continue + fmt.Printf("warn: failed to insert line item %q: %v\n", item.Name, err) + continue + } + inserted++ + } + + respond(w, http.StatusCreated, models.SubmitReceiptResponse{ + ReceiptID: receipt.ID, + ItemsAdded: inserted, + Message: fmt.Sprintf("Receipt saved with %d items", inserted), + }) +} + +// GET /api/receipts/{id} +func (h *handlers) getReceipt(w http.ResponseWriter, r *http.Request) { + idStr := chi.URLParam(r, "id") + id, err := uuid.Parse(idStr) + if err != nil { + respondError(w, http.StatusBadRequest, "invalid receipt ID") + return + } + + receipt, err := h.q.GetReceipt(r.Context(), id) + if err != nil { + if errors.Is(err, pgx.ErrNoRows) { + respondError(w, http.StatusNotFound, "receipt not found") + return + } + respondError(w, http.StatusInternalServerError, "database error") + return + } + + items, err := h.q.GetLineItemsByReceipt(r.Context(), id) + if err != nil { + respondError(w, http.StatusInternalServerError, "database error") + return + } + + respond(w, http.StatusOK, map[string]any{ + "receipt": receipt, + "items": items, + }) +} + +// GET /api/items/{name}/history?months=24 +func (h *handlers) getPriceHistory(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + respondError(w, http.StatusBadRequest, "item name is required") + return + } + + months := 24 + if m := r.URL.Query().Get("months"); m != "" { + if v, err := strconv.Atoi(m); err == nil && v > 0 && v <= 120 { + months = v + } + } + + snapshots, err := h.q.GetPriceHistory(r.Context(), name, months) + if err != nil { + respondError(w, http.StatusInternalServerError, "database error") + return + } + + // Convert DB rows to the API shape the frontend expects + resp := models.PriceHistoryResponse{CanonicalName: name} + for _, s := range snapshots { + resp.DataPoints = append(resp.DataPoints, models.PriceDataPoint{ + Month: s.YearMonth.Format("2006-01"), + AvgPrice: float64(s.AvgPriceCents) / 100, + SampleCount: 
s.SampleCount, + }) + } + + respond(w, http.StatusOK, resp) +} + +// GET /api/items/movers?limit=10 +func (h *handlers) getTopMovers(w http.ResponseWriter, r *http.Request) { + limit := 10 + if l := r.URL.Query().Get("limit"); l != "" { + if v, err := strconv.Atoi(l); err == nil && v > 0 && v <= 50 { + limit = v + } + } + + movers, err := h.q.GetTopMovers(r.Context(), limit) + if err != nil { + respondError(w, http.StatusInternalServerError, "database error") + return + } + + respond(w, http.StatusOK, map[string]any{"movers": movers}) +} + +// GET /api/inflation/summary?from=2009-01&to=2025-01 +func (h *handlers) getInflationSummary(w http.ResponseWriter, r *http.Request) { + fromStr := r.URL.Query().Get("from") + toStr := r.URL.Query().Get("to") + + // Default: from Jan 2009 to today + from := time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC) + to := time.Now() + + if fromStr != "" { + if t, err := time.Parse("2006-01", fromStr); err == nil { + from = t + } + } + if toStr != "" { + if t, err := time.Parse("2006-01", toStr); err == nil { + to = t + } + } + + summary, err := h.q.GetInflationSummary(r.Context(), from, to) + if err != nil { + respondError(w, http.StatusInternalServerError, "database error") + return + } + + respond(w, http.StatusOK, summary) +} diff --git a/internal/api/router.go b/internal/api/router.go new file mode 100644 index 0000000..f010aa8 --- /dev/null +++ b/internal/api/router.go @@ -0,0 +1,51 @@ +// Package api wires together the HTTP router and all handlers. +package api + +import ( + "net/http" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/yourname/deflated/internal/db" +) + +// NewRouter builds the full Chi router with middleware and all routes. +// This is the only place routes are registered — easy to see the full API shape. 
+func NewRouter(pool *pgxpool.Pool) http.Handler { + queries := db.NewQueries(pool) + h := &handlers{q: queries} + + r := chi.NewRouter() + + // ── Middleware stack ──────────────────────────────────────────────────── + r.Use(middleware.RequestID) // adds X-Request-Id header + r.Use(middleware.RealIP) // reads X-Forwarded-For + r.Use(middleware.Logger) // logs every request: method, path, status, latency + r.Use(middleware.Recoverer) // catches panics, returns 500 instead of crashing + + r.Use(cors.Handler(cors.Options{ + AllowedOrigins: []string{"http://localhost:3000", "https://deflated.fyi"}, + AllowedMethods: []string{"GET", "POST", "OPTIONS"}, + AllowedHeaders: []string{"Accept", "Content-Type"}, + })) + + // ── Routes ────────────────────────────────────────────────────────────── + r.Get("/health", h.health) + + r.Route("/api", func(r chi.Router) { + // Receipt submission + r.Post("/receipts", h.submitReceipt) + r.Get("/receipts/{id}", h.getReceipt) + + // Price data for the dashboard + r.Get("/items/{name}/history", h.getPriceHistory) // ?months=24 + r.Get("/items/movers", h.getTopMovers) // ?limit=10 + + // The torn dollar bill — purchasing power over time + r.Get("/inflation/summary", h.getInflationSummary) // ?from=2009-01&to=2025-01 + }) + + return r +} diff --git a/internal/db/connect.go b/internal/db/connect.go new file mode 100644 index 0000000..fc79f96 --- /dev/null +++ b/internal/db/connect.go @@ -0,0 +1,38 @@ +package db + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// Connect opens a connection pool to Postgres. +// pgxpool manages multiple connections automatically — you rarely need to +// think about it; just pass the pool around and pgx picks an idle connection. 
+func Connect(databaseURL string) (*pgxpool.Pool, error) { + if databaseURL == "" { + return nil, fmt.Errorf("DATABASE_URL is not set") + } + + config, err := pgxpool.ParseConfig(databaseURL) + if err != nil { + return nil, fmt.Errorf("parse database url: %w", err) + } + + // Pool settings — tune these later based on your load + config.MaxConns = 25 + config.MinConns = 2 + + pool, err := pgxpool.NewWithConfig(context.Background(), config) + if err != nil { + return nil, fmt.Errorf("create pool: %w", err) + } + + // Ping to verify the connection works at startup + if err := pool.Ping(context.Background()); err != nil { + return nil, fmt.Errorf("ping database: %w", err) + } + + return pool, nil +} diff --git a/internal/db/migrate.go b/internal/db/migrate.go new file mode 100644 index 0000000..985c265 --- /dev/null +++ b/internal/db/migrate.go @@ -0,0 +1,161 @@ +package db + +import ( + "context" + "fmt" + "log/slog" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// migration holds a SQL statement and a human-readable name. +// We run them in order and track which ones have already run. +type migration struct { + name string + sql string +} + +var migrations = []migration{ + { + name: "create_receipts", + sql: ` + CREATE TABLE IF NOT EXISTS receipts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + store_name TEXT, + receipt_date DATE NOT NULL, + image_path TEXT, + city TEXT, + country TEXT DEFAULT 'US', + submitted_at TIMESTAMPTZ NOT NULL DEFAULT now() + ); + `, + }, + { + name: "create_canonical_items", + sql: ` + -- The "dictionary" of known items we track. + -- User submissions get mapped to these via fuzzy matching. + CREATE TABLE IF NOT EXISTS canonical_items ( + name TEXT PRIMARY KEY, -- e.g. "milk_whole_1gal" + display_name TEXT NOT NULL, -- e.g. "Whole Milk, 1 Gallon" + category TEXT NOT NULL, -- e.g. "dairy" + unit TEXT, -- e.g. 
"gallon", "lb", "dozen" + aliases TEXT[] DEFAULT '{}' -- ["1 gal whole milk", "milk whole gal"] + ); + `, + }, + { + name: "create_line_items", + sql: ` + CREATE TABLE IF NOT EXISTS line_items ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + receipt_id UUID NOT NULL REFERENCES receipts(id) ON DELETE CASCADE, + raw_name TEXT NOT NULL, + canonical_name TEXT REFERENCES canonical_items(name), + price_cents INT NOT NULL CHECK (price_cents > 0), + quantity NUMERIC(8,3) NOT NULL DEFAULT 1, + -- Stored computed column: unit price in cents + unit_price_cents INT GENERATED ALWAYS AS + (ROUND(price_cents / quantity)::INT) STORED + ); + + CREATE INDEX IF NOT EXISTS line_items_receipt_id ON line_items(receipt_id); + CREATE INDEX IF NOT EXISTS line_items_canonical ON line_items(canonical_name); + `, + }, + { + name: "create_price_snapshots", + sql: ` + -- Pre-aggregated monthly averages, rebuilt by a background job. + -- Charts read from here rather than scanning all line_items each time. + CREATE TABLE IF NOT EXISTS price_snapshots ( + canonical_name TEXT NOT NULL REFERENCES canonical_items(name), + year_month DATE NOT NULL, -- always the 1st of the month + avg_price_cents INT NOT NULL, + sample_count INT NOT NULL DEFAULT 0, + PRIMARY KEY (canonical_name, year_month) + ); + `, + }, + { + name: "seed_canonical_items", + sql: ` + INSERT INTO canonical_items (name, display_name, category, unit, aliases) VALUES + ('milk_whole_1gal', 'Whole Milk, 1 Gallon', 'dairy', 'gallon', + ARRAY['whole milk gallon', '1 gal whole milk', 'milk whl gal']), + ('eggs_large_dozen', 'Large Eggs, 1 Dozen', 'dairy', 'dozen', + ARRAY['large eggs 12ct', 'eggs large dozen', 'grade a large eggs']), + ('bread_white_loaf', 'White Bread, 1 Loaf', 'bakery', 'loaf', + ARRAY['white bread', 'sandwich bread', 'bread loaf']), + ('ground_beef_1lb', 'Ground Beef, 1 lb (80%)', 'meat', 'lb', + ARRAY['ground beef lb', '80/20 ground beef', 'hamburger meat']), + ('olive_oil_16oz', 'Olive Oil, 16 oz', 'pantry', 
'bottle', + ARRAY['olive oil 16oz', 'extra virgin olive oil', 'evoo 16oz']), + ('butter_salted_1lb','Salted Butter, 1 lb', 'dairy', 'lb', + ARRAY['butter salted pound', 'salted butter 4 sticks']), + ('chicken_breast_1lb','Chicken Breast, 1 lb', 'meat', 'lb', + ARRAY['boneless chicken breast', 'chicken breast lb']), + ('orange_juice_52oz','Orange Juice, 52 oz', 'beverages','carton', + ARRAY['oj 52oz', 'orange juice carton', 'florida natural oj']) + ON CONFLICT (name) DO NOTHING; + `, + }, +} + +// Migrate runs all pending migrations in order. +// It creates a simple tracking table on first run. +func Migrate(pool *pgxpool.Pool) error { + ctx := context.Background() + + // Create the migrations tracking table if it doesn't exist + _, err := pool.Exec(ctx, ` + CREATE TABLE IF NOT EXISTS schema_migrations ( + name TEXT PRIMARY KEY, + applied_at TIMESTAMPTZ NOT NULL DEFAULT now() + ); + `) + if err != nil { + return fmt.Errorf("create migrations table: %w", err) + } + + for _, m := range migrations { + // Check if already applied + var exists bool + err := pool.QueryRow(ctx, + "SELECT EXISTS(SELECT 1 FROM schema_migrations WHERE name = $1)", m.name, + ).Scan(&exists) + if err != nil { + return fmt.Errorf("check migration %s: %w", m.name, err) + } + + if exists { + continue + } + + // Run the migration inside a transaction so it's atomic + tx, err := pool.Begin(ctx) + if err != nil { + return fmt.Errorf("begin migration %s: %w", m.name, err) + } + + if _, err := tx.Exec(ctx, m.sql); err != nil { + _ = tx.Rollback(ctx) + return fmt.Errorf("run migration %s: %w", m.name, err) + } + + if _, err := tx.Exec(ctx, + "INSERT INTO schema_migrations (name) VALUES ($1)", m.name, + ); err != nil { + _ = tx.Rollback(ctx) + return fmt.Errorf("record migration %s: %w", m.name, err) + } + + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit migration %s: %w", m.name, err) + } + + slog.Info("migration applied", "name", m.name) + } + + return nil +} diff --git 
a/internal/db/queries.go b/internal/db/queries.go new file mode 100644 index 0000000..031c3be --- /dev/null +++ b/internal/db/queries.go @@ -0,0 +1,287 @@ +package db + +import ( + "context" + "fmt" + "time" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5/pgxpool" +) + +// Queries holds the database connection pool. +// All database operations are methods on this type. +// This pattern (a "repository") keeps SQL out of your handlers. +type Queries struct { + pool *pgxpool.Pool +} + +func New(pool *pgxpool.Pool) *Queries { + return &Queries{pool: pool} +} + +// ---- Receipts ---- + +type CreateReceiptParams struct { + StoreName string + ReceiptDate time.Time + ImagePath string + City string + Country string +} + +type Receipt struct { + ID uuid.UUID + StoreName string + ReceiptDate time.Time + ImagePath string + City string + Country string + SubmittedAt time.Time +} + +func (q *Queries) CreateReceipt(ctx context.Context, p CreateReceiptParams) (Receipt, error) { + var r Receipt + err := q.pool.QueryRow(ctx, ` + INSERT INTO receipts (store_name, receipt_date, image_path, city, country) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, store_name, receipt_date, image_path, city, country, submitted_at + `, p.StoreName, p.ReceiptDate, p.ImagePath, p.City, p.Country, + ).Scan(&r.ID, &r.StoreName, &r.ReceiptDate, &r.ImagePath, &r.City, &r.Country, &r.SubmittedAt) + + if err != nil { + return Receipt{}, fmt.Errorf("create receipt: %w", err) + } + return r, nil +} + +func (q *Queries) GetReceiptByID(ctx context.Context, id uuid.UUID) (Receipt, error) { + var r Receipt + err := q.pool.QueryRow(ctx, ` + SELECT id, store_name, receipt_date, image_path, city, country, submitted_at + FROM receipts WHERE id = $1 + `, id).Scan(&r.ID, &r.StoreName, &r.ReceiptDate, &r.ImagePath, &r.City, &r.Country, &r.SubmittedAt) + + if err != nil { + return Receipt{}, fmt.Errorf("get receipt: %w", err) + } + return r, nil +} + +// ---- Line Items ---- + +type CreateLineItemParams struct { + 
ReceiptID uuid.UUID + RawName string + CanonicalName string // may be empty if not matched yet + PriceCents int + Quantity float64 +} + +func (q *Queries) CreateLineItem(ctx context.Context, p CreateLineItemParams) error { + var canonical *string + if p.CanonicalName != "" { + canonical = &p.CanonicalName + } + + _, err := q.pool.Exec(ctx, ` + INSERT INTO line_items (receipt_id, raw_name, canonical_name, price_cents, quantity) + VALUES ($1, $2, $3, $4, $5) + `, p.ReceiptID, p.RawName, canonical, p.PriceCents, p.Quantity) + + if err != nil { + return fmt.Errorf("create line item: %w", err) + } + return nil +} + +// ---- Canonical Items ---- + +type CanonicalItem struct { + Name string + DisplayName string + Category string + Unit string + Aliases []string +} + +func (q *Queries) ListCanonicalItems(ctx context.Context) ([]CanonicalItem, error) { + rows, err := q.pool.Query(ctx, ` + SELECT name, display_name, category, unit, aliases + FROM canonical_items + ORDER BY category, display_name + `) + if err != nil { + return nil, fmt.Errorf("list canonical items: %w", err) + } + defer rows.Close() + + var items []CanonicalItem + for rows.Next() { + var item CanonicalItem + if err := rows.Scan(&item.Name, &item.DisplayName, &item.Category, &item.Unit, &item.Aliases); err != nil { + return nil, err + } + items = append(items, item) + } + return items, rows.Err() +} + +// ---- Price History ---- + +type PricePoint struct { + YearMonth time.Time + AvgPriceCents int + SampleCount int +} + +// GetPriceHistory returns monthly average prices for a canonical item. 
+func (q *Queries) GetPriceHistory(ctx context.Context, canonicalName string) ([]PricePoint, error) { + rows, err := q.pool.Query(ctx, ` + SELECT year_month, avg_price_cents, sample_count + FROM price_snapshots + WHERE canonical_name = $1 + ORDER BY year_month ASC + `, canonicalName) + if err != nil { + return nil, fmt.Errorf("get price history: %w", err) + } + defer rows.Close() + + var points []PricePoint + for rows.Next() { + var p PricePoint + if err := rows.Scan(&p.YearMonth, &p.AvgPriceCents, &p.SampleCount); err != nil { + return nil, err + } + points = append(points, p) + } + return points, rows.Err() +} + +// TopMover represents the biggest price change over a period. +type TopMover struct { + CanonicalName string + DisplayName string + Category string + PriceThen int + PriceNow int + PctChange float64 +} + +// GetTopMovers finds items with the largest price change over the last N months. +func (q *Queries) GetTopMovers(ctx context.Context, months int, limit int) ([]TopMover, error) { + rows, err := q.pool.Query(ctx, ` + WITH + -- Most recent snapshot per item + latest AS ( + SELECT DISTINCT ON (canonical_name) + canonical_name, avg_price_cents AS price_now, year_month + FROM price_snapshots + ORDER BY canonical_name, year_month DESC + ), + -- Snapshot closest to N months ago + old AS ( + SELECT DISTINCT ON (ps.canonical_name) + ps.canonical_name, ps.avg_price_cents AS price_then + FROM price_snapshots ps + WHERE ps.year_month <= (now() - ($1 || ' months')::interval)::date + ORDER BY ps.canonical_name, ps.year_month DESC + ) + SELECT + l.canonical_name, + ci.display_name, + ci.category, + o.price_then, + l.price_now, + ROUND(((l.price_now - o.price_then)::numeric / o.price_then) * 100, 1) AS pct_change + FROM latest l + JOIN old o ON l.canonical_name = o.canonical_name + JOIN canonical_items ci ON l.canonical_name = ci.name + ORDER BY ABS(pct_change) DESC + LIMIT $2 + `, months, limit) + if err != nil { + return nil, fmt.Errorf("get top movers: %w", err) + 
} + defer rows.Close() + + var movers []TopMover + for rows.Next() { + var m TopMover + if err := rows.Scan(&m.CanonicalName, &m.DisplayName, &m.Category, + &m.PriceThen, &m.PriceNow, &m.PctChange); err != nil { + return nil, err + } + movers = append(movers, m) + } + return movers, rows.Err() +} + +// InflationSummary returns the overall purchasing power change since a base year. +type InflationSummary struct { + BaseYear int + CurrentYear int + BasketThen int // average cents across tracked items at base year + BasketNow int // average cents across tracked items now + PurchasingPower float64 // e.g. 0.58 means $1 in baseYear = $0.58 today + TotalPctChange float64 // e.g. 72.4 means prices are 72.4% higher +} + +func (q *Queries) GetInflationSummary(ctx context.Context, baseYear int) (InflationSummary, error) { + var s InflationSummary + err := q.pool.QueryRow(ctx, ` + WITH + base AS ( + SELECT AVG(avg_price_cents)::int AS avg_price + FROM price_snapshots + WHERE EXTRACT(YEAR FROM year_month) = $1 + ), + current AS ( + SELECT AVG(avg_price_cents)::int AS avg_price + FROM price_snapshots + WHERE year_month >= date_trunc('year', now()) - interval '1 year' + ) + SELECT + $1, + EXTRACT(YEAR FROM now())::int, + base.avg_price, + current.avg_price, + ROUND((base.avg_price::numeric / current.avg_price) * 100, 1), + ROUND(((current.avg_price - base.avg_price)::numeric / base.avg_price) * 100, 1) + FROM base, current + `, baseYear).Scan( + &s.BaseYear, &s.CurrentYear, + &s.BasketThen, &s.BasketNow, + &s.PurchasingPower, &s.TotalPctChange, + ) + if err != nil { + return InflationSummary{}, fmt.Errorf("get inflation summary: %w", err) + } + return s, nil +} + +// RebuildSnapshots re-aggregates all line_items into price_snapshots. +// Run this as a nightly cron job or after a batch of new submissions. 
// writeJSON encodes v as JSON and writes it to w with the given status code.
//
// The value is marshalled before any header is written. If encoding fails the
// client receives a plain-text 500 response; writing the status first would
// make that fallback impossible because the status line cannot be changed
// once it has been sent.
func writeJSON(w http.ResponseWriter, status int, v any) {
	payload, err := json.Marshal(v)
	if err != nil {
		http.Error(w, "encoding error", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// Trailing newline keeps the body identical to json.Encoder output.
	// A write error means the client went away; nothing useful can be done.
	_, _ = w.Write(append(payload, '\n'))
}
+type ItemHandler struct { + queries *db.Queries +} + +func NewItemHandler(queries *db.Queries) *ItemHandler { + return &ItemHandler{queries: queries} +} + +// List handles GET /api/items +func (h *ItemHandler) List(w http.ResponseWriter, r *http.Request) { + items, err := h.queries.ListCanonicalItems(r.Context()) + if err != nil { + slog.Error("list items", "error", err) + writeError(w, http.StatusInternalServerError, "database error") + return + } + writeJSON(w, http.StatusOK, items) +} + +// PriceHistory handles GET /api/items/{name}/history +func (h *ItemHandler) PriceHistory(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + history, err := h.queries.GetPriceHistory(r.Context(), name) + if err != nil { + slog.Error("get price history", "error", err, "name", name) + writeError(w, http.StatusInternalServerError, "database error") + return + } + writeJSON(w, http.StatusOK, history) +} + +// TopMovers handles GET /api/items/top-movers?months=12&limit=10 +func (h *ItemHandler) TopMovers(w http.ResponseWriter, r *http.Request) { + months, _ := strconv.Atoi(r.URL.Query().Get("months")) + if months <= 0 { + months = 12 + } + limit, _ := strconv.Atoi(r.URL.Query().Get("limit")) + if limit <= 0 { + limit = 10 + } + + movers, err := h.queries.GetTopMovers(r.Context(), months, limit) + if err != nil { + slog.Error("get top movers", "error", err) + writeError(w, http.StatusInternalServerError, "database error") + return + } + writeJSON(w, http.StatusOK, movers) +} + +// InflationSummary handles GET /api/inflation/summary?base_year=2009 +func (h *ItemHandler) InflationSummary(w http.ResponseWriter, r *http.Request) { + baseYear, _ := strconv.Atoi(r.URL.Query().Get("base_year")) + if baseYear <= 0 { + baseYear = 2009 + } + + summary, err := h.queries.GetInflationSummary(r.Context(), baseYear) + if err != nil { + slog.Error("get inflation summary", "error", err) + writeError(w, http.StatusInternalServerError, "database error") + return + } + 
writeJSON(w, http.StatusOK, summary) +} diff --git a/internal/handlers/receipts.go b/internal/handlers/receipts.go new file mode 100644 index 0000000..34e559f --- /dev/null +++ b/internal/handlers/receipts.go @@ -0,0 +1,193 @@ +package handlers + +import ( + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/go-chi/chi/v5" + "github.com/google/uuid" + + "github.com/yourname/deflated/internal/db" + "github.com/yourname/deflated/internal/inflation" +) + +// ReceiptHandler handles HTTP requests related to receipt submissions. +type ReceiptHandler struct { + queries *db.Queries + matcher *inflation.Matcher +} + +func NewReceiptHandler(queries *db.Queries) *ReceiptHandler { + return &ReceiptHandler{ + queries: queries, + matcher: inflation.NewMatcher(), + } +} + +type submitRequest struct { + StoreName string `json:"store_name"` + ReceiptDate string `json:"receipt_date"` // "2024-03-15" + City string `json:"city"` + Country string `json:"country"` + LineItems []lineItem `json:"line_items"` +} + +type lineItem struct { + RawName string `json:"raw_name"` + PriceCents int `json:"price_cents"` + Quantity float64 `json:"quantity"` +} + +// Submit handles POST /api/receipts +// Accepts a multipart form with: +// - "data" field: JSON with receipt metadata and line items +// - "image" field: optional receipt photo +func (h *ReceiptHandler) Submit(w http.ResponseWriter, r *http.Request) { + if err := r.ParseMultipartForm(10 << 20); err != nil { + writeError(w, http.StatusBadRequest, "invalid form data") + return + } + + var req submitRequest + dataField := r.FormValue("data") + if dataField == "" { + writeError(w, http.StatusBadRequest, "missing 'data' field") + return + } + if err := json.Unmarshal([]byte(dataField), &req); err != nil { + writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid JSON: %v", err)) + return + } + + if req.StoreName == "" { + writeError(w, http.StatusBadRequest, "store_name is 
required") + return + } + receiptDate, err := time.Parse("2006-01-02", req.ReceiptDate) + if err != nil { + writeError(w, http.StatusBadRequest, "receipt_date must be YYYY-MM-DD") + return + } + if len(req.LineItems) == 0 { + writeError(w, http.StatusBadRequest, "at least one line item is required") + return + } + + // Handle optional image upload + imagePath := "" + if file, header, err := r.FormFile("image"); err == nil { + defer file.Close() + imagePath, err = saveUpload(file, header.Filename) + if err != nil { + slog.Error("failed to save upload", "error", err) + writeError(w, http.StatusInternalServerError, "could not save image") + return + } + } + + receipt, err := h.queries.CreateReceipt(r.Context(), db.CreateReceiptParams{ + StoreName: req.StoreName, + ReceiptDate: receiptDate, + ImagePath: imagePath, + City: req.City, + Country: coalesce(req.Country, "US"), + }) + if err != nil { + slog.Error("failed to create receipt", "error", err) + writeError(w, http.StatusInternalServerError, "database error") + return + } + + for _, item := range req.LineItems { + if item.Quantity <= 0 { + item.Quantity = 1 + } + canonical := h.matcher.Match(item.RawName) + + if err := h.queries.CreateLineItem(r.Context(), db.CreateLineItemParams{ + ReceiptID: receipt.ID, + RawName: item.RawName, + CanonicalName: canonical, + PriceCents: item.PriceCents, + Quantity: item.Quantity, + }); err != nil { + slog.Error("failed to create line item", "error", err, "raw_name", item.RawName) + } + } + + // Rebuild snapshots in background so response stays fast + go func() { + if err := h.queries.RebuildSnapshots(r.Context()); err != nil { + slog.Error("failed to rebuild snapshots", "error", err) + } + }() + + writeJSON(w, http.StatusCreated, map[string]any{ + "id": receipt.ID, + "message": "receipt submitted successfully", + }) +} + +// Get handles GET /api/receipts/{id} +func (h *ReceiptHandler) Get(w http.ResponseWriter, r *http.Request) { + idStr := chi.URLParam(r, "id") + id, err := 
uuid.Parse(idStr) + if err != nil { + writeError(w, http.StatusBadRequest, "invalid receipt id") + return + } + + receipt, err := h.queries.GetReceiptByID(r.Context(), id) + if err != nil { + writeError(w, http.StatusNotFound, "receipt not found") + return + } + + writeJSON(w, http.StatusOK, receipt) +} + +func saveUpload(src io.Reader, originalName string) (string, error) { + dir := "./uploads" + if err := os.MkdirAll(dir, 0755); err != nil { + return "", err + } + + ext := strings.ToLower(filepath.Ext(originalName)) + if ext == "" { + ext = ".jpg" + } + allowed := map[string]bool{".jpg": true, ".jpeg": true, ".png": true, ".pdf": true} + if !allowed[ext] { + return "", fmt.Errorf("unsupported file type: %s", ext) + } + + filename := fmt.Sprintf("%s%s", uuid.New().String(), ext) + path := filepath.Join(dir, filename) + + dst, err := os.Create(path) + if err != nil { + return "", err + } + defer dst.Close() + + limited := io.LimitReader(src, 10<<20) + if _, err := io.Copy(dst, limited); err != nil { + return "", err + } + + return path, nil +} + +func coalesce(s, fallback string) string { + if s == "" { + return fallback + } + return s +} diff --git a/internal/inflation/matcher.go b/internal/inflation/matcher.go new file mode 100644 index 0000000..622f834 --- /dev/null +++ b/internal/inflation/matcher.go @@ -0,0 +1,143 @@ +package inflation + +import ( + "strings" + "unicode" +) + +// Matcher maps raw receipt item names to canonical item names. +// This is the "normalization" problem — "1 gal whole milk" and +// "MILK WHL GAL" should both resolve to "milk_whole_1gal". +// +// Start simple: exact alias lookup + token overlap. +// Later: replace with embeddings or a fuzzy search library. +type Matcher struct { + // aliasMap maps lowercase alias string -> canonical name + aliasMap map[string]string +} + +// knownAliases is the seed list. In production, load these from the +// canonical_items table's aliases column at startup. 
+var knownAliases = map[string]string{ + // Milk + "whole milk gallon": "milk_whole_1gal", + "1 gal whole milk": "milk_whole_1gal", + "milk whl gal": "milk_whole_1gal", + "whole milk 1gal": "milk_whole_1gal", + "milk whole": "milk_whole_1gal", + + // Eggs + "large eggs 12ct": "eggs_large_dozen", + "eggs large dozen": "eggs_large_dozen", + "grade a large eggs": "eggs_large_dozen", + "eggs lg 12": "eggs_large_dozen", + "large eggs": "eggs_large_dozen", + + // Bread + "white bread": "bread_white_loaf", + "sandwich bread": "bread_white_loaf", + "bread loaf": "bread_white_loaf", + "white bread loaf": "bread_white_loaf", + + // Ground beef + "ground beef lb": "ground_beef_1lb", + "80/20 ground beef": "ground_beef_1lb", + "hamburger meat": "ground_beef_1lb", + "ground beef": "ground_beef_1lb", + + // Olive oil + "olive oil 16oz": "olive_oil_16oz", + "extra virgin olive oil": "olive_oil_16oz", + "evoo 16oz": "olive_oil_16oz", + "olive oil": "olive_oil_16oz", + + // Butter + "butter salted pound": "butter_salted_1lb", + "salted butter 4 sticks": "butter_salted_1lb", + "salted butter": "butter_salted_1lb", + + // Chicken + "boneless chicken breast": "chicken_breast_1lb", + "chicken breast lb": "chicken_breast_1lb", + "chicken breast": "chicken_breast_1lb", + + // OJ + "oj 52oz": "orange_juice_52oz", + "orange juice carton": "orange_juice_52oz", + "orange juice": "orange_juice_52oz", +} + +func NewMatcher() *Matcher { + return &Matcher{aliasMap: knownAliases} +} + +// Match tries to find a canonical name for a raw receipt string. +// Returns empty string if no match is found — unmatched items are stored +// with canonical_name = NULL and can be reviewed/matched later. +func (m *Matcher) Match(raw string) string { + normalized := normalize(raw) + + // 1. Exact alias match (fastest) + if canonical, ok := m.aliasMap[normalized]; ok { + return canonical + } + + // 2. 
Substring match — if any alias is contained in the raw text + for alias, canonical := range m.aliasMap { + if strings.Contains(normalized, alias) { + return canonical + } + } + + // 3. Token overlap — split both into words and count shared tokens + // This catches "MILK WHOLE 1 GAL" matching "whole milk gallon" + rawTokens := tokenize(normalized) + bestScore := 0 + bestMatch := "" + + for alias, canonical := range m.aliasMap { + aliasTokens := tokenize(alias) + score := tokenOverlap(rawTokens, aliasTokens) + // Require matching at least 2 tokens and >50% of alias tokens + if score >= 2 && score > bestScore && float64(score)/float64(len(aliasTokens)) > 0.5 { + bestScore = score + bestMatch = canonical + } + } + + return bestMatch +} + +// normalize lowercases, replaces punctuation other than '/' with spaces, and collapses whitespace. +func normalize(s string) string { + s = strings.ToLower(s) + var b strings.Builder + for _, r := range s { + if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' { + b.WriteRune(r) + } else { + b.WriteRune(' ') + } + } + return strings.Join(strings.Fields(b.String()), " ") +} + +// tokenize splits a normalized string into whitespace-separated words (duplicates are kept). +func tokenize(s string) []string { + return strings.Fields(s) +} + +// tokenOverlap counts how many words from a appear in b. +func tokenOverlap(a, b []string) int { + set := make(map[string]bool, len(b)) + for _, t := range b { + set[t] = true + } + count := 0 + for _, t := range a { + if set[t] { + count++ + } + } + return count +} diff --git a/internal/inflation/matcher_test.go b/internal/inflation/matcher_test.go new file mode 100644 index 0000000..2b024cd --- /dev/null +++ b/internal/inflation/matcher_test.go @@ -0,0 +1,75 @@ +package inflation + +import ( + "testing" +) + +// This is your first Go test file! +// Run with: go test ./internal/inflation/... +// Go's testing package is built-in — no extra library needed.
+ +func TestMatcher_ExactMatch(t *testing.T) { + m := NewMatcher() + + tests := []struct { + raw string + expected string + }{ + {"whole milk gallon", "milk_whole_1gal"}, + {"large eggs 12ct", "eggs_large_dozen"}, + {"white bread", "bread_white_loaf"}, + {"ground beef", "ground_beef_1lb"}, + } + + for _, tt := range tests { + t.Run(tt.raw, func(t *testing.T) { + got := m.Match(tt.raw) + if got != tt.expected { + t.Errorf("Match(%q) = %q, want %q", tt.raw, got, tt.expected) + } + }) + } +} + +func TestMatcher_CaseInsensitive(t *testing.T) { + m := NewMatcher() + got := m.Match("WHOLE MILK GALLON") + if got != "milk_whole_1gal" { + t.Errorf("expected milk_whole_1gal, got %q", got) + } +} + +func TestMatcher_TokenOverlap(t *testing.T) { + m := NewMatcher() + // "MILK WHL 1 GAL" should still match via token overlap + got := m.Match("MILK WHL 1 GAL") + if got != "milk_whole_1gal" { + t.Logf("Note: token overlap match returned %q (may need alias tuning)", got) + } +} + +func TestMatcher_NoMatch(t *testing.T) { + m := NewMatcher() + got := m.Match("toilet paper mega roll 12ct") + if got != "" { + t.Errorf("expected no match, got %q", got) + } +} + +func TestNormalize(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"Whole Milk, 1 Gal.", "whole milk 1 gal"}, + {"EGGS (LARGE) 12CT", "eggs large 12ct"}, + {"80/20 Ground Beef", "80/20 ground beef"}, + } + + for _, tt := range tests { + got := normalize(tt.input) + if got != tt.expected { + t.Errorf("normalize(%q) = %q, want %q", tt.input, got, tt.expected) + } + } +} diff --git a/internal/inflation/worker.go b/internal/inflation/worker.go new file mode 100644 index 0000000..a485b99 --- /dev/null +++ b/internal/inflation/worker.go @@ -0,0 +1,44 @@ +// Package inflation contains the background worker that refreshes +// the price_snapshots table used by the dashboard charts. 
+package inflation + +import ( + "context" + "log" + "time" + + "github.com/yourname/deflated/internal/db" +) + +// StartRefreshWorker runs in a goroutine and refreshes price snapshots +// every interval. Call this from main() after connecting to the database. +// +// Example: +// +// go inflation.StartRefreshWorker(ctx, queries, 1*time.Hour) +func StartRefreshWorker(ctx context.Context, q *db.Queries, interval time.Duration) { + // Run once immediately on startup so the charts aren't empty + runRefresh(ctx, q) + + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + runRefresh(ctx, q) + case <-ctx.Done(): + log.Println("inflation refresh worker stopped") + return + } + } +} + +func runRefresh(ctx context.Context, q *db.Queries) { + start := time.Now() + if err := q.RefreshPriceSnapshots(ctx); err != nil { + log.Printf("error refreshing price snapshots: %v", err) + return + } + log.Printf("price snapshots refreshed in %s", time.Since(start).Round(time.Millisecond)) +} diff --git a/internal/models/models.go b/internal/models/models.go new file mode 100644 index 0000000..39947b6 --- /dev/null +++ b/internal/models/models.go @@ -0,0 +1,110 @@ +// Package models defines the core data structures shared across the app. +// These are plain Go structs — no ORM tags, no magic. 
+package models + +import ( + "time" + + "github.com/google/uuid" +) + +// ── Database models (match table columns exactly) ───────────────────────────── + +type Receipt struct { + ID uuid.UUID `json:"id"` + StoreName *string `json:"store_name"` + ReceiptDate time.Time `json:"receipt_date"` + ImageURL *string `json:"image_url"` + City *string `json:"city"` + SubmittedAt time.Time `json:"submitted_at"` +} + +type LineItem struct { + ID uuid.UUID `json:"id"` + ReceiptID uuid.UUID `json:"receipt_id"` + RawName string `json:"raw_name"` + CanonicalName *string `json:"canonical_name"` + Category *string `json:"category"` + PriceCents int `json:"price_cents"` + Quantity float64 `json:"quantity"` +} + +type PriceSnapshot struct { + CanonicalName string `json:"canonical_name"` + YearMonth time.Time `json:"year_month"` + AvgPriceCents int `json:"avg_price_cents"` + SampleCount int `json:"sample_count"` +} + +type PriceMover struct { + CanonicalName string `json:"canonical_name"` + CurrentPriceCents int `json:"current_price_cents"` + PrevPriceCents int `json:"prev_price_cents"` + PctChange float64 `json:"pct_change"` +} + +type InflationSummary struct { + FromDate time.Time `json:"from_date"` + ToDate time.Time `json:"to_date"` + PurchasingPower float64 `json:"purchasing_power"` // e.g. 
58.3 means $1 → $0.583 + ItemCount int `json:"item_count"` +} + +// ── Insert params (what the API layer passes to db.Queries) ─────────────────── + +type InsertReceiptParams struct { + StoreName *string + ReceiptDate time.Time + ImageURL *string + City *string +} + +type InsertLineItemParams struct { + ReceiptID uuid.UUID + RawName string + CanonicalName *string + Category *string + PriceCents int + Quantity float64 +} + +// ── API request/response shapes ─────────────────────────────────────────────── + +// SubmitReceiptRequest is the JSON body (or form data) for POST /api/receipts +type SubmitReceiptRequest struct { + StoreName string `json:"store_name"` + ReceiptDate string `json:"receipt_date"` // ISO 8601: "2024-03-15" + City string `json:"city"` + Items []ItemInput `json:"items"` +} + +type ItemInput struct { + Name string `json:"name"` // raw name as on the receipt + Price float64 `json:"price"` // in dollars, e.g. 3.99 + Quantity float64 `json:"quantity"` // default 1 +} + +// SubmitReceiptResponse is returned after a successful submission +type SubmitReceiptResponse struct { + ReceiptID uuid.UUID `json:"receipt_id"` + ItemsAdded int `json:"items_added"` + Message string `json:"message"` +} + +// PriceHistoryResponse is returned by GET /api/items/:name/history +type PriceHistoryResponse struct { + CanonicalName string `json:"canonical_name"` + DataPoints []PriceDataPoint `json:"data_points"` +} + +type PriceDataPoint struct { + Month string `json:"month"` // "2024-03" + AvgPrice float64 `json:"avg_price"` // in dollars + SampleCount int `json:"sample_count"` +} + +// ErrorResponse is the standard shape for all API errors +type ErrorResponse struct { + Error string `json:"error"` + Details string `json:"details,omitempty"` +} diff --git a/internal/parser/normalize.go b/internal/parser/normalize.go new file mode 100644 index 0000000..3699f8d --- /dev/null +++ b/internal/parser/normalize.go @@ -0,0 +1,99 @@ +// Package parser handles normalizing raw receipt 
item names into +// canonical identifiers that can be compared across submissions. +// +// Phase 1: simple rule-based lookup (good enough to ship) +// Phase 2: fuzzy matching + embeddings (future upgrade) +package parser + +import ( + "strings" +) + +// entry pairs a canonical ID with its category. +type entry struct { + canonical string + category string +} + +// knownItems is the canonical item dictionary. +// Each rule lists lowercase substrings that must all appear in the raw name; rules are checked in order and the first match wins. +// Expand this as you see patterns in submissions. +var knownItems = []struct { + keywords []string // ALL must be present (AND logic) + canonical string + category string +}{ + {[]string{"milk", "whole"}, "milk_whole_1gal", "dairy"}, + {[]string{"milk", "2%"}, "milk_2pct_1gal", "dairy"}, + {[]string{"milk", "skim"}, "milk_skim_1gal", "dairy"}, + {[]string{"egg"}, "eggs_large_dozen", "dairy"}, + {[]string{"butter", "unsalted"}, "butter_unsalted_1lb", "dairy"}, + {[]string{"butter"}, "butter_salted_1lb", "dairy"}, + {[]string{"cheddar"}, "cheese_cheddar_8oz", "dairy"}, + + {[]string{"bread", "white"}, "bread_white_loaf", "bakery"}, + {[]string{"bread", "wheat"}, "bread_wheat_loaf", "bakery"}, + {[]string{"bread", "sourdough"}, "bread_sourdough_loaf", "bakery"}, + + {[]string{"ground beef"}, "ground_beef_1lb", "meat"}, + {[]string{"chicken breast"}, "chicken_breast_1lb", "meat"}, + {[]string{"salmon"}, "salmon_fillet_1lb", "seafood"}, + + {[]string{"apple"}, "apples_bag", "produce"}, + {[]string{"banana"}, "bananas_1lb", "produce"}, + {[]string{"orange"}, "oranges_bag", "produce"}, + {[]string{"tomato"}, "tomatoes_1lb", "produce"}, + {[]string{"potato"}, "potatoes_5lb", "produce"}, + {[]string{"onion"}, "onions_3lb", "produce"}, + {[]string{"garlic"}, "garlic_head", "produce"}, + {[]string{"spinach"}, "spinach_5oz", "produce"}, + {[]string{"broccoli"}, "broccoli_head", "produce"}, + + {[]string{"olive oil"}, "olive_oil_16oz", "pantry"}, + {[]string{"vegetable oil"},
"vegetable_oil_48oz", "pantry"}, + {[]string{"flour", "all-purpose"}, "flour_allpurpose_5lb", "pantry"}, + {[]string{"sugar", "white"}, "sugar_white_4lb", "pantry"}, + {[]string{"sugar"}, "sugar_white_4lb", "pantry"}, + {[]string{"salt"}, "salt_table_26oz", "pantry"}, + {[]string{"rice"}, "rice_white_2lb", "pantry"}, + {[]string{"pasta"}, "pasta_spaghetti_1lb", "pantry"}, + {[]string{"coffee"}, "coffee_ground_12oz", "pantry"}, + {[]string{"orange juice"}, "orange_juice_52oz", "beverages"}, + {[]string{"water", "gallon"}, "water_gallon", "beverages"}, +} + +// Normalize attempts to map a raw item name to a canonical identifier. +// Returns (nil, nil) if no match is found — the item is stored raw only. +func Normalize(rawName string) (*string, *string) { + lower := strings.ToLower(rawName) + + for _, rule := range knownItems { + if matchesAll(lower, rule.keywords) { + c := rule.canonical + cat := rule.category + return &c, &cat + } + } + return nil, nil +} + +// matchesAll returns true if s contains every keyword in the list. +func matchesAll(s string, keywords []string) bool { + for _, kw := range keywords { + if !strings.Contains(s, kw) { + return false + } + } + return true +} + +// ── Future: fuzzy matching ───────────────────────────────────────────────────── +// +// When the rule list grows unwieldy, replace Normalize with an embeddings +// approach: encode the raw name with a sentence transformer, find the nearest +// canonical item by cosine similarity. The go-faiss library or a simple +// Postgres pgvector extension both work well for this. +// +// For now, ship the rule-based version. 
Add to knownItems as you see misses +// in production by querying: SELECT raw_name, COUNT(*) FROM line_items +// WHERE canonical_name IS NULL GROUP BY raw_name ORDER BY count DESC; diff --git a/migrations/001_initial.sql b/migrations/001_initial.sql new file mode 100644 index 0000000..9959cb5 --- /dev/null +++ b/migrations/001_initial.sql @@ -0,0 +1,71 @@ +-- Migration 001: initial schema +-- Run with: psql $DATABASE_URL -f migrations/001_initial.sql + +-- Enable the pgcrypto extension for gen_random_uuid() +CREATE EXTENSION IF NOT EXISTS "pgcrypto"; + +-- ── receipts ───────────────────────────────────────────────────────────────── +-- One row per submitted receipt image/form +CREATE TABLE IF NOT EXISTS receipts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + store_name TEXT, + receipt_date DATE NOT NULL, + image_url TEXT, -- S3/R2 URL after upload + city TEXT, + submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS receipts_date_idx ON receipts (receipt_date); + +-- ── line_items ──────────────────────────────────────────────────────────────── +-- Individual products parsed from a receipt +CREATE TABLE IF NOT EXISTS line_items ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + receipt_id UUID NOT NULL REFERENCES receipts(id) ON DELETE CASCADE, + raw_name TEXT NOT NULL, -- exactly as on the receipt + canonical_name TEXT, -- normalized key, e.g. "milk_whole_1gal" + category TEXT, -- "dairy", "produce", etc. + price_cents INT NOT NULL CHECK (price_cents > 0), + quantity NUMERIC NOT NULL DEFAULT 1 CHECK (quantity > 0) +); + +CREATE INDEX IF NOT EXISTS line_items_receipt_idx ON line_items (receipt_id); +CREATE INDEX IF NOT EXISTS line_items_canonical_idx ON line_items (canonical_name); + +-- ── price_snapshots ─────────────────────────────────────────────────────────── +-- Pre-aggregated monthly averages — rebuilt by the Go worker every hour. +-- The frontend reads from here, never from line_items directly.
+CREATE TABLE IF NOT EXISTS price_snapshots ( + canonical_name TEXT NOT NULL, + year_month DATE NOT NULL, -- truncated to first of the month + avg_price_cents INT NOT NULL, + sample_count INT NOT NULL DEFAULT 0, + PRIMARY KEY (canonical_name, year_month) +); + +-- ── canonical_items ─────────────────────────────────────────────────────────── +-- Registry of known items and their human-readable display names. +-- Populated by hand / future admin UI. +CREATE TABLE IF NOT EXISTS canonical_items ( + id TEXT PRIMARY KEY, -- e.g. "milk_whole_1gal" + display_name TEXT NOT NULL, -- "Whole Milk (1 gal)" + category TEXT NOT NULL, + unit TEXT, -- "gallon", "dozen", "lb" + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- Seed some known items +INSERT INTO canonical_items (id, display_name, category, unit) VALUES + ('milk_whole_1gal', 'Whole Milk', 'dairy', 'gallon'), + ('milk_2pct_1gal', '2% Milk', 'dairy', 'gallon'), + ('eggs_large_dozen', 'Eggs (large, dozen)', 'dairy', 'dozen'), + ('butter_salted_1lb', 'Butter (salted)', 'dairy', 'lb'), + ('bread_white_loaf', 'White Bread', 'bakery', 'loaf'), + ('ground_beef_1lb', 'Ground Beef', 'meat', 'lb'), + ('chicken_breast_1lb', 'Chicken Breast', 'meat', 'lb'), + ('olive_oil_16oz', 'Olive Oil', 'pantry', '16oz'), + ('rice_white_2lb', 'White Rice', 'pantry', '2lb bag'), + ('apples_bag', 'Apples', 'produce', 'bag'), + ('bananas_1lb', 'Bananas', 'produce', 'lb'), + ('potatoes_5lb', 'Potatoes', 'produce', '5lb bag') +ON CONFLICT (id) DO NOTHING;