From a1a2c2747cda8ad4c049d0d3b188e993daa67a01 Mon Sep 17 00:00:00 2001 From: Nicolai Ort Date: Fri, 20 Feb 2026 22:30:21 +0100 Subject: [PATCH] chore(PERFORMANCE_IDEAS): Remove outdated performance optimization ideas document --- PERFORMANCE_IDEAS.md | 589 ------------------------------------------- 1 file changed, 589 deletions(-) delete mode 100644 PERFORMANCE_IDEAS.md diff --git a/PERFORMANCE_IDEAS.md b/PERFORMANCE_IDEAS.md deleted file mode 100644 index a9096ca..0000000 --- a/PERFORMANCE_IDEAS.md +++ /dev/null @@ -1,589 +0,0 @@ -# Performance Optimization Ideas for LfK Backend - -This document outlines potential performance improvements for the LfK backend API, organized by impact and complexity. - ---- - -## ✅ Already Implemented - -### 1. Bun Runtime Migration -**Status**: Complete -**Impact**: 8-15% latency improvement -**Details**: Migrated from Node.js to Bun runtime, achieving: -- Parallel throughput: +8.3% (306 → 331 scans/sec) -- Parallel p50 latency: -9.5% (21ms → 19ms) - -### 2. NATS KV Cache for Scan Intake -**Status**: Complete (based on code analysis) -**Impact**: Significant reduction in DB reads for hot path -**Details**: `ScanController.stationIntake()` uses NATS JetStream KV store to cache: -- Station tokens (1-hour TTL) -- Card→Runner mappings (1-hour TTL) -- Runner state (no TTL, CAS-based updates) -- Eliminates DB reads on cache hits -- Prevents race conditions via compare-and-swap (CAS) - ---- - -## 🚀 High Impact, Low-Medium Complexity - -### 3. Add Database Indexes -**Priority**: HIGH -**Complexity**: Low -**Estimated Impact**: 30-70% query time reduction - -**Problem**: TypeORM synchronize() doesn't automatically create indexes on foreign keys or commonly queried fields. 
- -**Observations**: -- Heavy use of `find()` with complex nested relations (e.g., `['runner', 'track', 'runner.scans', 'runner.group', 'runner.scans.track']`) -- No explicit `@Index()` decorators found in entity files -- Frequent filtering by foreign keys (runner_id, track_id, station_id, card_id) - -**Recommended Indexes**: - -```typescript -// src/models/entities/Scan.ts -@Index(['runner', 'timestamp']) // For runner scan history queries -@Index(['station', 'timestamp']) // For station-based queries -@Index(['card']) // For card lookup - -// src/models/entities/Runner.ts -@Index(['email']) // For authentication/lookup -@Index(['group']) // For group-based queries - -// src/models/entities/RunnerCard.ts -@Index(['runner']) // For card→runner lookups -@Index(['code']) // For barcode scans - -// src/models/entities/Donation.ts -@Index(['runner']) // For runner donations -@Index(['donor']) // For donor contributions -``` - -**Implementation Steps**: -1. Audit all entities and add `@Index()` decorators -2. Test query performance with `EXPLAIN` before/after -3. Monitor index usage with database tools -4. Consider composite indexes for frequently combined filters - -**Expected Results**: -- 50-70% faster JOIN operations -- 30-50% faster foreign key lookups -- Reduced database CPU usage - ---- - -### 4. Implement Query Result Caching -**Priority**: HIGH -**Complexity**: Medium -**Estimated Impact**: 50-90% latency reduction for repeated queries - -**Problem**: Stats endpoints and frequently accessed data (org totals, team rankings, runner lists) are recalculated on every request. 
- -**Observations**: -- `StatsController` methods load entire datasets with deep relations: - - `getRunnerStats()`: loads all runners with scans, groups, donations - - `getTeamStats()`: loads all teams with nested runner data - - `getOrgStats()`: loads all orgs with teams, runners, scans -- Many `find()` calls without any caching layer -- Data changes infrequently (only during scan intake) - -**Solution Options**: - -**Option A: NATS KV Cache (Recommended)** -```typescript -// src/nats/StatsKV.ts -export async function getOrgStatsCache(): Promise { - const kv = await NatsClient.getKV('stats_cache', { ttl: 60 * 1000 }); // 60s TTL - const entry = await kv.get('org_stats'); - return entry ? JSON.parse(entry.string()) : null; -} - -export async function setOrgStatsCache(stats: ResponseOrgStats[]): Promise { - const kv = await NatsClient.getKV('stats_cache', { ttl: 60 * 1000 }); - await kv.put('org_stats', JSON.stringify(stats)); -} - -// Invalidate on scan creation -// src/controllers/ScanController.ts (after line 173) -await invalidateStatsCache(); // Clear stats on new scan -``` - -**Option B: In-Memory Cache with TTL** -```typescript -// src/cache/MemoryCache.ts -import NodeCache from 'node-cache'; - -const cache = new NodeCache({ stdTTL: 60 }); // 60s TTL - -export function getCached(key: string): T | undefined { - return cache.get(key); -} - -export function setCached(key: string, value: T, ttl?: number): void { - cache.set(key, value, ttl); -} - -export function invalidatePattern(pattern: string): void { - const keys = cache.keys().filter(k => k.includes(pattern)); - cache.del(keys); -} -``` - -**Option C: Redis Cache** (if Redis is already in stack) - -**Recommended Cache Strategy**: -- **TTL**: 30-60 seconds for stats endpoints -- **Invalidation**: On scan creation, runner updates, donation changes -- **Keys**: `stats:org`, `stats:team:${id}`, `stats:runner:${id}` -- **Warm on startup**: Pre-populate cache for critical endpoints - -**Expected Results**: -- 
80-90% latency reduction for stats endpoints (from ~500ms to ~50ms) -- 70-80% reduction in database load -- Improved user experience for dashboards and leaderboards - ---- - -### 5. Lazy Load Relations & DTOs -**Priority**: HIGH -**Complexity**: Medium -**Estimated Impact**: 40-60% query time reduction - -**Problem**: Many queries eagerly load deeply nested relations that aren't always needed. - -**Observations**: -```typescript -// Current: Loads everything -scan = await this.scanRepository.findOne( - { id: scan.id }, - { relations: ['runner', 'track', 'runner.scans', 'runner.group', - 'runner.scans.track', 'card', 'station'] } -); -``` - -**Solutions**: - -**A. Create Lightweight Response DTOs** -```typescript -// src/models/responses/ResponseScanLight.ts -export class ResponseScanLight { - @IsInt() id: number; - @IsInt() distance: number; - @IsInt() timestamp: number; - @IsBoolean() valid: boolean; - // Omit nested runner.scans, runner.group, etc. -} - -// Use for list views -@Get() -@ResponseSchema(ResponseScanLight, { isArray: true }) -async getAll() { - const scans = await this.scanRepository.find({ - relations: ['runner', 'track'] // Minimal relations - }); - return scans.map(s => new ResponseScanLight(s)); -} - -// Keep detailed DTO for single-item views -@Get('/:id') -@ResponseSchema(ResponseScan) // Full details -async getOne(@Param('id') id: number) { ... } -``` - -**B. Use Query Builder for Selective Loading** -```typescript -// Instead of loading all scans with runner relations: -const scans = await this.scanRepository - .createQueryBuilder('scan') - .leftJoinAndSelect('scan.runner', 'runner') - .leftJoinAndSelect('scan.track', 'track') - .select([ - 'scan.id', 'scan.distance', 'scan.timestamp', 'scan.valid', - 'runner.id', 'runner.firstname', 'runner.lastname', - 'track.id', 'track.name' - ]) - .where('scan.id = :id', { id }) - .getOne(); -``` - -**C. 
Implement GraphQL-style Field Selection** -```typescript -@Get() -async getAll(@QueryParam('fields') fields?: string) { - const relations = []; - if (fields?.includes('runner')) relations.push('runner'); - if (fields?.includes('track')) relations.push('track'); - return this.scanRepository.find({ relations }); -} -``` - -**Expected Results**: -- 40-60% faster list queries -- 50-70% reduction in data transfer size -- Reduced JOIN complexity and memory usage - ---- - -### 6. Pagination Optimization -**Priority**: MEDIUM -**Complexity**: Low -**Estimated Impact**: 20-40% improvement for large result sets - -**Problem**: Current pagination uses `skip/take` which becomes slow with large offsets. - -**Current Implementation**: -```typescript -// Inefficient for large page numbers (e.g., page=1000) -scans = await this.scanRepository.find({ - skip: page * page_size, // Scans 100,000 rows to skip them - take: page_size -}); -``` - -**Solutions**: - -**A. Cursor-Based Pagination (Recommended)** -```typescript -@Get() -async getAll( - @QueryParam('cursor') cursor?: number, // Last ID from previous page - @QueryParam('page_size') page_size: number = 100 -) { - const query = this.scanRepository.createQueryBuilder('scan') - .orderBy('scan.id', 'ASC') - .take(page_size + 1); // Get 1 extra to determine if more pages exist - - if (cursor) { - query.where('scan.id > :cursor', { cursor }); - } - - const scans = await query.getMany(); - const hasMore = scans.length > page_size; - const results = scans.slice(0, page_size); - const nextCursor = hasMore ? results[results.length - 1].id : null; - - return { - data: results.map(s => s.toResponse()), - pagination: { nextCursor, hasMore } - }; -} -``` - -**B. 
Add Total Count Caching** -```typescript -// Cache total counts to avoid expensive COUNT(*) queries -const totalCache = new Map(); - -async function getTotalCount(repo: Repository): Promise { - const cacheKey = repo.metadata.tableName; - const cached = totalCache.get(cacheKey); - - if (cached && cached.expires > Date.now()) { - return cached.count; - } - - const count = await repo.count(); - totalCache.set(cacheKey, { count, expires: Date.now() + 60000 }); // 60s TTL - return count; -} -``` - -**Expected Results**: -- 60-80% faster pagination for large page numbers -- Consistent query performance regardless of offset -- Better mobile app experience with cursor-based loading - ---- - -## 🔧 Medium Impact, Medium Complexity - -### 7. Database Connection Pooling Optimization -**Priority**: MEDIUM -**Complexity**: Medium -**Estimated Impact**: 10-20% improvement under load - -**Current**: Default TypeORM connection pooling (likely 10 connections) - -**Recommendations**: -```typescript -// ormconfig.js -module.exports = { - // ... existing config - extra: { - // PostgreSQL specific - max: 20, // Max pool size (adjust based on load) - min: 5, // Min pool size - idleTimeoutMillis: 30000, // Close idle connections after 30s - connectionTimeoutMillis: 2000, - - // MySQL specific - connectionLimit: 20, - waitForConnections: true, - queueLimit: 0 - }, - - // Enable query logging in dev to identify slow queries - logging: process.env.NODE_ENV !== 'production' ? ['query', 'error'] : ['error'], - maxQueryExecutionTime: 1000, // Log queries taking >1s -}; -``` - -**Monitor**: -- Connection pool exhaustion -- Query execution times -- Active connection count - ---- - -### 8. Bulk Operations for Import -**Priority**: MEDIUM -**Complexity**: Medium -**Estimated Impact**: 50-80% faster imports - -**Problem**: Import endpoints likely save entities one-by-one in loops. 
- -**Solution**: -```typescript -// Instead of: -for (const runnerData of importData) { - const runner = await createRunner.toEntity(); - await this.runnerRepository.save(runner); // N queries -} - -// Use bulk insert: -const runners = await Promise.all( - importData.map(data => createRunner.toEntity()) -); -await this.runnerRepository.save(runners); // 1 query - -// Or use raw query for massive imports: -await getConnection() - .createQueryBuilder() - .insert() - .into(Runner) - .values(runners) - .execute(); -``` - ---- - -### 9. Response Compression -**Priority**: MEDIUM -**Complexity**: Low -**Estimated Impact**: 60-80% reduction in response size - -**Implementation**: -```typescript -// src/app.ts -import compression from 'compression'; - -const app = createExpressServer({ ... }); -app.use(compression({ - level: 6, // Compression level (1-9) - threshold: 1024, // Only compress responses >1KB - filter: (req, res) => { - if (req.headers['x-no-compression']) return false; - return compression.filter(req, res); - } -})); -``` - -**Benefits**: -- 70-80% smaller JSON responses -- Faster transfer times on slow networks -- Reduced bandwidth costs - -**Dependencies**: `bun add compression @types/compression` - ---- - -## 🎯 Lower Priority / High Complexity - -### 10. 
Implement Read Replicas -**Priority**: LOW (requires infrastructure) -**Complexity**: High -**Estimated Impact**: 30-50% read query improvement - -**When to Consider**: -- Database CPU consistently >70% -- Read-heavy workload (already true for stats endpoints) -- Running PostgreSQL/MySQL in production - -**Implementation**: -```typescript -// ormconfig.js -module.exports = { - type: 'postgres', - replication: { - master: { - host: process.env.DB_WRITE_HOST, - port: 5432, - username: process.env.DB_USER, - password: process.env.DB_PASSWORD, - database: process.env.DB_NAME, - }, - slaves: [ - { - host: process.env.DB_READ_REPLICA_1, - port: 5432, - username: process.env.DB_USER, - password: process.env.DB_PASSWORD, - database: process.env.DB_NAME, - } - ] - } -}; -``` - ---- - -### 11. Move to Serverless/Edge Functions -**Priority**: LOW (architectural change) -**Complexity**: Very High -**Estimated Impact**: Variable (depends on workload) - -**Considerations**: -- Good for: Infrequent workloads, global distribution -- Bad for: High-frequency scan intake (cold starts) -- May conflict with TypeORM's connection model - ---- - -### 12. GraphQL API Layer -**Priority**: LOW (major refactor) -**Complexity**: Very High -**Estimated Impact**: 30-50% for complex queries - -**Benefits**: -- Clients request only needed fields -- Single request for complex nested data -- Better mobile app performance - -**Trade-offs**: -- Complete rewrite of controller layer -- Learning curve for frontend teams -- More complex caching strategy - ---- - -## 📊 Recommended Implementation Order - -**Phase 1: Quick Wins** (1-2 weeks) -1. Add database indexes → Controllers still work, immediate improvement -2. Enable response compression → One-line change in `app.ts` -3. Implement cursor-based pagination → Better mobile UX - -**Phase 2: Caching Layer** (2-3 weeks) -4. Add NATS KV cache for stats endpoints -5. Create lightweight response DTOs for list views -6. 
Cache total counts for pagination - -**Phase 3: Query Optimization** (2-3 weeks) -7. Refactor controllers to use query builder with selective loading -8. Optimize database connection pooling -9. Implement bulk operations for imports - -**Phase 4: Infrastructure** (ongoing) -10. Monitor query performance and add more indexes as needed -11. Consider read replicas when database becomes bottleneck - ---- - -## 🔍 Performance Monitoring Recommendations - -### Add Metrics Endpoint -```typescript -// src/controllers/MetricsController.ts -import { performance } from 'perf_hooks'; - -const requestMetrics = { - totalRequests: 0, - avgLatency: 0, - p95Latency: 0, - dbQueryCount: 0, - cacheHitRate: 0, -}; - -@JsonController('/metrics') -export class MetricsController { - @Get() - @Authorized('ADMIN') // Restrict to admins - async getMetrics() { - return requestMetrics; - } -} -``` - -### Enable Query Logging -```typescript -// ormconfig.js -logging: ['query', 'error'], -maxQueryExecutionTime: 1000, // Warn on queries >1s -``` - -### Add Request Timing Middleware -```typescript -// src/middlewares/TimingMiddleware.ts -export function timingMiddleware(req: Request, res: Response, next: NextFunction) { - const start = performance.now(); - - res.on('finish', () => { - const duration = performance.now() - start; - if (duration > 1000) { - consola.warn(`Slow request: ${req.method} ${req.path} took ${duration}ms`); - } - }); - - next(); -} -``` - ---- - -## 📝 Performance Testing Commands - -```bash -# Run baseline benchmark -bun run benchmark > baseline.txt - -# After implementing changes, compare -bun run benchmark > optimized.txt -diff baseline.txt optimized.txt - -# Load testing with artillery (if added) -artillery quick --count 100 --num 10 http://localhost:4010/api/runners - -# Database query profiling (PostgreSQL) -EXPLAIN ANALYZE SELECT * FROM scan WHERE runner_id = 1; - -# Check database indexes -SELECT * FROM pg_indexes WHERE tablename = 'scan'; - -# Monitor NATS cache 
hit rate -# (Add custom logging in NATS KV functions) -``` - ---- - -## 🎓 Key Principles - -1. **Measure first**: Always benchmark before and after changes -2. **Start with indexes**: Biggest impact, lowest risk -3. **Cache strategically**: Stats endpoints benefit most -4. **Lazy load by default**: Only eager load when absolutely needed -5. **Monitor in production**: Use APM tools (New Relic, DataDog, etc.) - ---- - -## 📚 Additional Resources - -- [TypeORM Performance Tips](https://typeorm.io/performance) -- [PostgreSQL Index Best Practices](https://www.postgresql.org/docs/current/indexes.html) -- [Bun Performance Benchmarks](https://bun.sh/docs/runtime/performance) -- [NATS JetStream KV Guide](https://docs.nats.io/nats-concepts/jetstream/key-value-store) - ---- - -**Last Updated**: 2026-02-20 -**Status**: Ready for review and prioritization