chore(PERFORMANCE_IDEAS): Remove outdated performance optimization ideas document
This commit is contained in:
@@ -1,589 +0,0 @@
|
||||
# Performance Optimization Ideas for LfK Backend
|
||||
|
||||
This document outlines potential performance improvements for the LfK backend API, organized by impact and complexity.
|
||||
|
||||
---
|
||||
|
||||
## ✅ Already Implemented
|
||||
|
||||
### 1. Bun Runtime Migration
|
||||
**Status**: Complete
|
||||
**Impact**: 8-15% latency improvement
|
||||
**Details**: Migrated from Node.js to Bun runtime, achieving:
|
||||
- Parallel throughput: +8.3% (306 → 331 scans/sec)
|
||||
- Parallel p50 latency: -9.5% (21ms → 19ms)
|
||||
|
||||
### 2. NATS KV Cache for Scan Intake
|
||||
**Status**: Complete (based on code analysis)
|
||||
**Impact**: Significant reduction in DB reads for hot path
|
||||
**Details**: `ScanController.stationIntake()` uses NATS JetStream KV store to cache:
|
||||
- Station tokens (1-hour TTL)
|
||||
- Card→Runner mappings (1-hour TTL)
|
||||
- Runner state (no TTL, CAS-based updates)
|
||||
- Eliminates DB reads on cache hits
|
||||
- Prevents race conditions via compare-and-swap (CAS)
|
||||
|
||||
---
|
||||
|
||||
## 🚀 High Impact, Low-Medium Complexity
|
||||
|
||||
### 3. Add Database Indexes
|
||||
**Priority**: HIGH
|
||||
**Complexity**: Low
|
||||
**Estimated Impact**: 30-70% query time reduction
|
||||
|
||||
**Problem**: TypeORM synchronize() doesn't automatically create indexes on foreign keys or commonly queried fields.
|
||||
|
||||
**Observations**:
|
||||
- Heavy use of `find()` with complex nested relations (e.g., `['runner', 'track', 'runner.scans', 'runner.group', 'runner.scans.track']`)
|
||||
- No explicit `@Index()` decorators found in entity files
|
||||
- Frequent filtering by foreign keys (runner_id, track_id, station_id, card_id)
|
||||
|
||||
**Recommended Indexes**:
|
||||
|
||||
```typescript
|
||||
// src/models/entities/Scan.ts
|
||||
@Index(['runner', 'timestamp']) // For runner scan history queries
|
||||
@Index(['station', 'timestamp']) // For station-based queries
|
||||
@Index(['card']) // For card lookup
|
||||
|
||||
// src/models/entities/Runner.ts
|
||||
@Index(['email']) // For authentication/lookup
|
||||
@Index(['group']) // For group-based queries
|
||||
|
||||
// src/models/entities/RunnerCard.ts
|
||||
@Index(['runner']) // For card→runner lookups
|
||||
@Index(['code']) // For barcode scans
|
||||
|
||||
// src/models/entities/Donation.ts
|
||||
@Index(['runner']) // For runner donations
|
||||
@Index(['donor']) // For donor contributions
|
||||
```
|
||||
|
||||
**Implementation Steps**:
|
||||
1. Audit all entities and add `@Index()` decorators
|
||||
2. Test query performance with `EXPLAIN` before/after
|
||||
3. Monitor index usage with database tools
|
||||
4. Consider composite indexes for frequently combined filters
|
||||
|
||||
**Expected Results**:
|
||||
- 50-70% faster JOIN operations
|
||||
- 30-50% faster foreign key lookups
|
||||
- Reduced database CPU usage
|
||||
|
||||
---
|
||||
|
||||
### 4. Implement Query Result Caching
|
||||
**Priority**: HIGH
|
||||
**Complexity**: Medium
|
||||
**Estimated Impact**: 50-90% latency reduction for repeated queries
|
||||
|
||||
**Problem**: Stats endpoints and frequently accessed data (org totals, team rankings, runner lists) are recalculated on every request.
|
||||
|
||||
**Observations**:
|
||||
- `StatsController` methods load entire datasets with deep relations:
|
||||
- `getRunnerStats()`: loads all runners with scans, groups, donations
|
||||
- `getTeamStats()`: loads all teams with nested runner data
|
||||
- `getOrgStats()`: loads all orgs with teams, runners, scans
|
||||
- Many `find()` calls without any caching layer
|
||||
- Data changes infrequently relative to reads (mainly during scan intake, plus occasional runner and donation updates)
|
||||
|
||||
**Solution Options**:
|
||||
|
||||
**Option A: NATS KV Cache (Recommended)**
|
||||
```typescript
|
||||
// src/nats/StatsKV.ts
|
||||
export async function getOrgStatsCache(): Promise<ResponseOrgStats[] | null> {
|
||||
const kv = await NatsClient.getKV('stats_cache', { ttl: 60 * 1000 }); // 60s TTL
|
||||
const entry = await kv.get('org_stats');
|
||||
return entry ? JSON.parse(entry.string()) : null;
|
||||
}
|
||||
|
||||
export async function setOrgStatsCache(stats: ResponseOrgStats[]): Promise<void> {
|
||||
const kv = await NatsClient.getKV('stats_cache', { ttl: 60 * 1000 });
|
||||
await kv.put('org_stats', JSON.stringify(stats));
|
||||
}
|
||||
|
||||
// Invalidate on scan creation (invalidateStatsCache() is not shown here; it would need to remove the cached 'org_stats' entry)
|
||||
// src/controllers/ScanController.ts (after line 173)
|
||||
await invalidateStatsCache(); // Clear stats on new scan
|
||||
```
|
||||
|
||||
**Option B: In-Memory Cache with TTL**
|
||||
```typescript
|
||||
// src/cache/MemoryCache.ts
|
||||
import NodeCache from 'node-cache';
|
||||
|
||||
const cache = new NodeCache({ stdTTL: 60 }); // 60s TTL
|
||||
|
||||
export function getCached<T>(key: string): T | undefined {
|
||||
return cache.get<T>(key);
|
||||
}
|
||||
|
||||
export function setCached<T>(key: string, value: T, ttl?: number): void {
|
||||
cache.set(key, value, ttl);
|
||||
}
|
||||
|
||||
export function invalidatePattern(pattern: string): void {
|
||||
const keys = cache.keys().filter(k => k.includes(pattern));
|
||||
cache.del(keys);
|
||||
}
|
||||
```
|
||||
|
||||
**Option C: Redis Cache** (if Redis is already in stack)
|
||||
|
||||
**Recommended Cache Strategy**:
|
||||
- **TTL**: 30-60 seconds for stats endpoints
|
||||
- **Invalidation**: On scan creation, runner updates, donation changes
|
||||
- **Keys**: `stats:org`, `stats:team:${id}`, `stats:runner:${id}`
|
||||
- **Warm on startup**: Pre-populate cache for critical endpoints
|
||||
|
||||
**Expected Results**:
|
||||
- 80-90% latency reduction for stats endpoints (from ~500ms to ~50ms)
|
||||
- 70-80% reduction in database load
|
||||
- Improved user experience for dashboards and leaderboards
|
||||
|
||||
---
|
||||
|
||||
### 5. Lazy Load Relations & DTOs
|
||||
**Priority**: HIGH
|
||||
**Complexity**: Medium
|
||||
**Estimated Impact**: 40-60% query time reduction
|
||||
|
||||
**Problem**: Many queries eagerly load deeply nested relations that aren't always needed.
|
||||
|
||||
**Observations**:
|
||||
```typescript
|
||||
// Current: Loads everything
|
||||
scan = await this.scanRepository.findOne(
|
||||
{ id: scan.id },
|
||||
{ relations: ['runner', 'track', 'runner.scans', 'runner.group',
|
||||
'runner.scans.track', 'card', 'station'] }
|
||||
);
|
||||
```
|
||||
|
||||
**Solutions**:
|
||||
|
||||
**A. Create Lightweight Response DTOs**
|
||||
```typescript
|
||||
// src/models/responses/ResponseScanLight.ts
|
||||
export class ResponseScanLight {
|
||||
@IsInt() id: number;
|
||||
@IsInt() distance: number;
|
||||
@IsInt() timestamp: number;
|
||||
@IsBoolean() valid: boolean;
|
||||
// Omit nested runner.scans, runner.group, etc.
|
||||
}
|
||||
|
||||
// Use for list views
|
||||
@Get()
|
||||
@ResponseSchema(ResponseScanLight, { isArray: true })
|
||||
async getAll() {
|
||||
const scans = await this.scanRepository.find({
|
||||
relations: ['runner', 'track'] // Minimal relations
|
||||
});
|
||||
return scans.map(s => new ResponseScanLight(s));
|
||||
}
|
||||
|
||||
// Keep detailed DTO for single-item views
|
||||
@Get('/:id')
|
||||
@ResponseSchema(ResponseScan) // Full details
|
||||
async getOne(@Param('id') id: number) { ... }
|
||||
```
|
||||
|
||||
**B. Use Query Builder for Selective Loading**
|
||||
```typescript
|
||||
// Instead of loading all scans with runner relations:
|
||||
const scans = await this.scanRepository
|
||||
.createQueryBuilder('scan')
|
||||
.leftJoinAndSelect('scan.runner', 'runner')
|
||||
.leftJoinAndSelect('scan.track', 'track')
|
||||
.select([
|
||||
'scan.id', 'scan.distance', 'scan.timestamp', 'scan.valid',
|
||||
'runner.id', 'runner.firstname', 'runner.lastname',
|
||||
'track.id', 'track.name'
|
||||
])
|
||||
.where('scan.id = :id', { id })
|
||||
.getOne();
|
||||
```
|
||||
|
||||
**C. Implement GraphQL-style Field Selection**
|
||||
```typescript
|
||||
@Get()
|
||||
async getAll(@QueryParam('fields') fields?: string) {
|
||||
const relations = [];
|
||||
if (fields?.includes('runner')) relations.push('runner');
|
||||
if (fields?.includes('track')) relations.push('track');
|
||||
return this.scanRepository.find({ relations });
|
||||
}
|
||||
```
|
||||
|
||||
**Expected Results**:
|
||||
- 40-60% faster list queries
|
||||
- 50-70% reduction in data transfer size
|
||||
- Reduced JOIN complexity and memory usage
|
||||
|
||||
---
|
||||
|
||||
### 6. Pagination Optimization
|
||||
**Priority**: MEDIUM
|
||||
**Complexity**: Low
|
||||
**Estimated Impact**: 20-40% improvement for large result sets
|
||||
|
||||
**Problem**: Current pagination uses `skip/take` which becomes slow with large offsets.
|
||||
|
||||
**Current Implementation**:
|
||||
```typescript
|
||||
// Inefficient for large page numbers (e.g., page=1000)
|
||||
scans = await this.scanRepository.find({
|
||||
skip: page * page_size, // Scans 100,000 rows to skip them
|
||||
take: page_size
|
||||
});
|
||||
```
|
||||
|
||||
**Solutions**:
|
||||
|
||||
**A. Cursor-Based Pagination (Recommended)**
|
||||
```typescript
|
||||
@Get()
|
||||
async getAll(
|
||||
@QueryParam('cursor') cursor?: number, // Last ID from previous page
|
||||
@QueryParam('page_size') page_size: number = 100
|
||||
) {
|
||||
const query = this.scanRepository.createQueryBuilder('scan')
|
||||
.orderBy('scan.id', 'ASC')
|
||||
.take(page_size + 1); // Get 1 extra to determine if more pages exist
|
||||
|
||||
if (cursor) {
|
||||
query.where('scan.id > :cursor', { cursor });
|
||||
}
|
||||
|
||||
const scans = await query.getMany();
|
||||
const hasMore = scans.length > page_size;
|
||||
const results = scans.slice(0, page_size);
|
||||
const nextCursor = hasMore ? results[results.length - 1].id : null;
|
||||
|
||||
return {
|
||||
data: results.map(s => s.toResponse()),
|
||||
pagination: { nextCursor, hasMore }
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**B. Add Total Count Caching**
|
||||
```typescript
|
||||
// Cache total counts to avoid expensive COUNT(*) queries
|
||||
const totalCache = new Map<string, { count: number, expires: number }>();
|
||||
|
||||
async function getTotalCount(repo: Repository<any>): Promise<number> {
|
||||
const cacheKey = repo.metadata.tableName;
|
||||
const cached = totalCache.get(cacheKey);
|
||||
|
||||
if (cached && cached.expires > Date.now()) {
|
||||
return cached.count;
|
||||
}
|
||||
|
||||
const count = await repo.count();
|
||||
totalCache.set(cacheKey, { count, expires: Date.now() + 60000 }); // 60s TTL
|
||||
return count;
|
||||
}
|
||||
```
|
||||
|
||||
**Expected Results**:
|
||||
- 60-80% faster pagination for large page numbers
|
||||
- Consistent query performance regardless of offset
|
||||
- Better mobile app experience with cursor-based loading
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Medium Impact, Medium Complexity
|
||||
|
||||
### 7. Database Connection Pooling Optimization
|
||||
**Priority**: MEDIUM
|
||||
**Complexity**: Medium
|
||||
**Estimated Impact**: 10-20% improvement under load
|
||||
|
||||
**Current**: Default TypeORM connection pooling (likely 10 connections)
|
||||
|
||||
**Recommendations**:
|
||||
```typescript
|
||||
// ormconfig.js
|
||||
module.exports = {
|
||||
// ... existing config
|
||||
extra: {
|
||||
// PostgreSQL specific (keep only the option group that matches the configured driver)
|
||||
max: 20, // Max pool size (adjust based on load)
|
||||
min: 5, // Min pool size
|
||||
idleTimeoutMillis: 30000, // Close idle connections after 30s
|
||||
connectionTimeoutMillis: 2000,
|
||||
|
||||
// MySQL specific
|
||||
connectionLimit: 20,
|
||||
waitForConnections: true,
|
||||
queueLimit: 0
|
||||
},
|
||||
|
||||
// Enable query logging in dev to identify slow queries
|
||||
logging: process.env.NODE_ENV !== 'production' ? ['query', 'error'] : ['error'],
|
||||
maxQueryExecutionTime: 1000, // Log queries taking >1s
|
||||
};
|
||||
```
|
||||
|
||||
**Monitor**:
|
||||
- Connection pool exhaustion
|
||||
- Query execution times
|
||||
- Active connection count
|
||||
|
||||
---
|
||||
|
||||
### 8. Bulk Operations for Import
|
||||
**Priority**: MEDIUM
|
||||
**Complexity**: Medium
|
||||
**Estimated Impact**: 50-80% faster imports
|
||||
|
||||
**Problem**: Import endpoints likely save entities one-by-one in loops.
|
||||
|
||||
**Solution**:
|
||||
```typescript
|
||||
// Instead of:
|
||||
for (const runnerData of importData) {
|
||||
const runner = await createRunner.toEntity();
|
||||
await this.runnerRepository.save(runner); // N queries
|
||||
}
|
||||
|
||||
// Use bulk insert:
|
||||
const runners = await Promise.all(
|
||||
importData.map(data => createRunner.toEntity())
|
||||
);
|
||||
await this.runnerRepository.save(runners); // 1 query
|
||||
|
||||
// Or use the query builder's bulk insert for massive imports:
|
||||
await getConnection()
|
||||
.createQueryBuilder()
|
||||
.insert()
|
||||
.into(Runner)
|
||||
.values(runners)
|
||||
.execute();
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 9. Response Compression
|
||||
**Priority**: MEDIUM
|
||||
**Complexity**: Low
|
||||
**Estimated Impact**: 60-80% reduction in response size
|
||||
|
||||
**Implementation**:
|
||||
```typescript
|
||||
// src/app.ts
|
||||
import compression from 'compression';
|
||||
|
||||
const app = createExpressServer({ ... });
|
||||
app.use(compression({
|
||||
level: 6, // Compression level (1-9)
|
||||
threshold: 1024, // Only compress responses >1KB
|
||||
filter: (req, res) => {
|
||||
if (req.headers['x-no-compression']) return false;
|
||||
return compression.filter(req, res);
|
||||
}
|
||||
}));
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- 60-80% smaller JSON responses
|
||||
- Faster transfer times on slow networks
|
||||
- Reduced bandwidth costs
|
||||
|
||||
**Dependencies**: `bun add compression` and `bun add -d @types/compression` (the type definitions are only needed at build time)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Lower Priority / High Complexity
|
||||
|
||||
### 10. Implement Read Replicas
|
||||
**Priority**: LOW (requires infrastructure)
|
||||
**Complexity**: High
|
||||
**Estimated Impact**: 30-50% read query improvement
|
||||
|
||||
**When to Consider**:
|
||||
- Database CPU consistently >70%
|
||||
- Read-heavy workload (already true for stats endpoints)
|
||||
- Running PostgreSQL/MySQL in production
|
||||
|
||||
**Implementation**:
|
||||
```typescript
|
||||
// ormconfig.js
|
||||
module.exports = {
|
||||
type: 'postgres',
|
||||
replication: {
|
||||
master: {
|
||||
host: process.env.DB_WRITE_HOST,
|
||||
port: 5432,
|
||||
username: process.env.DB_USER,
|
||||
password: process.env.DB_PASSWORD,
|
||||
database: process.env.DB_NAME,
|
||||
},
|
||||
slaves: [
|
||||
{
|
||||
host: process.env.DB_READ_REPLICA_1,
|
||||
port: 5432,
|
||||
username: process.env.DB_USER,
|
||||
password: process.env.DB_PASSWORD,
|
||||
database: process.env.DB_NAME,
|
||||
}
|
||||
]
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 11. Move to Serverless/Edge Functions
|
||||
**Priority**: LOW (architectural change)
|
||||
**Complexity**: Very High
|
||||
**Estimated Impact**: Variable (depends on workload)
|
||||
|
||||
**Considerations**:
|
||||
- Good for: Infrequent workloads, global distribution
|
||||
- Bad for: High-frequency scan intake (cold starts)
|
||||
- May conflict with TypeORM's connection model
|
||||
|
||||
---
|
||||
|
||||
### 12. GraphQL API Layer
|
||||
**Priority**: LOW (major refactor)
|
||||
**Complexity**: Very High
|
||||
**Estimated Impact**: 30-50% for complex queries
|
||||
|
||||
**Benefits**:
|
||||
- Clients request only needed fields
|
||||
- Single request for complex nested data
|
||||
- Better mobile app performance
|
||||
|
||||
**Trade-offs**:
|
||||
- Complete rewrite of controller layer
|
||||
- Learning curve for frontend teams
|
||||
- More complex caching strategy
|
||||
|
||||
---
|
||||
|
||||
## 📊 Recommended Implementation Order
|
||||
|
||||
**Phase 1: Quick Wins** (1-2 weeks)
|
||||
1. Add database indexes → No controller changes required; immediate improvement
|
||||
2. Enable response compression → Small, self-contained change in `app.ts` (plus the `compression` dependency)
|
||||
3. Implement cursor-based pagination → Better mobile UX
|
||||
|
||||
**Phase 2: Caching Layer** (2-3 weeks)
|
||||
4. Add NATS KV cache for stats endpoints
|
||||
5. Create lightweight response DTOs for list views
|
||||
6. Cache total counts for pagination
|
||||
|
||||
**Phase 3: Query Optimization** (2-3 weeks)
|
||||
7. Refactor controllers to use query builder with selective loading
|
||||
8. Optimize database connection pooling
|
||||
9. Implement bulk operations for imports
|
||||
|
||||
**Phase 4: Infrastructure** (ongoing)
|
||||
10. Monitor query performance and add more indexes as needed
|
||||
11. Consider read replicas when database becomes bottleneck
|
||||
|
||||
---
|
||||
|
||||
## 🔍 Performance Monitoring Recommendations
|
||||
|
||||
### Add Metrics Endpoint
|
||||
```typescript
|
||||
// src/controllers/MetricsController.ts
|
||||
import { performance } from 'perf_hooks';
|
||||
|
||||
const requestMetrics = {
|
||||
totalRequests: 0,
|
||||
avgLatency: 0,
|
||||
p95Latency: 0,
|
||||
dbQueryCount: 0,
|
||||
cacheHitRate: 0,
|
||||
};
|
||||
|
||||
@JsonController('/metrics')
|
||||
export class MetricsController {
|
||||
@Get()
|
||||
@Authorized('ADMIN') // Restrict to admins
|
||||
async getMetrics() {
|
||||
return requestMetrics;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Enable Query Logging
|
||||
```typescript
|
||||
// ormconfig.js
|
||||
logging: ['query', 'error'],
|
||||
maxQueryExecutionTime: 1000, // Warn on queries >1s
|
||||
```
|
||||
|
||||
### Add Request Timing Middleware
|
||||
```typescript
|
||||
// src/middlewares/TimingMiddleware.ts
|
||||
export function timingMiddleware(req: Request, res: Response, next: NextFunction) {
|
||||
const start = performance.now();
|
||||
|
||||
res.on('finish', () => {
|
||||
const duration = performance.now() - start;
|
||||
if (duration > 1000) {
|
||||
consola.warn(`Slow request: ${req.method} ${req.path} took ${duration}ms`);
|
||||
}
|
||||
});
|
||||
|
||||
next();
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📝 Performance Testing Commands
|
||||
|
||||
```bash
|
||||
# Run baseline benchmark
|
||||
bun run benchmark > baseline.txt
|
||||
|
||||
# After implementing changes, compare
|
||||
bun run benchmark > optimized.txt
|
||||
diff baseline.txt optimized.txt
|
||||
|
||||
# Load testing with artillery (if added)
|
||||
artillery quick --count 100 --num 10 http://localhost:4010/api/runners
|
||||
|
||||
# Database query profiling (PostgreSQL; run the following SQL in psql, not in the shell)
|
||||
EXPLAIN ANALYZE SELECT * FROM scan WHERE runner_id = 1;
|
||||
|
||||
# Check database indexes (PostgreSQL; run the following SQL in psql, not in the shell)
|
||||
SELECT * FROM pg_indexes WHERE tablename = 'scan';
|
||||
|
||||
# Monitor NATS cache hit rate
|
||||
# (Add custom logging in NATS KV functions)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎓 Key Principles
|
||||
|
||||
1. **Measure first**: Always benchmark before and after changes
|
||||
2. **Start with indexes**: Biggest impact, lowest risk
|
||||
3. **Cache strategically**: Stats endpoints benefit most
|
||||
4. **Lazy load by default**: Only eager load when absolutely needed
|
||||
5. **Monitor in production**: Use APM tools (New Relic, Datadog, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 📚 Additional Resources
|
||||
|
||||
- [TypeORM Performance Tips](https://typeorm.io/performance)
|
||||
- [PostgreSQL Index Best Practices](https://www.postgresql.org/docs/current/indexes.html)
|
||||
- [Bun Performance Benchmarks](https://bun.sh/docs/runtime/performance)
|
||||
- [NATS JetStream KV Guide](https://docs.nats.io/nats-concepts/jetstream/key-value-store)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: 2026-02-20
|
||||
**Status**: Ready for review and prioritization
|
||||
Reference in New Issue
Block a user