Database Optimization: Performance Tuning and Best Practices
Database optimization is crucial for keeping applications fast as data and traffic grow. Let's explore essential techniques and best practices for tuning database performance, with PostgreSQL used for the examples throughout.
Query Optimization
1. Index Management
Implement efficient indexing strategies:
```sql
-- Create composite index for frequently used columns
CREATE INDEX idx_users_email_status ON users (email, status);

-- Create partial index for specific conditions
CREATE INDEX idx_active_users ON users (created_at) WHERE status = 'active';

-- Create covering index (PostgreSQL 11+) for common queries
CREATE INDEX idx_posts_author_date ON posts (author_id, created_at) INCLUDE (title, status);

-- Analyze index usage (pg_stat_user_indexes exposes relname/indexrelname,
-- not tablename/indexname)
SELECT schemaname,
       relname AS tablename,
       indexrelname AS indexname,
       idx_scan,
       idx_tup_read,
       idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
```
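The covering index above lets PostgreSQL answer queries that touch only author_id, created_at, title, and status with an Index Only Scan, skipping heap fetches entirely. To confirm an index is actually being used, EXPLAIN a representative query; a quick sketch against the users table above (the email value is illustrative):

```sql
-- Should report an Index Scan (or Index Only Scan) rather than a Seq Scan
EXPLAIN (ANALYZE, BUFFERS)
SELECT email, status FROM users WHERE email = 'alice@example.com';
```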
2. Query Performance Analysis
Analyze and optimize query execution:
```typescript
// database/query-analyzer.ts
import { Pool } from "pg";

export interface QueryAnalysis {
  plan: string[];
}

export interface SlowQuery {
  query: string;
  calls: number;
  avg_time: number;
  avg_rows: number;
}

export interface IndexSuggestion {
  table: string;
  column: string;
  seqScans: number;
  suggestion: string;
}

export class QueryAnalyzer {
  constructor(private pool: Pool) {}

  // Run EXPLAIN ANALYZE on a query. Only pass trusted SQL here:
  // the query text is interpolated, not parameterized.
  async analyzeQuery(query: string): Promise<QueryAnalysis> {
    const explain = await this.pool.query(`EXPLAIN ANALYZE ${query}`);
    return this.parseExplainOutput(explain.rows);
  }

  // Find queries whose average execution time exceeds the threshold (ms).
  // Requires the pg_stat_statements extension. Note: on PostgreSQL 13+
  // the column is named total_exec_time rather than total_time.
  async identifySlowQueries(threshold: number): Promise<SlowQuery[]> {
    const query = `
      SELECT query,
             calls,
             total_time / calls AS avg_time,
             rows / calls AS avg_rows
      FROM pg_stat_statements
      WHERE total_time / calls > $1
      ORDER BY avg_time DESC
      LIMIT 10
    `;
    const result = await this.pool.query(query, [threshold]);
    return result.rows;
  }

  // Flag columns on tables that are sequentially scanned more often
  // than index-scanned and have enough distinct values to benefit.
  async suggestIndexes(): Promise<IndexSuggestion[]> {
    const query = `
      SELECT s.schemaname, s.tablename, s.attname, t.seq_scan, t.idx_scan
      FROM pg_stat_user_tables t
      JOIN pg_stats s ON t.relname = s.tablename
      WHERE t.seq_scan > t.idx_scan
        AND s.n_distinct > 100
      ORDER BY t.seq_scan DESC
    `;
    const result = await this.pool.query(query);
    return result.rows.map((row) => ({
      table: row.tablename,
      column: row.attname,
      seqScans: row.seq_scan,
      suggestion: `Consider adding index on ${row.tablename}(${row.attname})`,
    }));
  }

  // EXPLAIN ANALYZE returns one row per plan line in the "QUERY PLAN" column
  private parseExplainOutput(rows: any[]): QueryAnalysis {
    return { plan: rows.map((row) => row["QUERY PLAN"]) };
  }
}

// Example usage
const pool = new Pool(); // connection settings read from the standard PG* env vars
const analyzer = new QueryAnalyzer(pool);
const analysis = await analyzer.analyzeQuery(
  "SELECT * FROM users WHERE email LIKE 'a%'" // EXPLAIN needs a literal, not $1
);
console.log(analysis);
```
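Note that identifySlowQueries relies on the pg_stat_statements extension, which is not enabled by default. A minimal setup sketch (the reset call simply clears accumulated statistics):

```sql
-- In postgresql.conf (requires a server restart):
--   shared_preload_libraries = 'pg_stat_statements'
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Start from a clean slate after changing queries or indexes
SELECT pg_stat_statements_reset();
```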
Database Schema Optimization
1. Table Partitioning
Implement table partitioning for large tables:
```sql
-- Create partitioned table
CREATE TABLE events (
    id SERIAL,
    event_type VARCHAR(50),
    created_at TIMESTAMP,
    payload JSONB
) PARTITION BY RANGE (created_at);

-- Create partitions
CREATE TABLE events_2024_q1 PARTITION OF events
    FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

CREATE TABLE events_2024_q2 PARTITION OF events
    FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');

-- Create indexes on partitions
CREATE INDEX idx_events_2024_q1_type ON events_2024_q1 (event_type);
CREATE INDEX idx_events_2024_q2_type ON events_2024_q2 (event_type);
```
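Two details worth knowing, assuming PostgreSQL 11 or later: inserts whose created_at falls outside every declared range will fail unless a default partition exists, and an index created on the parent table automatically cascades to all partitions, including ones added later:

```sql
-- Catch rows that fall outside the defined quarterly ranges
CREATE TABLE events_default PARTITION OF events DEFAULT;

-- Index the parent once instead of each partition individually;
-- PostgreSQL 11+ propagates it to every partition
CREATE INDEX idx_events_type ON events (event_type);
```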
2. Normalization and Denormalization
Strike a balance between normalization and denormalization based on how the data is actually queried:
```typescript
// database/schema-optimizer.ts
import { Pool } from "pg";

interface TableStats {
  tableName: string;
  rowCount: number;
  avgRowSize: number;
  totalSize: number;
  indexSize: number;
}

interface DenormalizationSuggestion {
  table: string;
  suggestion: string;
}

export class SchemaOptimizer {
  constructor(private pool: Pool) {}

  async analyzeTableStats(): Promise<TableStats[]> {
    const query = `
      SELECT relname AS table_name,
             n_live_tup AS row_count,
             pg_total_relation_size(relid) AS total_size,
             pg_indexes_size(relid) AS index_size,
             pg_relation_size(relid) / n_live_tup AS avg_row_size
      FROM pg_stat_user_tables
      WHERE n_live_tup > 0
      ORDER BY total_size DESC
    `;
    const result = await this.pool.query(query);
    // Map the snake_case SQL aliases onto the TableStats shape
    // (pg returns bigint columns as strings, hence Number())
    return result.rows.map((row) => ({
      tableName: row.table_name,
      rowCount: Number(row.row_count),
      avgRowSize: Number(row.avg_row_size),
      totalSize: Number(row.total_size),
      indexSize: Number(row.index_size),
    }));
  }

  async suggestDenormalization(): Promise<DenormalizationSuggestion[]> {
    const query = `
      SELECT relname, seq_scan, n_live_tup, n_dead_tup,
             last_vacuum, last_autovacuum
      FROM pg_stat_user_tables
      WHERE n_live_tup > 100000
      ORDER BY seq_scan DESC
    `;
    const result = await this.pool.query(query);
    return result.rows.map((row) => ({
      table: row.relname,
      suggestion: this.generateDenormalizationSuggestion(row),
    }));
  }

  private generateDenormalizationSuggestion(stats: any): string {
    if (stats.seq_scan > 1000 && stats.n_live_tup > 500000) {
      return `Consider denormalizing frequently accessed data from ${stats.relname}`;
    }
    return `Monitor query patterns on ${stats.relname} for potential denormalization`;
  }
}
```
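When the statistics do point toward denormalization, a materialized view is often a lighter-weight option than duplicating columns, since the base schema stays normalized. A sketch using the posts table from earlier (the view name is illustrative):

```sql
-- Precompute per-author post counts instead of aggregating on every read
CREATE MATERIALIZED VIEW author_post_counts AS
SELECT author_id, count(*) AS post_count
FROM posts
GROUP BY author_id;

-- A unique index is required for REFRESH ... CONCURRENTLY,
-- which rebuilds the view without blocking readers
CREATE UNIQUE INDEX idx_author_post_counts ON author_post_counts (author_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY author_post_counts;
```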
Query Caching
1. Redis Caching Implementation
Implement efficient caching strategies:
```typescript
// database/cache.service.ts
import Redis from "ioredis";
import { Pool } from "pg";

export class CacheService {
  private redis: Redis;
  private readonly DEFAULT_TTL = 3600; // 1 hour

  constructor() {
    this.redis = new Redis({
      host: process.env.REDIS_HOST,
      port: parseInt(process.env.REDIS_PORT!),
      password: process.env.REDIS_PASSWORD,
    });
  }

  async get<T>(key: string): Promise<T | null> {
    const cached = await this.redis.get(key);
    return cached ? (JSON.parse(cached) as T) : null;
  }

  async set(
    key: string,
    value: any,
    ttl: number = this.DEFAULT_TTL
  ): Promise<void> {
    await this.redis.set(key, JSON.stringify(value), "EX", ttl);
  }

  // Note: KEYS blocks Redis while it walks the keyspace; acceptable for
  // small datasets, but prefer SCAN in production (see the sketch below)
  async invalidate(pattern: string): Promise<void> {
    const keys = await this.redis.keys(pattern);
    if (keys.length > 0) {
      await this.redis.del(...keys);
    }
  }

  // Sort params so the same inputs always produce the same key
  generateCacheKey(prefix: string, params: Record<string, any>): string {
    const sortedParams = Object.entries(params)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([key, value]) => `${key}:${value}`)
      .join(":");
    return `${prefix}:${sortedParams}`;
  }
}

// Example usage with repository pattern
interface User {
  id: string;
  email: string;
  status: string;
}

export class UserRepository {
  constructor(private db: Pool, private cache: CacheService) {}

  async findById(id: string): Promise<User | null> {
    const cacheKey = this.cache.generateCacheKey("user", { id });
    const cached = await this.cache.get<User>(cacheKey);
    if (cached) {
      return cached;
    }
    const result = await this.db.query("SELECT * FROM users WHERE id = $1", [id]);
    const user = result.rows[0] || null;
    if (user) {
      await this.cache.set(cacheKey, user);
    }
    return user;
  }

  async update(id: string, data: Partial<User>): Promise<User> {
    // Build the SET clause dynamically; the original left it elided ("SET ...").
    // Column names come from the typed Partial<User>, not raw user input.
    const columns = Object.keys(data);
    if (columns.length === 0) {
      throw new Error("No fields to update");
    }
    const setClause = columns.map((col, i) => `${col} = $${i + 2}`).join(", ");
    const result = await this.db.query(
      `UPDATE users SET ${setClause} WHERE id = $1 RETURNING *`,
      [id, ...Object.values(data)]
    );
    const user = result.rows[0];
    // Invalidate the cached entry for this user
    await this.cache.invalidate(`user:id:${id}`);
    return user;
  }
}
```
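On larger keyspaces, invalidate is better served by SCAN than by KEYS, since KEYS blocks the Redis event loop while it walks every key. A sketch using ioredis's scanStream (the function name is illustrative):

```typescript
import Redis from "ioredis";

// Non-blocking invalidation: SCAN the keyspace in batches and delete matches
async function invalidateByScan(redis: Redis, pattern: string): Promise<void> {
  const stream = redis.scanStream({ match: pattern, count: 100 });
  for await (const keys of stream) {
    if (keys.length > 0) {
      await redis.del(...keys);
    }
  }
}
```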
Connection Pool Management
1. Database Pool Configuration
Optimize connection pool settings:
```typescript
// database/pool.config.ts
import { Pool } from "pg";

export interface PoolStats {
  totalCount: number;
  idleCount: number;
  waitingCount: number;
}

export class DatabasePool {
  private static instance: Pool;

  static getInstance(): Pool {
    if (!this.instance) {
      this.instance = new Pool({
        host: process.env.DB_HOST,
        port: parseInt(process.env.DB_PORT!),
        database: process.env.DB_NAME,
        user: process.env.DB_USER,
        password: process.env.DB_PASSWORD,
        // Pool configuration
        max: 20, // Maximum number of clients
        idleTimeoutMillis: 30000,
        connectionTimeoutMillis: 2000,
        maxUses: 7500, // Recycle a client after this many uses
      });

      // Pool error handling
      this.instance.on("error", (err) => {
        console.error("Unexpected error on idle client", err);
        process.exit(-1);
      });

      // Pool monitoring
      this.instance.on("connect", () => {
        console.log("New client connected");
      });
      this.instance.on("remove", () => {
        console.log("Client removed from pool");
      });
    }
    return this.instance;
  }

  static async getPoolStats(): Promise<PoolStats> {
    const pool = this.getInstance();
    return {
      totalCount: pool.totalCount,
      idleCount: pool.idleCount,
      waitingCount: pool.waitingCount,
    };
  }
}
```
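Because the pool is a process-wide singleton, drain it during shutdown so in-flight queries can finish and connections are returned to the server. A usage sketch:

```typescript
const pool = DatabasePool.getInstance();
const { rows } = await pool.query("SELECT now()");
console.log(rows[0]);

// Drain the pool on shutdown so connections are released cleanly
process.on("SIGTERM", async () => {
  await pool.end();
  process.exit(0);
});
```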
Best Practices
- Regular Maintenance: Schedule regular VACUUM and ANALYZE operations (see the sketch after this list)
- Monitor Query Performance: Use tools like pg_stat_statements
- Optimize Indexes: Create and maintain efficient indexes
- Connection Management: Properly configure connection pools
- Caching Strategy: Implement appropriate caching mechanisms
- Query Optimization: Write efficient queries and use prepared statements
- Schema Design: Design schemas with performance in mind
- Regular Monitoring: Set up comprehensive database monitoring
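For the maintenance item above: VACUUM reclaims dead tuples and ANALYZE refreshes the planner's statistics. A minimal sketch, with table names taken from the examples in this article:

```sql
-- Reclaim dead tuples and refresh planner statistics in one pass
VACUUM (ANALYZE, VERBOSE) users;
VACUUM (ANALYZE) posts;

-- Check when autovacuum last ran before scheduling manual passes
SELECT relname, last_vacuum, last_autovacuum, last_analyze, last_autoanalyze
FROM pg_stat_user_tables
ORDER BY last_autovacuum NULLS FIRST;
```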
Implementation Checklist
- Set up monitoring tools (a starter query follows this checklist)
- Implement connection pooling
- Configure query caching
- Optimize database indexes
- Implement partitioning strategy
- Set up maintenance schedules
- Configure backup solutions
- Document optimization strategies
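For the monitoring item above, a useful starting point is the buffer cache hit ratio, which should generally stay above roughly 99% on OLTP workloads:

```sql
-- Overall cache hit ratio; sustained low values suggest undersized shared_buffers
SELECT sum(blks_hit) * 100.0 / NULLIF(sum(blks_hit) + sum(blks_read), 0) AS cache_hit_pct
FROM pg_stat_database;
```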
Conclusion
Database optimization is an ongoing process that requires regular monitoring and maintenance. Focus on implementing these practices to maintain high performance and scalability.