Dead Jobs
Dead jobs are jobs that have exhausted all retry attempts. Nerve helps you manage, investigate, and recover from these failures.
Listing Dead Jobs
Copy
dead = BrainzLab::Nerve.dead_jobs(limit: 50)
dead.each do |job|
puts "#{job.class_name}: #{job.error_message}"
puts " Failed at: #{job.died_at}"
puts " Attempts: #{job.attempts}"
puts " Queue: #{job.queue}"
end
Dead Job Details
Copy
job = BrainzLab::Nerve.dead_job("dead_abc123")
puts job.id
puts job.class_name
puts job.queue
puts job.arguments
puts job.error_class
puts job.error_message
puts job.backtrace
puts job.attempts
puts job.first_failed_at
puts job.died_at
puts job.original_enqueued_at
Filtering Dead Jobs
Copy
# By error type
not_found = BrainzLab::Nerve.dead_jobs(
error_class: "ActiveRecord::RecordNotFound"
)
# By job class
import_failures = BrainzLab::Nerve.dead_jobs(
class_name: "ImportDataJob"
)
# By time range
recent = BrainzLab::Nerve.dead_jobs(
died_after: 1.day.ago
)
# By queue
mailer_dead = BrainzLab::Nerve.dead_jobs(
queue: :mailers
)
Retrying Dead Jobs
Copy
# Retry a single job
BrainzLab::Nerve.retry_dead_job("dead_abc123")
# Retry multiple jobs
BrainzLab::Nerve.retry_dead_jobs(["dead_abc123", "dead_def456"])
# Retry all jobs of a class
BrainzLab::Nerve.retry_dead_jobs_by_class("ImportDataJob")
# Retry all jobs with specific error
BrainzLab::Nerve.retry_dead_jobs_by_error("Timeout::Error")
# Retry all dead jobs (use with caution!)
BrainzLab::Nerve.retry_all_dead_jobs(confirm: true)
Deleting Dead Jobs
Copy
# Delete a single job
BrainzLab::Nerve.delete_dead_job("dead_abc123")
# Delete jobs older than 30 days
BrainzLab::Nerve.cleanup_dead_jobs(older_than: 30.days.ago)
# Delete all jobs of a class
BrainzLab::Nerve.delete_dead_jobs_by_class("ObsoleteJob")
# Clear all dead jobs (use with caution!)
BrainzLab::Nerve.clear_dead_jobs(confirm: true)
Dead Job Analysis
Understand patterns in failures:
Copy
analysis = BrainzLab::Nerve.dead_job_analysis(period: :week)
# Top failing job classes
analysis.by_class.each do |class_name, count|
puts "#{class_name}: #{count} failures"
end
# Top error types
analysis.by_error.each do |error_class, count|
puts "#{error_class}: #{count} occurrences"
end
# Failure timeline
analysis.timeline.each do |hour, count|
puts "#{hour}: #{count} failures"
end
Alerts
Set up alerts for dead job patterns:
Copy
# Alert on new dead jobs
BrainzLab::Nerve.alert_on_dead_jobs(
threshold: 10, # per hour
channel: :slack
)
# Alert on specific job class failures
BrainzLab::Nerve.alert_on_dead_jobs(
class_name: "PaymentJob",
threshold: 1, # any failure
channel: :pagerduty
)
Retention
Configure how long dead jobs are retained:
Copy
BrainzLab::Nerve.configure do |config|
config.dead_job_retention = 30.days
config.auto_cleanup = true
end