"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export slot; it is filled with the real descriptor at the bottom of the file.
exports.moveStalledJobsToWait = void 0;
/**
 * Lua source of the "move stalled jobs to wait" Redis script, kept as a
 * single string (helper functions are inlined in place of the "-- Includes"
 * markers). Everything between the backticks is runtime data and must not
 * be reformatted.
 *
 * Inputs, as listed in the script's own header:
 *   KEYS[1] stalled (SET)        KEYS[2] wait (LIST)      KEYS[3] active (LIST)
 *   KEYS[4] failed (ZSET)        KEYS[5] stalled-check    KEYS[6] meta
 *   KEYS[7] paused (LIST)        KEYS[8] marker           KEYS[9] event stream (STREAM)
 *   ARGV[1] max stalled job count   ARGV[2] queue key prefix
 *   ARGV[3] timestamp               ARGV[4] max check time
 *
 * Flow of the main body, in the order the script performs it:
 *   1. If the stalled-check key exists, return {{}, {}} without doing any
 *      work; otherwise SET it to the timestamp with a PX expiry of ARGV[4].
 *   2. Trim the event stream (XTRIM MAXLEN ~) before emitting new events.
 *   3. Read all members of the stalled set and delete the set.
 *   4. For each id: ids starting with "0:" are only removed from the active
 *      list. For any other id, the job is handled only when "<jobKey>:lock"
 *      does not exist AND LREM actually removed it from the active list.
 *      - HINCRBY stalledCounter; when the new count exceeds ARGV[1] the job
 *        is added to the failed ZSET, gets failedReason/finishedOn, a
 *        "failed" event is emitted, the parent is updated according to the
 *        job opts (fpof / idof / rdof) and removeOnFail is applied.
 *      - Otherwise the job is RPUSHed to the wait or paused list (chosen
 *        from meta "paused"/"concurrency"), and "waiting" plus "stalled"
 *        events are emitted.
 *   5. Every id currently in the active list is SADDed to the stalled set
 *      in batches of 7000, to be examined on the next run.
 *
 * Returns a two-element array: {failed job ids, stalled job ids}.
 *
 * NOTE(review): in getNextDelayedTimestamp, `if #result then` is always
 * true in Lua (0 is truthy); the following nil check on nextTimestamp is
 * what actually guards the empty case - confirm whether `#result > 0` was
 * intended.
 * NOTE(review): in trimEvents, the `maxEvents ~= false` else-branch looks
 * unreachable because getOrSetMaxEvents replaces a missing value with
 * 10000 before returning - confirm.
 * NOTE(review): removeJobsByMaxAge declares shouldRemoveDebounceKey but
 * never reads it.
 */
const content = `--[[ Move stalled jobs to wait. Input: KEYS[1] 'stalled' (SET) KEYS[2] 'wait', (LIST) KEYS[3] 'active', (LIST) KEYS[4] 'failed', (ZSET) KEYS[5] 'stalled-check', (KEY) KEYS[6] 'meta', (KEY) KEYS[7] 'paused', (LIST) KEYS[8] 'marker' KEYS[9] 'event stream' (STREAM) ARGV[1] Max stalled job count ARGV[2] queue.toKey('') ARGV[3] timestamp ARGV[4] max check time Events: 'stalled' with stalled job id. ]] local rcall = redis.call -- Includes --[[ Function to add job in target list and add marker if needed. ]] -- Includes --[[ Add marker if needed when a job is available. ]] local function addBaseMarkerIfNeeded(markerKey, isPausedOrMaxed) if not isPausedOrMaxed then rcall("ZADD", markerKey, 0, "0") end end local function addJobInTargetList(targetKey, markerKey, pushCmd, isPausedOrMaxed, jobId) rcall(pushCmd, targetKey, jobId) addBaseMarkerIfNeeded(markerKey, isPausedOrMaxed) end --[[ Function to loop in batches. Just a bit of warning, some commands as ZREM could receive a maximum of 7000 parameters per call. ]] local function batches(n, batchSize) local i = 0 return function() local from = i * batchSize + 1 i = i + 1 if (from <= n) then local to = math.min(from + batchSize - 1, n) return from, to end end end --[[ Function to check for the meta.paused key to decide if we are paused or not (since an empty list and !EXISTS are not really the same). ]] local function getTargetQueueList(queueMetaKey, activeKey, waitKey, pausedKey) local queueAttributes = rcall("HMGET", queueMetaKey, "paused", "concurrency") if queueAttributes[1] then return pausedKey, true else if queueAttributes[2] then local activeCount = rcall("LLEN", activeKey) if activeCount >= tonumber(queueAttributes[2]) then return waitKey, true else return waitKey, false end end end return waitKey, false end --[[ Function to recursively move from waitingChildren to failed. 
]] -- Includes --[[ Validate and move parent to active if needed. ]] -- Includes --[[ Add delay marker if needed. ]] -- Includes --[[ Function to return the next delayed job timestamp. ]] local function getNextDelayedTimestamp(delayedKey) local result = rcall("ZRANGE", delayedKey, 0, 0, "WITHSCORES") if #result then local nextTimestamp = tonumber(result[2]) if nextTimestamp ~= nil then return nextTimestamp / 0x1000 end end end local function addDelayMarkerIfNeeded(markerKey, delayedKey) local nextTimestamp = getNextDelayedTimestamp(delayedKey) if nextTimestamp ~= nil then -- Replace the score of the marker with the newest known -- next timestamp. rcall("ZADD", markerKey, nextTimestamp, "1") end end --[[ Function to add job considering priority. ]] -- Includes local function addJobWithPriority(markerKey, prioritizedKey, priority, jobId, priorityCounterKey, isPausedOrMaxed) local prioCounter = rcall("INCR", priorityCounterKey) local score = priority * 0x100000000 + prioCounter % 0x100000000 rcall("ZADD", prioritizedKey, score, jobId) addBaseMarkerIfNeeded(markerKey, isPausedOrMaxed) end --[[ Function to check if queue is paused or maxed (since an empty list and !EXISTS are not really the same). ]] local function isQueuePausedOrMaxed(queueMetaKey, activeKey) local queueAttributes = rcall("HMGET", queueMetaKey, "paused", "concurrency") if queueAttributes[1] then return true else if queueAttributes[2] then local activeCount = rcall("LLEN", activeKey) return activeCount >= tonumber(queueAttributes[2]) end end return false end local function moveParentToWaitIfNeeded(parentQueueKey, parentDependenciesKey, parentKey, parentId, timestamp) local isParentActive = rcall("ZSCORE", parentQueueKey .. ":waiting-children", parentId) if rcall("SCARD", parentDependenciesKey) == 0 and isParentActive then rcall("ZREM", parentQueueKey .. ":waiting-children", parentId) local parentWaitKey = parentQueueKey .. ":wait" local parentPausedKey = parentQueueKey .. 
":paused" local parentActiveKey = parentQueueKey .. ":active" local parentMetaKey = parentQueueKey .. ":meta" local parentMarkerKey = parentQueueKey .. ":marker" local jobAttributes = rcall("HMGET", parentKey, "priority", "delay") local priority = tonumber(jobAttributes[1]) or 0 local delay = tonumber(jobAttributes[2]) or 0 if delay > 0 then local delayedTimestamp = tonumber(timestamp) + delay local score = delayedTimestamp * 0x1000 local parentDelayedKey = parentQueueKey .. ":delayed" rcall("ZADD", parentDelayedKey, score, parentId) rcall("XADD", parentQueueKey .. ":events", "*", "event", "delayed", "jobId", parentId, "delay", delayedTimestamp) addDelayMarkerIfNeeded(parentMarkerKey, parentDelayedKey) else if priority == 0 then local parentTarget, isParentPausedOrMaxed = getTargetQueueList(parentMetaKey, parentActiveKey, parentWaitKey, parentPausedKey) addJobInTargetList(parentTarget, parentMarkerKey, "RPUSH", isParentPausedOrMaxed, parentId) else local isPausedOrMaxed = isQueuePausedOrMaxed(parentMetaKey, parentActiveKey) addJobWithPriority(parentMarkerKey, parentQueueKey .. ":prioritized", priority, parentId, parentQueueKey .. ":pc", isPausedOrMaxed) end rcall("XADD", parentQueueKey .. ":events", "*", "event", "waiting", "jobId", parentId, "prev", "waiting-children") end end end --[[ Function to remove deduplication key if needed. ]] local function removeDeduplicationKeyIfNeeded(prefixKey, deduplicationId) if deduplicationId then local deduplicationKey = prefixKey .. "de:" .. deduplicationId local pttl = rcall("PTTL", deduplicationKey) if pttl == 0 or pttl == -1 then rcall("DEL", deduplicationKey) end end end local function moveParentFromWaitingChildrenToFailed( parentQueueKey, parentKey, parentId, jobIdKey, timestamp) if rcall("ZREM", parentQueueKey .. ":waiting-children", parentId) == 1 then rcall("ZADD", parentQueueKey .. ":failed", timestamp, parentId) local failedReason = "child " .. jobIdKey .. 
" failed" rcall("HMSET", parentKey, "failedReason", failedReason, "finishedOn", timestamp) rcall("XADD", parentQueueKey .. ":events", "*", "event", "failed", "jobId", parentId, "failedReason", failedReason, "prev", "waiting-children") local jobAttributes = rcall("HMGET", parentKey, "parent", "deid") removeDeduplicationKeyIfNeeded(parentQueueKey .. ":", jobAttributes[2]) if jobAttributes[1] then local parentData = cjson.decode(jobAttributes[1]) if parentData['fpof'] then moveParentFromWaitingChildrenToFailed( parentData['queueKey'], parentData['queueKey'] .. ':' .. parentData['id'], parentData['id'], parentKey, timestamp ) elseif parentData['idof'] or parentData['rdof'] then local grandParentKey = parentData['queueKey'] .. ':' .. parentData['id'] local grandParentDependenciesSet = grandParentKey .. ":dependencies" if rcall("SREM", grandParentDependenciesSet, parentKey) == 1 then moveParentToWaitIfNeeded(parentData['queueKey'], grandParentDependenciesSet, grandParentKey, parentData['id'], timestamp) if parentData['idof'] then local grandParentFailedSet = grandParentKey .. ":failed" rcall("HSET", grandParentFailedSet, parentKey, failedReason) end end end end end end --[[ Function to remove job. ]] -- Includes --[[ Function to remove deduplication key. ]] local function removeDeduplicationKey(prefixKey, jobKey) local deduplicationId = rcall("HGET", jobKey, "deid") if deduplicationId then local deduplicationKey = prefixKey .. "de:" .. deduplicationId rcall("DEL", deduplicationKey) end end --[[ Function to remove job keys. ]] local function removeJobKeys(jobKey) return rcall("DEL", jobKey, jobKey .. ':logs', jobKey .. ':dependencies', jobKey .. ':processed', jobKey .. ':failed') end --[[ Check if this job has a parent. If so we will just remove it from the parent child list, but if it is the last child we should move the parent to "wait/paused" which requires code from "moveToFinished" ]] -- Includes --[[ Functions to destructure job key. 
Just a bit of warning, these functions may be a bit slow and affect performance significantly. ]] local getJobIdFromKey = function (jobKey) return string.match(jobKey, ".*:(.*)") end local getJobKeyPrefix = function (jobKey, jobId) return string.sub(jobKey, 0, #jobKey - #jobId) end local function moveParentToWait(parentPrefix, parentId, emitEvent) local parentTarget, isPausedOrMaxed = getTargetQueueList(parentPrefix .. "meta", parentPrefix .. "active", parentPrefix .. "wait", parentPrefix .. "paused") addJobInTargetList(parentTarget, parentPrefix .. "marker", "RPUSH", isPausedOrMaxed, parentId) if emitEvent then local parentEventStream = parentPrefix .. "events" rcall("XADD", parentEventStream, "*", "event", "waiting", "jobId", parentId, "prev", "waiting-children") end end local function removeParentDependencyKey(jobKey, hard, parentKey, baseKey, debounceId) if parentKey then local parentDependenciesKey = parentKey .. ":dependencies" local result = rcall("SREM", parentDependenciesKey, jobKey) if result > 0 then local pendingDependencies = rcall("SCARD", parentDependenciesKey) if pendingDependencies == 0 then local parentId = getJobIdFromKey(parentKey) local parentPrefix = getJobKeyPrefix(parentKey, parentId) local numRemovedElements = rcall("ZREM", parentPrefix .. "waiting-children", parentId) if numRemovedElements == 1 then if hard then -- remove parent in same queue if parentPrefix == baseKey then removeParentDependencyKey(parentKey, hard, nil, baseKey, nil) removeJobKeys(parentKey) if debounceId then rcall("DEL", parentPrefix .. "de:" .. debounceId) end else moveParentToWait(parentPrefix, parentId) end else moveParentToWait(parentPrefix, parentId, true) end end end return true end else local parentAttributes = rcall("HMGET", jobKey, "parentKey", "deid") local missedParentKey = parentAttributes[1] if( (type(missedParentKey) == "string") and missedParentKey ~= "" and (rcall("EXISTS", missedParentKey) == 1)) then local parentDependenciesKey = missedParentKey .. 
":dependencies" local result = rcall("SREM", parentDependenciesKey, jobKey) if result > 0 then local pendingDependencies = rcall("SCARD", parentDependenciesKey) if pendingDependencies == 0 then local parentId = getJobIdFromKey(missedParentKey) local parentPrefix = getJobKeyPrefix(missedParentKey, parentId) local numRemovedElements = rcall("ZREM", parentPrefix .. "waiting-children", parentId) if numRemovedElements == 1 then if hard then if parentPrefix == baseKey then removeParentDependencyKey(missedParentKey, hard, nil, baseKey, nil) removeJobKeys(missedParentKey) if parentAttributes[2] then rcall("DEL", parentPrefix .. "de:" .. parentAttributes[2]) end else moveParentToWait(parentPrefix, parentId) end else moveParentToWait(parentPrefix, parentId, true) end end end return true end end end return false end local function removeJob(jobId, hard, baseKey, shouldRemoveDeduplicationKey) local jobKey = baseKey .. jobId removeParentDependencyKey(jobKey, hard, nil, baseKey) if shouldRemoveDeduplicationKey then removeDeduplicationKey(baseKey, jobKey) end removeJobKeys(jobKey) end --[[ Functions to remove jobs by max age. ]] -- Includes local function removeJobsByMaxAge(timestamp, maxAge, targetSet, prefix, shouldRemoveDebounceKey) local start = timestamp - maxAge * 1000 local jobIds = rcall("ZREVRANGEBYSCORE", targetSet, start, "-inf") for i, jobId in ipairs(jobIds) do removeJob(jobId, false, prefix, false --[[remove debounce key]]) end rcall("ZREMRANGEBYSCORE", targetSet, "-inf", start) end --[[ Functions to remove jobs by max count. ]] -- Includes local function removeJobsByMaxCount(maxCount, targetSet, prefix) local start = maxCount local jobIds = rcall("ZREVRANGE", targetSet, start, -1) for i, jobId in ipairs(jobIds) do removeJob(jobId, false, prefix, false --[[remove debounce key]]) end rcall("ZREMRANGEBYRANK", targetSet, 0, -(maxCount + 1)) end --[[ Function to trim events, default 10000. ]] -- Includes --[[ Function to get max events value or set by default 10000. 
]] local function getOrSetMaxEvents(metaKey) local maxEvents = rcall("HGET", metaKey, "opts.maxLenEvents") if not maxEvents then maxEvents = 10000 rcall("HSET", metaKey, "opts.maxLenEvents", maxEvents) end return maxEvents end local function trimEvents(metaKey, eventStreamKey) local maxEvents = getOrSetMaxEvents(metaKey) if maxEvents ~= false then rcall("XTRIM", eventStreamKey, "MAXLEN", "~", maxEvents) else rcall("XTRIM", eventStreamKey, "MAXLEN", "~", 10000) end end local stalledKey = KEYS[1] local waitKey = KEYS[2] local activeKey = KEYS[3] local failedKey = KEYS[4] local stalledCheckKey = KEYS[5] local metaKey = KEYS[6] local pausedKey = KEYS[7] local markerKey = KEYS[8] local eventStreamKey = KEYS[9] local maxStalledJobCount = tonumber(ARGV[1]) local queueKeyPrefix = ARGV[2] local timestamp = ARGV[3] local maxCheckTime = ARGV[4] if rcall("EXISTS", stalledCheckKey) == 1 then return {{}, {}} end rcall("SET", stalledCheckKey, timestamp, "PX", maxCheckTime) -- Trim events before emiting them to avoid trimming events emitted in this script trimEvents(metaKey, eventStreamKey) -- Move all stalled jobs to wait local stalling = rcall('SMEMBERS', stalledKey) local stalled = {} local failed = {} if (#stalling > 0) then rcall('DEL', stalledKey) -- Remove from active list for i, jobId in ipairs(stalling) do -- Markers in waitlist DEPRECATED in v5: Remove in v6. if string.sub(jobId, 1, 2) == "0:" then -- If the jobId is a delay marker ID we just remove it. rcall("LREM", activeKey, 1, jobId) else local jobKey = queueKeyPrefix .. jobId -- Check that the lock is also missing, then we can handle this job as really stalled. if (rcall("EXISTS", jobKey .. ":lock") == 0) then -- Remove from the active queue. local removed = rcall("LREM", activeKey, 1, jobId) if (removed > 0) then -- If this job has been stalled too many times, such as if it crashes the worker, then fail it. 
local stalledCount = rcall("HINCRBY", jobKey, "stalledCounter", 1) if (stalledCount > maxStalledJobCount) then local jobAttributes = rcall("HMGET", jobKey, "opts", "parent", "deid") local rawOpts = jobAttributes[1] local rawParentData = jobAttributes[2] local opts = cjson.decode(rawOpts) local removeOnFailType = type(opts["removeOnFail"]) rcall("ZADD", failedKey, timestamp, jobId) removeDeduplicationKeyIfNeeded(queueKeyPrefix, jobAttributes[3]) local failedReason = "job stalled more than allowable limit" rcall("HMSET", jobKey, "failedReason", failedReason, "finishedOn", timestamp) rcall("XADD", eventStreamKey, "*", "event", "failed", "jobId", jobId, 'prev', 'active', 'failedReason', failedReason) if rawParentData ~= false then if opts['fpof'] then local parentData = cjson.decode(rawParentData) moveParentFromWaitingChildrenToFailed( parentData['queueKey'], parentData['queueKey'] .. ':' .. parentData['id'], parentData['id'], jobKey, timestamp ) elseif opts['idof'] or opts['rdof'] then local parentData = cjson.decode(rawParentData) local parentKey = parentData['queueKey'] .. ':' .. parentData['id'] local dependenciesSet = parentKey .. ":dependencies" if rcall("SREM", dependenciesSet, jobKey) == 1 then moveParentToWaitIfNeeded(parentData['queueKey'], dependenciesSet, parentKey, parentData['id'], timestamp) if opts['idof'] then local failedSet = parentKey .. 
":failed" rcall("HSET", failedSet, jobKey, failedReason) end end end end if removeOnFailType == "number" then removeJobsByMaxCount(opts["removeOnFail"], failedKey, queueKeyPrefix) elseif removeOnFailType == "boolean" then if opts["removeOnFail"] then removeJob(jobId, false, queueKeyPrefix, false --[[remove debounce key]]) rcall("ZREM", failedKey, jobId) end elseif removeOnFailType ~= "nil" then local maxAge = opts["removeOnFail"]["age"] local maxCount = opts["removeOnFail"]["count"] if maxAge ~= nil then removeJobsByMaxAge(timestamp, maxAge, failedKey, queueKeyPrefix) end if maxCount ~= nil and maxCount > 0 then removeJobsByMaxCount(maxCount, failedKey, queueKeyPrefix) end end table.insert(failed, jobId) else local target, isPausedOrMaxed = getTargetQueueList(metaKey, activeKey, waitKey, pausedKey) -- Move the job back to the wait queue, to immediately be picked up by a waiting worker. addJobInTargetList(target, markerKey, "RPUSH", isPausedOrMaxed, jobId) rcall("XADD", eventStreamKey, "*", "event", "waiting", "jobId", jobId, 'prev', 'active') -- Emit the stalled event rcall("XADD", eventStreamKey, "*", "event", "stalled", "jobId", jobId) table.insert(stalled, jobId) end end end end end end -- Mark potentially stalled jobs local active = rcall('LRANGE', activeKey, 0, -1) if (#active > 0) then for from, to in batches(#active, 7000) do rcall('SADD', stalledKey, unpack(active, from, to)) end end return {failed, stalled}`;
/**
 * Script descriptor exported by this module.
 *   name    - identifier of the script ('moveStalledJobsToWait').
 *   content - the Lua source defined above.
 *   keys    - 9, matching the nine KEYS[] entries the Lua body reads;
 *             presumably used by the caller to split KEYS from ARGV when
 *             registering the command - confirm against the loader.
 */
exports.moveStalledJobsToWait = { name: 'moveStalledJobsToWait', content, keys: 9, };
//# sourceMappingURL=moveStalledJobsToWait-9.js.map