Human in the Loop
If a headless scrape hits a CAPTCHA or block page, automatically spawn a visible browser for human intervention, then resume extraction.
name = "Hybrid Recovery"
url = "https://example.com/protected-page"
selector = ".data-item"
fields = [ "title:.title", "value:.value"]
interval = 600

-- Chrome binary used by get_browser() when a headless instance is requested.
local HEADLESS_PATH = "/usr/bin/google-chrome"

-- Chrome binary used by get_browser() when a visible instance is requested.
local VISUAL_PATH = "/usr/bin/google-chrome"

-- override_fetch() waits for this selector to decide the page loaded its data.
-- NOTE(review): differs from the config `selector` (".data-item") above --
-- confirm which of the two is intended.
local DATA_SELECTOR = ".item-row"

-- override_fetch() treats the presence of this element as a CAPTCHA/block page.
local BLOCK_SELECTOR = "#captcha-iframe"

-- before_fetch() treats the presence of this element in the visible browser
-- as proof that the human finished solving the challenge.
local SUCCESS_SELECTOR = ".dashboard-loaded"
--- Launch a browser through `cdp.launch` in the requested mode.
-- The executable is HEADLESS_PATH when `headless` is truthy and VISUAL_PATH
-- otherwise; the instance is always launched with `keep_alive = true`.
-- @param headless truthy for a headless instance, falsy for a visible one
-- @return whatever `cdp.launch` returns (the browser handle used by the hooks)
function get_browser(headless)
  local binary
  if headless then
    binary = HEADLESS_PATH
  else
    binary = VISUAL_PATH
  end

  local launch_options = {
    executable = binary,
    headless = headless,
    keep_alive = true,
  }

  return cdp.launch(launch_options)
end
--- Pre-fetch hook: gate requests while a human is solving a block page.
--
-- Reads "recovery_state" from the store (defaulting to "NORMAL"):
--   * any state other than "RECOVERING": the request is passed through.
--   * "RECOVERING": the visible browser's page is probed for SUCCESS_SELECTOR.
--       - found: state becomes "SOLVED" and the request is passed through;
--       - not found, and more than 900 s since "recovery_start": the visible
--         browser is closed and the state is reset to "NORMAL";
--       - otherwise nothing changes.
--     In both not-found cases the hook returns nil.
--     NOTE(review): presumably a nil return makes the host skip this fetch --
--     confirm against the host's hook contract.
--
-- @param request the pending request, returned unchanged when allowed
-- @return the same `request`, or nil while recovery is still pending/expired
function before_fetch(request)
  -- How long a human gets to solve the challenge before the cycle resets.
  local RECOVERY_TIMEOUT_SECONDS = 900

  local state = store_get("recovery_state") or "NORMAL"
  if state ~= "RECOVERING" then
    return request
  end

  -- The state lives in the store but the browser handle is a plain global,
  -- so the handle can be gone while the stored state still says RECOVERING.
  -- Without a handle there is nothing to poll: reset and fetch normally.
  if not visual_browser then
    store_set("recovery_state", "NORMAL")
    return request
  end

  local page = visual_browser:attach({ reuse = true })
  local solved = page:evaluate(
    string.format("document.querySelector('%s') !== null", SUCCESS_SELECTOR)
  )
  if solved then
    store_set("recovery_state", "SOLVED")
    return request
  end

  -- A missing or malformed timestamp counts as 0, i.e. already expired,
  -- instead of raising on arithmetic with nil.
  local started_at = tonumber(store_get("recovery_start")) or 0
  if os.time() - started_at > RECOVERY_TIMEOUT_SECONDS then
    visual_browser:close()
    visual_browser = nil
    store_set("recovery_state", "NORMAL")
  end

  return nil
end
--- Fetch hook: scrape headlessly, hand over to a human when blocked.
--
-- Behaviour by stored "recovery_state" (default "NORMAL"):
--   * "SOLVED": the HTML of the visible browser's page is returned, that
--     browser is closed and the state is reset to "NORMAL".
--   * otherwise: the URL is opened in the (lazily launched) headless browser.
--       - DATA_SELECTOR appears within 8000 ms: the page HTML is returned;
--       - BLOCK_SELECTOR is present: the headless browser is closed, a visible
--         one is opened on the same URL, the state becomes "RECOVERING" with a
--         "recovery_start" timestamp, a notification is sent, nil is returned;
--       - neither: an error table is returned.
--
-- @param request table with at least a `url` field
-- @return `{ status, body, url }` on success, `{ error = ... }` on failure,
--         or nil when recovery has just been handed to a human
function override_fetch(request)
  local state = store_get("recovery_state") or "NORMAL"

  if state == "SOLVED" then
    if visual_browser then
      local solved_page = visual_browser:attach({ reuse = true })
      local html = solved_page:content()
      solved_page:close()
      visual_browser:close()
      visual_browser = nil
      store_set("recovery_state", "NORMAL")
      return { status = 200, body = html, url = request.url }
    end
    -- The stored state outlived the browser handle; there is no solved page
    -- to read, so reset and fall through to a normal headless fetch.
    store_set("recovery_state", "NORMAL")
  end

  if not browser then
    browser = get_browser(true)
  end

  local page = browser:attach()
  defer(function() page:close() end)

  local ok, err = page:open(request.url)
  if not ok then
    return { error = tostring(err) }
  end

  local found = page:wait_for_selector(DATA_SELECTOR, 8000)
  if found then
    return { status = 200, body = page:content(), url = request.url }
  end

  local blocked = page:evaluate(
    string.format("document.querySelector('%s') !== null", BLOCK_SELECTOR)
  )
  if not blocked then
    return { error = "Data not found, no block detected" }
  end

  -- Blocked: swap the headless browser for a visible one a human can drive.
  browser:close()
  browser = nil
  visual_browser = get_browser(false)

  local v_page = visual_browser:attach()
  local v_ok, v_err = v_page:open(request.url)
  if not v_ok then
    -- Nothing for a human to solve if the page never loaded; do not enter
    -- RECOVERING or notify, just report the failure.
    visual_browser:close()
    visual_browser = nil
    return { error = "Recovery browser failed to open page: " .. tostring(v_err) }
  end

  store_set("recovery_state", "RECOVERING")
  store_set("recovery_start", tostring(os.time()))

  notify("Scraper Blocked", "Please solve the CAPTCHA.")
  return nil
end

-- Key Concepts
Section titled “Key Concepts”
- `store_*` persists recovery state across runs
- Headless browser runs normally until a block is detected
- Visual browser spawns for human intervention
- `before_fetch` checks if the puzzle was solved before continuing
- Timeout resets the cycle if no one intervenes