So I’ve been trying to get this config working for hours now. Running it as a job ends it instantly with Invalid, but in the Debugger I’m able to run through the whole thing without any issues. What’s the issue?
Config:
// Builds the search query: takes the CODE slice of the input data line and
// replaces every space (regex "\ ") with "+", storing the result in
// @regexReplaceOutput for use in the form-encoded request body.
BLOCK:RegexReplace
LABEL:replace space with plus in queries
original = $"<input.CODE>"
pattern = "\\ "
replacement = "+"
=> VAR @regexReplaceOutput
ENDBLOCK
// POSTs the search form to Startpage for result page 1, using the
// plus-encoded query held in @regexReplaceOutput.
// NOTE(review): the block's content type (last line before ENDBLOCK) is
// "text/html; charset=utf-8" while customHeaders declares
// "application/x-www-form-urlencoded" -- confirm which one is actually sent.
BLOCK:HttpRequest
LABEL:Startpage Page 1
url = "https://www.startpage.com/sp/search"
method = POST
customCookies = ${}
customHeaders = {("Cache-Control", "max-age=0"), ("Sec-Ch-Ua", "\";Not A Brand\";v=\"99\", \"Chromium\";v=\"94\""), ("Sec-Ch-Ua-Mobile", "?0"), ("Sec-Ch-Ua-Platform", "\"Windows\""), ("Upgrade-Insecure-Requests", "1"), ("Origin", "https://www.startpage.com"), ("Content-Type", "application/x-www-form-urlencoded"), ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), ("Sec-Fetch-Site", "same-origin"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-User", "?1"), ("Sec-Fetch-Dest", "document"), ("Referer", "https://www.startpage.com/"), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-GB,en-US;q=0.9,en;q=0.8"), ("Connection", "close")}
TYPE:STANDARD
$"language=deutsch&abp=-1&lui=deutsch&prfe=b126ccae90a4470f4209ffa74e981d8bfd424470b1bf0f689ab468f66412670df83212ec2f0ef7e712f6f1cd6ed58c4ba29355b7b9adb3db62bda232ca44ed1c9e362256615508d71bde1d474c481c38&t=default&query=<regexReplaceOutput>&cat=web&page=1"
"text/html; charset=utf-8"
ENDBLOCK
// Extracts every result link from @data.SOURCE (presumably the body of the
// preceding response) into the list @parsepage. MODE:CSS is selected, so the
// selector ".w-gl__result-url" with attribute "innerHTML" is what applies;
// leftDelim/rightDelim, xPath and pattern look like leftovers from the other
// parse modes -- TODO confirm they are ignored in CSS mode.
BLOCK:Parse
LABEL:Parse Results
input = @data.SOURCE
leftDelim = "class=\"w-gl__result-url result-link\"\\nhref=\""
rightDelim = "\""
cssSelector = ".w-gl__result-url"
attributeName = "innerHTML"
xPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w-gl__result-url\", \" \" ))]"
pattern = "class\\=\\\"w\\-gl\\_\\_result\\-url\\ result\\-link\\\"\\n(\\ )+href\\="
multiLine = True
RECURSIVE
MODE:CSS
=> VAR @parsepage
ENDBLOCK
// Reduces the parsed links in @parsepage to scheme + host (+ optional port):
// the regex matches protocol://[thld.]sld.tld[:port] and outputFormat "[0]"
// keeps the whole match. RECURSIVE collects every match into @parseregex.
BLOCK:Parse
LABEL:Parse clear Results
input = @parsepage
pattern = "(?<protocol>\\w*)\\:\\/\\/(?:(?:(?<thld>[\\w\\-]*)(?:\\.))?(?<sld>[\\w\\-]*))\\.(?<tld>\\w*)(?:\\:(?<port>\\d*))?"
outputFormat = "[0]"
multiLine = True
RECURSIVE
MODE:Regex
=> VAR @parseregex
ENDBLOCK
// Writes the page-1 domains in @parseregex to the temp file (write, not
// append, so this starts the file for the current run).
// NOTE(review): the path is fixed; if several bots run this config at once
// they would all share this one temp file -- confirm that is intended.
BLOCK:FileWriteLines
LABEL:write to temp file
path = "startpagescraper\\temp.txt"
lines = @parseregex
ENDBLOCK
// POSTs the search form to Startpage for result page 2.
// The query uses @regexReplaceOutput (spaces already replaced by "+"), the
// same value page 1 sends, instead of the raw <input.CODE>, so multi-word
// queries are encoded consistently on every page.
// NOTE(review): the block's content type (last line before ENDBLOCK) is
// "text/html; charset=utf-8" while customHeaders declares
// "application/x-www-form-urlencoded" -- confirm which one is actually sent.
BLOCK:HttpRequest
LABEL:Startpage Page 2
url = "https://www.startpage.com/sp/search"
method = POST
customCookies = ${}
customHeaders = {("Cache-Control", "max-age=0"), ("Sec-Ch-Ua", "\";Not A Brand\";v=\"99\", \"Chromium\";v=\"94\""), ("Sec-Ch-Ua-Mobile", "?0"), ("Sec-Ch-Ua-Platform", "\"Windows\""), ("Upgrade-Insecure-Requests", "1"), ("Origin", "https://www.startpage.com"), ("Content-Type", "application/x-www-form-urlencoded"), ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), ("Sec-Fetch-Site", "same-origin"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-User", "?1"), ("Sec-Fetch-Dest", "document"), ("Referer", "https://www.startpage.com/"), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-GB,en-US;q=0.9,en;q=0.8"), ("Connection", "close")}
TYPE:STANDARD
$"language=deutsch&abp=-1&lui=deutsch&prfe=b126ccae90a4470f4209ffa74e981d8bfd424470b1bf0f689ab468f66412670df83212ec2f0ef7e712f6f1cd6ed58c4ba29355b7b9adb3db62bda232ca44ed1c9e362256615508d71bde1d474c481c38&t=default&query=<regexReplaceOutput>&cat=web&page=2"
"text/html; charset=utf-8"
ENDBLOCK
// Extracts every result link from @data.SOURCE (presumably the body of the
// preceding response) into the list @parsepage, overwriting the previous
// page's value. MODE:CSS is selected, so cssSelector/attributeName apply;
// leftDelim/rightDelim, xPath and pattern look like leftovers from the other
// parse modes -- TODO confirm they are ignored in CSS mode.
BLOCK:Parse
LABEL:Parse Results
input = @data.SOURCE
leftDelim = "class=\"w-gl__result-url result-link\"\\nhref=\""
rightDelim = "\""
cssSelector = ".w-gl__result-url"
attributeName = "innerHTML"
xPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w-gl__result-url\", \" \" ))]"
pattern = "class\\=\\\"w\\-gl\\_\\_result\\-url\\ result\\-link\\\"\\n(\\ )+href\\="
multiLine = True
RECURSIVE
MODE:CSS
=> VAR @parsepage
ENDBLOCK
// Reduces the parsed links in @parsepage to scheme + host (+ optional port):
// the regex matches protocol://[thld.]sld.tld[:port] and outputFormat "[0]"
// keeps the whole match. RECURSIVE collects every match into @parseregex.
BLOCK:Parse
LABEL:Parse clear Results
input = @parsepage
pattern = "(?<protocol>\\w*)\\:\\/\\/(?:(?:(?<thld>[\\w\\-]*)(?:\\.))?(?<sld>[\\w\\-]*))\\.(?<tld>\\w*)(?:\\:(?<port>\\d*))?"
outputFormat = "[0]"
multiLine = True
RECURSIVE
MODE:Regex
=> VAR @parseregex
ENDBLOCK
// Appends the page-2 domains in @parseregex to the temp file started by the
// page-1 write.
BLOCK:FileAppendLines
LABEL:write to temp file
path = "startpagescraper\\temp.txt"
lines = @parseregex
ENDBLOCK
// POSTs the search form to Startpage for result page 3.
// The query uses @regexReplaceOutput (spaces already replaced by "+"), the
// same value page 1 sends, instead of the raw <input.CODE>, so multi-word
// queries are encoded consistently on every page.
// NOTE(review): the block's content type (last line before ENDBLOCK) is
// "text/html; charset=utf-8" while customHeaders declares
// "application/x-www-form-urlencoded" -- confirm which one is actually sent.
BLOCK:HttpRequest
LABEL:Startpage Page 3
url = "https://www.startpage.com/sp/search"
method = POST
customCookies = ${}
customHeaders = {("Cache-Control", "max-age=0"), ("Sec-Ch-Ua", "\";Not A Brand\";v=\"99\", \"Chromium\";v=\"94\""), ("Sec-Ch-Ua-Mobile", "?0"), ("Sec-Ch-Ua-Platform", "\"Windows\""), ("Upgrade-Insecure-Requests", "1"), ("Origin", "https://www.startpage.com"), ("Content-Type", "application/x-www-form-urlencoded"), ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), ("Sec-Fetch-Site", "same-origin"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-User", "?1"), ("Sec-Fetch-Dest", "document"), ("Referer", "https://www.startpage.com/"), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-GB,en-US;q=0.9,en;q=0.8"), ("Connection", "close")}
TYPE:STANDARD
$"language=deutsch&abp=-1&lui=deutsch&prfe=b126ccae90a4470f4209ffa74e981d8bfd424470b1bf0f689ab468f66412670df83212ec2f0ef7e712f6f1cd6ed58c4ba29355b7b9adb3db62bda232ca44ed1c9e362256615508d71bde1d474c481c38&t=default&query=<regexReplaceOutput>&cat=web&page=3"
"text/html; charset=utf-8"
ENDBLOCK
// Extracts every result link from @data.SOURCE (presumably the body of the
// preceding response) into the list @parsepage, overwriting the previous
// page's value. MODE:CSS is selected, so cssSelector/attributeName apply;
// leftDelim/rightDelim, xPath and pattern look like leftovers from the other
// parse modes -- TODO confirm they are ignored in CSS mode.
BLOCK:Parse
LABEL:Parse Results
input = @data.SOURCE
leftDelim = "class=\"w-gl__result-url result-link\"\\nhref=\""
rightDelim = "\""
cssSelector = ".w-gl__result-url"
attributeName = "innerHTML"
xPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w-gl__result-url\", \" \" ))]"
pattern = "class\\=\\\"w\\-gl\\_\\_result\\-url\\ result\\-link\\\"\\n(\\ )+href\\="
multiLine = True
RECURSIVE
MODE:CSS
=> VAR @parsepage
ENDBLOCK
// Reduces the parsed links in @parsepage to scheme + host (+ optional port):
// the regex matches protocol://[thld.]sld.tld[:port] and outputFormat "[0]"
// keeps the whole match. RECURSIVE collects every match into @parseregex.
BLOCK:Parse
LABEL:Parse clear Results
input = @parsepage
pattern = "(?<protocol>\\w*)\\:\\/\\/(?:(?:(?<thld>[\\w\\-]*)(?:\\.))?(?<sld>[\\w\\-]*))\\.(?<tld>\\w*)(?:\\:(?<port>\\d*))?"
outputFormat = "[0]"
multiLine = True
RECURSIVE
MODE:Regex
=> VAR @parseregex
ENDBLOCK
// Appends the page-3 domains in @parseregex to the temp file.
BLOCK:FileAppendLines
LABEL:write to temp file
path = "startpagescraper\\temp.txt"
lines = @parseregex
ENDBLOCK
// POSTs the search form to Startpage for result page 4.
// The query uses @regexReplaceOutput (spaces already replaced by "+"), the
// same value page 1 sends, instead of the raw <input.CODE>, so multi-word
// queries are encoded consistently on every page.
// NOTE(review): the block's content type (last line before ENDBLOCK) is
// "text/html; charset=utf-8" while customHeaders declares
// "application/x-www-form-urlencoded" -- confirm which one is actually sent.
BLOCK:HttpRequest
LABEL:Startpage Page 4
url = "https://www.startpage.com/sp/search"
method = POST
customCookies = ${}
customHeaders = {("Cache-Control", "max-age=0"), ("Sec-Ch-Ua", "\";Not A Brand\";v=\"99\", \"Chromium\";v=\"94\""), ("Sec-Ch-Ua-Mobile", "?0"), ("Sec-Ch-Ua-Platform", "\"Windows\""), ("Upgrade-Insecure-Requests", "1"), ("Origin", "https://www.startpage.com"), ("Content-Type", "application/x-www-form-urlencoded"), ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), ("Sec-Fetch-Site", "same-origin"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-User", "?1"), ("Sec-Fetch-Dest", "document"), ("Referer", "https://www.startpage.com/"), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-GB,en-US;q=0.9,en;q=0.8"), ("Connection", "close")}
TYPE:STANDARD
$"language=deutsch&abp=-1&lui=deutsch&prfe=b126ccae90a4470f4209ffa74e981d8bfd424470b1bf0f689ab468f66412670df83212ec2f0ef7e712f6f1cd6ed58c4ba29355b7b9adb3db62bda232ca44ed1c9e362256615508d71bde1d474c481c38&t=default&query=<regexReplaceOutput>&cat=web&page=4"
"text/html; charset=utf-8"
ENDBLOCK
// Extracts every result link from @data.SOURCE (presumably the body of the
// preceding response) into the list @parsepage, overwriting the previous
// page's value. MODE:CSS is selected, so cssSelector/attributeName apply;
// leftDelim/rightDelim, xPath and pattern look like leftovers from the other
// parse modes -- TODO confirm they are ignored in CSS mode.
BLOCK:Parse
LABEL:Parse Results
input = @data.SOURCE
leftDelim = "class=\"w-gl__result-url result-link\"\\nhref=\""
rightDelim = "\""
cssSelector = ".w-gl__result-url"
attributeName = "innerHTML"
xPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w-gl__result-url\", \" \" ))]"
pattern = "class\\=\\\"w\\-gl\\_\\_result\\-url\\ result\\-link\\\"\\n(\\ )+href\\="
multiLine = True
RECURSIVE
MODE:CSS
=> VAR @parsepage
ENDBLOCK
// Reduces the parsed links in @parsepage to scheme + host (+ optional port):
// the regex matches protocol://[thld.]sld.tld[:port] and outputFormat "[0]"
// keeps the whole match. RECURSIVE collects every match into @parseregex.
BLOCK:Parse
LABEL:Parse clear Results
input = @parsepage
pattern = "(?<protocol>\\w*)\\:\\/\\/(?:(?:(?<thld>[\\w\\-]*)(?:\\.))?(?<sld>[\\w\\-]*))\\.(?<tld>\\w*)(?:\\:(?<port>\\d*))?"
outputFormat = "[0]"
multiLine = True
RECURSIVE
MODE:Regex
=> VAR @parseregex
ENDBLOCK
// Appends the page-4 domains in @parseregex to the temp file.
BLOCK:FileAppendLines
LABEL:write to temp file
path = "startpagescraper\\temp.txt"
lines = @parseregex
ENDBLOCK
// POSTs the search form to Startpage for result page 5.
// The query uses @regexReplaceOutput (spaces already replaced by "+"), the
// same value page 1 sends, instead of the raw <input.CODE>, so multi-word
// queries are encoded consistently on every page.
// NOTE(review): the block's content type (last line before ENDBLOCK) is
// "text/html; charset=utf-8" while customHeaders declares
// "application/x-www-form-urlencoded" -- confirm which one is actually sent.
BLOCK:HttpRequest
LABEL:Startpage Page 5
url = "https://www.startpage.com/sp/search"
method = POST
customCookies = ${}
customHeaders = {("Cache-Control", "max-age=0"), ("Sec-Ch-Ua", "\";Not A Brand\";v=\"99\", \"Chromium\";v=\"94\""), ("Sec-Ch-Ua-Mobile", "?0"), ("Sec-Ch-Ua-Platform", "\"Windows\""), ("Upgrade-Insecure-Requests", "1"), ("Origin", "https://www.startpage.com"), ("Content-Type", "application/x-www-form-urlencoded"), ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), ("Sec-Fetch-Site", "same-origin"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-User", "?1"), ("Sec-Fetch-Dest", "document"), ("Referer", "https://www.startpage.com/"), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-GB,en-US;q=0.9,en;q=0.8"), ("Connection", "close")}
TYPE:STANDARD
$"language=deutsch&abp=-1&lui=deutsch&prfe=b126ccae90a4470f4209ffa74e981d8bfd424470b1bf0f689ab468f66412670df83212ec2f0ef7e712f6f1cd6ed58c4ba29355b7b9adb3db62bda232ca44ed1c9e362256615508d71bde1d474c481c38&t=default&query=<regexReplaceOutput>&cat=web&page=5"
"text/html; charset=utf-8"
ENDBLOCK
// Extracts every result link from @data.SOURCE (presumably the body of the
// preceding response) into the list @parsepage, overwriting the previous
// page's value. MODE:CSS is selected, so cssSelector/attributeName apply;
// leftDelim/rightDelim, xPath and pattern look like leftovers from the other
// parse modes -- TODO confirm they are ignored in CSS mode.
BLOCK:Parse
LABEL:Parse Results
input = @data.SOURCE
leftDelim = "class=\"w-gl__result-url result-link\"\\nhref=\""
rightDelim = "\""
cssSelector = ".w-gl__result-url"
attributeName = "innerHTML"
xPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w-gl__result-url\", \" \" ))]"
pattern = "class\\=\\\"w\\-gl\\_\\_result\\-url\\ result\\-link\\\"\\n(\\ )+href\\="
multiLine = True
RECURSIVE
MODE:CSS
=> VAR @parsepage
ENDBLOCK
// Reduces the parsed links in @parsepage to scheme + host (+ optional port):
// the regex matches protocol://[thld.]sld.tld[:port] and outputFormat "[0]"
// keeps the whole match. RECURSIVE collects every match into @parseregex.
BLOCK:Parse
LABEL:Parse clear Results
input = @parsepage
pattern = "(?<protocol>\\w*)\\:\\/\\/(?:(?:(?<thld>[\\w\\-]*)(?:\\.))?(?<sld>[\\w\\-]*))\\.(?<tld>\\w*)(?:\\:(?<port>\\d*))?"
outputFormat = "[0]"
multiLine = True
RECURSIVE
MODE:Regex
=> VAR @parseregex
ENDBLOCK
// Appends the page-5 domains in @parseregex to the temp file; after this the
// temp file holds the results of all five pages.
BLOCK:FileAppendLines
LABEL:write to temp file
path = "startpagescraper\\temp.txt"
lines = @parseregex
ENDBLOCK
// Reads the accumulated domains back from the temp file into the list
// @fileReadLinesOutput so they can be de-duplicated.
BLOCK:FileReadLines
LABEL:read from temp file
path = "startpagescraper\\temp.txt"
=> VAR @fileReadLinesOutput
ENDBLOCK
// Removes duplicate entries from the collected domains. The result is stored
// with "=> CAP", i.e. @removeDuplicatesOutput is marked as a capture rather
// than a plain variable.
BLOCK:RemoveDuplicates
list = @fileReadLinesOutput
=> CAP @removeDuplicatesOutput
ENDBLOCK
// Writes the de-duplicated domain list to the final output file.
// NOTE(review): this is a write, not an append, so each execution presumably
// replaces the previous contents of domains.txt -- confirm this is intended
// when the config is run over more than one data line.
BLOCK:FileWriteLines
LABEL:Write to end file
path = "startpagescraper\\domains.txt"
lines = @removeDuplicatesOutput
ENDBLOCK
// Cleans up: deletes the temp file now that its contents have been written
// to the final output file.
BLOCK:FileDelete
path = "startpagescraper\\temp.txt"
ENDBLOCK