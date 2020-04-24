Hey, everyone, I am writing a script (playing in my time) that will check a website on 505 errors so, in the end, it would be possible to run it in Gitlab CI using puppeteer.
Now I have three questions (suggestions are nice)
- How can I make sure the script stays on the domain (no subdomains or other domains)
- Is it possible to throw an error Gitlab CI can read?
- How can I make screenshots based on the page (removing the domain
http://example.com/whoo.htmlbecomes
whoo.htmland
http://example.com/whoo/jaah.htmlbecomes
/whoo/jaah.html(as folder)
const puppeteer = require('puppeteer');
async function lookup(toCheckLink, domain) {
let linkList = [];
const browser = await puppeteer.launch({headless: false});
// const browser = await puppeteer.launch();
const page = await browser.newPage();
await loop(page, toCheckLink);
async function loop(page, toCheckLink) {
let response = await page.goto(toCheckLink);
console.log(response.status());
if(response.status() === 505){
console.log('505 found, fatal error');
return false;
}
let hrefs = await page.$$eval('a', as => as.map(a => a.href));
for (const href of hrefs) {
if (href.indexOf(domain) !== -1 && href.indexOf('#') === -1 && href.indexOf('@') === -1) {
console.log(href.indexOf('#'));
if (!linkList.includes(href)) {
linkList.push(href);
let LinkReplace = href.replace(/\//g, '-');
let screenshotName = LinkReplace.replace(/^.*\/\/[^\/]+/, '');
// await page.screenshot({ path: './screenshots/' + screenshotName +'.jpg', type: 'jpeg' });
await loop(page, href)
}
}
}
}
await browser.close();
}