Scraper with Puppeteer login returns just one element of the array

This code is supposed to loop through the URLs scraped by the scrapeProductPage function. Before looping, it needs to log in so that it can read the prices, because the prices are only shown to logged-in users. Instead of looping through all of the URLs, it only returns the scraped data from one page, and I get the warning “MaxListenersExceededWarning: Possible EventEmitter memory leak detected”.

const request = require("request-promise");
const cheerio = require("cheerio");
const ObjectsToCsv = require("objects-to-csv");
const puppeteer = require("puppeteer");

const url = "https://www.example.com";

const scrapeResults = [];

// Collect the product URLs from the listing page (no login needed for this part).
async function scrapeProductPage() {
  try {
    const htmlResult = await request.get(url);
    const $ = cheerio.load(htmlResult);

    $("td.productListing-data > a[style='position:relative;float:left;']").each((index, element) => {
      let url = $(element).attr("href");
      url = "https://www.example.com/" + url;
      const scrapeResult = { url };
      scrapeResults.push(scrapeResult);
    });
    return scrapeResults;
  } catch (err) {
    console.error(err);
  }
}

// Log in once, then visit each product URL to read the price.
async function scrapeDescription(productsWithImages) {
  process.setMaxListeners(0);
  const browser = await puppeteer.launch({
    headless: false
  });

  const page = await browser.newPage();
  await page.goto('https://www.example.com/login');

  await page.waitFor(500);

  await page.waitFor('input[name="email_address"]');
  await page.type('input[name="email_address"]', 'example@gmail.com');
  await page.type('input[name="password"]', '123test');
  await page.click('#btnLogin');

  return await Promise.all(
    productsWithImages.map(async job => {
      try {
        await page.goto(job.url, { waitUntil: "load" });
        const content = await page.content();
        const $ = cheerio.load(content);

        job.main_img = $('img#main_img').attr('src');
        job.name = $('h2').text();
        job.price = $("td.products_info_price").text();

        return job;
      } catch (error) {
        console.error(error);
      }
    })
  );
}

// Turn the scraped objects into CSV (at the moment this only logs the csv object).
async function saveDataToCsv(data) {
  const csv = new ObjectsToCsv(data);
  console.log(csv);
}

// Collect the product URLs, scrape each product page while logged in, then output the data.
async function scrapeWona() {
  const productsWithImages = await scrapeProductPage();
  const wonaFullData = await scrapeDescription(productsWithImages);
  await saveDataToCsv(productsWithImages);
}

scrapeWona();
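
As a side note, saveDataToCsv currently only logs the ObjectsToCsv instance rather than writing a file. If an actual CSV file is the goal, objects-to-csv has a toDisk method for that, as far as I can tell from its documentation; the file path below is just an example:

const ObjectsToCsv = require("objects-to-csv");

async function saveDataToCsv(data) {
  const csv = new ObjectsToCsv(data);
  // Write the rows to a file on disk instead of just logging the object.
  await csv.toDisk("./products.csv");
}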

I watched a video course on udemy.com called “Web Scraping in Nodejs”, created by Stefan Hyltoft. With the help of this video I was able to put together new code that worked.
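
For anyone who finds this later: as far as I can reconstruct it, the core change is to stop firing all the page.goto calls at once through Promise.all on a single page, and instead visit the product pages one at a time. This is a rough sketch of that idea, not the exact code from the course; it reuses the cheerio and puppeteer requires from above, the selectors, login URL and credentials are the placeholders from my original post, and the function name is my own:

async function scrapeDescriptionSequentially(productsWithImages) {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Log in once before visiting any product pages.
  await page.goto('https://www.example.com/login');
  await page.waitForSelector('input[name="email_address"]');
  await page.type('input[name="email_address"]', 'example@gmail.com');
  await page.type('input[name="password"]', '123test');
  await Promise.all([
    page.waitForNavigation({ waitUntil: "load" }),
    page.click('#btnLogin')
  ]);

  const results = [];

  // Visit the product pages one at a time, so the single tab is never
  // asked to navigate to several URLs concurrently.
  for (const job of productsWithImages) {
    try {
      await page.goto(job.url, { waitUntil: "load" });
      const $ = cheerio.load(await page.content());

      job.main_img = $('img#main_img').attr('src');
      job.name = $('h2').text();
      job.price = $("td.products_info_price").text();

      results.push(job);
    } catch (error) {
      console.error(error);
    }
  }

  await browser.close();
  return results;
}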
