Node.js: launching a web scraper script on a schedule?

Hi guys, I need some pointers on how I can make my web scraper application launch on a schedule.

My Node application uses these key dependencies, among others:

  • cheerio
  • express
  • ejs

Here is what my index.js file looks like:

var scraper = require(__dirname + '/scripts/scraper.js'); // < scraper 
var express = require('express');
var path = require('path');
var app = express();

var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://127.0.0.1:27017/test";

// Serve everything under the local "public" directory as static assets.
var staticRoot = __dirname + '/public';
app.use(express.static(staticRoot));

// Render views through the EJS template engine.
app.set('view engine', 'ejs');

// index page
// Renders pages/index with every document from the "customers" collection
// of the "mydb" database, exposed to the template as `result`.
app.get('/', function(req, res, next) {
  MongoClient.connect(url, function(connectErr, db) {
    // Hand failures to Express rather than throwing: a throw inside this
    // callback is uncaught and would take the whole server process down.
    if (connectErr) {
      return next(connectErr);
    }

    var dbo = db.db("mydb");
    dbo.collection("customers").find({}).toArray(function(queryErr, result) {
      // The rows are already in memory, so release the connection first;
      // this way it is closed on the failure path as well as on success.
      db.close();

      if (queryErr) {
        return next(queryErr);
      }

      res.render('pages/index', {
        result: result,
      });
    });
  });
});

// Start the HTTP server on port 3001.
app.listen(3001);

// Tell the operator where the site is being served.
console.log('navigate to: http://178.62.253.206:3001');

When I navigate to the IP link, all of the data is loaded into that web page as intended. However, in order to update the data, I need to navigate to http://178.62.253.206:8080 (the port the scraper is running on).

What I wonder is how I can make my web server run on a schedule. My overall plan is to add more scrapers as more information is needed for my web page …

Any suggestions as to how I can make this work would be much appreciated.

Frederik

This topic was automatically closed 91 days after the last reply. New replies are no longer allowed.