Hi guys, I need some pointers on how I can make my web scraper application launch on a schedule.
My Node application uses these key dependencies, among others:
- cheerio
- express
- ejs
Here is what my index.js file looks like:
var scraper = require(__dirname + '/scripts/scraper.js'); // < scraper
var express = require('express');
var path = require('path');
var app = express();
var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://127.0.0.1:27017/test";
// Serve static assets from the "public" directory located next to this file.
// path.join builds the location with the platform's own separator instead of
// hand-concatenating a '/'.
app.use(express.static(path.join(__dirname, 'public')));
// set the view engine to ejs
app.set('view engine', 'ejs');
// index page
// Handles GET /: opens a MongoDB connection to `url`, reads every document
// from the "customers" collection of the "mydb" database and renders the
// 'pages/index' view with those documents exposed as `result`.
// Any connection or query error is handed to Express through next(err).
app.get('/', function(req, res, next) {
  MongoClient.connect(url, function(connectErr, db) {
    if (connectErr) {
      // Throwing here would happen inside an asynchronous callback, where
      // Express cannot catch it: the process would crash and the request
      // would never be answered. Forward the error to Express instead.
      return next(connectErr);
    }
    var dbo = db.db("mydb");
    dbo.collection("customers").find({}).toArray(function(queryErr, result) {
      // The documents are already materialised in `result`, so the
      // connection can be released on both the success and failure paths.
      db.close();
      if (queryErr) {
        return next(queryErr);
      }
      res.render('pages/index', {
        result: result
      });
    });
  });
});
// Start the HTTP server on port 3001 and print the address to visit.
app.listen(3001);
// The string must be delimited by plain ASCII quotes; typographic quotes
// (‘ ’) are not valid string delimiters and make the file fail to parse.
console.log('navigate to: http://178.62.253.206:3001');
When I navigate to the IP link, all of the data is loaded as intended into that web page. However, in order to update the data, I need to navigate to http://178.62.253.206:8080 (the port the scraper is running on).
What I wonder is how I can make my web server run on a schedule. My overall plan is to add more scrapers as more information is needed for my web page…
Any suggestions as to how I can make this work would be much appreciated.
Frederik