Hi guys
Ive got my script up and running and it works fine except for one issue, the old remote machine that is serving the pages is somewhat slow, partly due to its speed, and partly because of the pages are huge and take a long while to compile. So I can sit there waiting for 50 seconds before the server decides to spit out the page.
This is causing me time out issues. Ive set cURL timeout to 300seconds, PHP execution time (local) to 300 secs, but am still getting issues, I think I need a way to feed something to the browser whilst cURL is waiting. When processing the smaller directories the script runs fine for several minutes without problems.
Any ideas
<?php
if(!ini_set("max_execution_time", "300")){
echo 'new ex time not set';
}
for($i = 0; $i < 40000; $i++){
echo ' '; // extra spaces
}
// give the browser something to stary
flush();
// connect to database
code here
$baseurl = 'http://website.net'; // Base URL
$keeplooping = 1; //flag
// Check if finished last time
$sql = mysql_query("SELECT Directory
FROM lenadir
WHERE Filesread = '0'
");
if (mysql_num_rows($sql) == 0){ // If all directories were filed start again by clearing out
// delete all directory entries to start again
$bin = mysql_query("DELETE FROM lenafile
WHERE '1' = '1'
");
}
// get first directory
$row = mysql_fetch_array($sql); //get first directory not read from list
$directory = $row['Directory']; //set search directory for next pass
while($keeplooping == 1){ // Keep going round till told to stop
// Read and decode HTML Response
set_time_limit(300);
$cURL = curl_init();
curl_setopt($cURL, CURLOPT_URL, $baseurl.$directory);
curl_setopt($cURL, CURLOPT_HEADER, 0);
curl_setopt($cURL, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($cURL, CURLOPT_CONNECTTIMEOUT, 300);
curl_setopt($cURL, CURLOPT_TIMEOUT, 300);
curl_setopt($cURL, CURLOPT_HTTPAUTH, CURLAUTH_ALL);
curl_setopt($cURL, CURLOPT_USERPWD, "user:pass");
$strPage = curl_exec($cURL);
curl_close($cURL);
echo $strPage;
// Parse HTML page to remove links
$regexp = "<a\\s[^>]*href=(\\"??)([^\\" >]*?)\\\\1[^>]*>(.*)<\\/a>";
if(preg_match_all("/$regexp/siU", $strPage, $matches, PREG_SET_ORDER)) {
// $matches[2] = array of link addresses with path
// $matches[3] = array of link text
}
array_splice($matches, 0, 9); //remove first 9 links as they are part of FSP program
array_splice($matches, -1, 1); //remove last link as its part of FSP program
// Step through each link, only take notice of directories, add to dirlist in database
foreach($matches as $element){
$link = $element[2];
$searchtext = preg_replace("/[^a-zA-Z0-9s]/", " ", $element[3]); //Remove all non Alphanum chars
$searchtext = preg_replace('/\\s\\s+/', ' ', $searchtext); // Remove double whitespaces
if ($link[strlen($link)-1] != '/'){ //Only process is link isnt a /Directory/
echo '<a href="http://website.net' . $link . '">' . $searchtext . '</a><br />';
$sql = @mysql_query("INSERT INTO lenafile
SET Path = '$link',
Search = '$searchtext'
");
if($sql){echo 'OK';}else{echo 'NO';}
} // end if
} // end foreach
// flag directory as read if not root as root isnt in database
$sql = mysql_query("UPDATE lenadir
SET Filesread = '1'
WHERE Directory = '$directory'
");
// Read directorys not yet searched into array
$sql = mysql_query("SELECT Directory
FROM lenadir
WHERE Filesread = '0'
");
if (mysql_num_rows($sql) == 0){ // If all directories are files we are finished
$keeplooping = 0; //cancel loop
}else{ // if not search first unsearched directory
$row = mysql_fetch_array($sql); //get first directory not read from list
$directory = $row['Directory']; //set search directory for next pass
} // end ifelse
} // end while looping=1
echo 'All Files In Directories Stored';
?>