Dear Members,
I had worked out on the issue below with simple logic. But was unable to achive the output asd required. I would love to have your valuable suggetions(if Code) for the Same : Really Appreciate the Valuabl;e time you Put on this Issue. Thank you All. Awaiting for a better Solution:
Here is the Situation:
I have some files which have 4 html files in it and I have created a php Parser file with the Following Code to Scrape the Date out of these files one by one and put them into a CSV file or Database to use it later on. The Current PHP File code is given below:
<?php
$dbhost = "localhost";
$dbuser = "";
$dbpass = "";
$conn = mysql_pconnect($dbhost, $dbuser, $dbpass);
//mysql_select_db ($dbname);
$dir="dhe"; //The Files Folder - Village Folder
$secondphase=trim($_POST["secondPhase"]);
$newdb = formattext($newdb);
//$newdb= "dt_".$newdb;
$product_cat= $newdb."_id";
global $csvline;
startParse($newdb,$dir,$product_cat);
function startParse($newdb,$dir,$product_cat) // FOR PARSING SECOND PHASE
{
$newdb= "dt_".$newdb;
mysql_select_db($newdb);
$files=getfileList($dir);
foreach($files as $file)
{
$csvline="";
$newline = 0;
echo " F I L E :: ".$file;
//$product_name = format_product($file);
$htmlfile = $dir."/".$file;
$htmlDoc = new DomDocument();
@$htmlDoc->loadHtmlFile($htmlfile);
$xpath = new DOMXPath( $htmlDoc );
$lblName= $xpath->query("id('lblName')");
$lblFathersName= $xpath->query("id('lblFathersName')");
$lblHouseNo= $xpath->query("id('lblHouseNo')");
$lblBlock= $xpath->query("id('lblBlock')");
$lblDist= $xpath->query("id('lblDist')");
$lblState= $xpath->query("id('lblState')");
$lblPin= $xpath->query("id('lblPin')");
$lblBarcode= $xpath->query("id('lblBarcode')");
$lblSelectiontype= $xpath->query("id('lblSelectiontype')");
$lblMRNo= $xpath->query("id('lblMRNo')");
$lblMRNoR= $xpath->query("id('lblMRNoR')");
$lblFather= $xpath->query("id('lblFather')");
$lblMother= $xpath->query("id('lblMother')");
$lblDob= $xpath->query("id('lblDob')");
$lblPhno= $xpath->query("id('lblPhno')");
$lblMob= $xpath->query("id('lblMob')");
$lblGender= $xpath->query("id('lblGender')");
// Fetching and storing Data starts
getDatafromHTML($lblName);
getDatafromHTML($lblFathersName);
getDatafromHTML($lblHouseNo);
getDatafromHTML($lblBlock);
getDatafromHTML($lblDist);
getDatafromHTML($lblState);
getDatafromHTML($lblPin);
getDatafromHTML($lblBarcode);
getDatafromHTML($lblSelectiontype);
getDatafromHTML($lblMRNo);
getDatafromHTML($lblMRNoR);
getDatafromHTML($lblFather);
getDatafromHTML($lblMother);
getDatafromHTML($lblDob);
getDatafromHTML($lblPhno);
getDatafromHTML($lblMob);
getDatafromHTML($lblGender);
}
}
function getDatafromHTML($nodelist)
{
foreach ($nodelist as $n){
//echo $n->nodeValue."<br/>";
if ($newline == 0){
echo " inIF : ";
$csvline .= $n->nodeValue." #";
echo $csvline."<br />";
}
else{
echo " in ELSEIF";
echo $csvline."<br />";
}
}
}
// Returns List of Files (Array) from Directory
function getfileList($dir)
{
if ($handle = opendir($dir))
{
//echo "Directory handle: $handle\
";
//echo "Files:\
";
/* This is the correct way to loop over the directory. */
while (false !== ($file = readdir($handle)))
{
//echo "$file<br>";
if(!is_dir($file))
{
$filelist[]=$file;
}
}
}
return $filelist;
closedir($handle);
}
//Formating
function formattext($title)
{
$title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
// Remove percent signs that are not part of an octet.
$title = str_replace('%', '', $title);
$title = str_replace('+',' ', $title);
$title = trim($title);
// Restore octets.
$title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
$title = strtolower($title);
$title = preg_replace('/&.+?;/', '', $title); // kill entities
$title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
$title = preg_replace('/\\s+/', '_', $title);
$title = preg_replace('|-+|', '_', $title);
//$title = trim($title, '-');
return $title;
}
?>