Help requested: scraping the data in HTML/XML files into a CSV file or database

Dear Members,

I tried to work out the issue below with simple logic, but was unable to achieve the required output. I would love to have your valuable suggestions (with code, if possible). I really appreciate the valuable time you put into this issue. Thank you all; I am awaiting a better solution:

Here is the Situation:

I have some folders, each of which has 4 HTML files in it, and I have created a PHP parser file with the following code to scrape the data out of these files one by one and put it into a CSV file or database for later use. The current PHP file code is given below:


<?php
// Connection settings for the legacy mysql_* extension.
// NOTE(review): mysql_pconnect() was removed in PHP 7; this script only runs
// on PHP 5.x unless the database layer is migrated to mysqli/PDO.
$dbhost = "localhost";
$dbuser = "";
$dbpass = "";
$conn = mysql_pconnect($dbhost, $dbuser, $dbpass);

$dir = "dhe"; //The Files Folder - Village Folder

// Read the posted value defensively: the key is absent when the script is
// opened without submitting the form.
$secondphase = isset($_POST["secondPhase"]) ? trim($_POST["secondPhase"]) : "";

// The database name is derived from the posted value. The original passed the
// not-yet-defined $newdb to formattext(), which always produced an empty name.
$newdb = formattext($secondphase);
$product_cat = $newdb."_id";

startParse($newdb, $dir, $product_cat);
/**
 * Parses every file returned by getfileList($dir) as HTML and prints, per
 * file, a '#'-separated line built from the text of a fixed set of element ids.
 *
 * @param string $newdb       Database name without the "dt_" prefix.
 * @param string $dir         Directory that holds the HTML files.
 * @param string $product_cat Not used by this function; kept so existing
 *                            callers keep working.
 * @return void
 */
function startParse($newdb,$dir,$product_cat) // FOR PARSING SECOND PHASE
{
	// NOTE(review): mysql_select_db() belongs to the mysql_* extension that was
	// removed in PHP 7; it is kept because the connection is opened with that
	// extension outside this function.
	$newdb = "dt_".$newdb;
	mysql_select_db($newdb);

	// Element ids to extract, in output order.
	$fieldIds = array(
		'lblName',
		'lblFathersName',
		'lblHouseNo',
		'lblBlock',
		'lblDist',
		'lblState',
		'lblPin',
		'lblBarcode',
		'lblSelectiontype',
		'lblMRNo',
		'lblMRNoR',
		'lblFather',
		'lblMother',
		'lblDob',
		'lblPhno',
		'lblMob',
		'lblGender',
	);

	$files = getfileList($dir);
	if (!is_array($files)) {
		// Nothing to iterate over (e.g. the directory could not be listed).
		return;
	}

	// Collect libxml parse warnings internally instead of silencing the call
	// with '@', which would also hide unrelated errors.
	$previousLibxmlSetting = libxml_use_internal_errors(true);

	foreach ($files as $file) {
		// Both values are per-file state handed to getDatafromHTML(); the line
		// is passed by reference so each field is appended to the same string.
		$csvline = "";
		$newline = 0;
		echo " F I L E  :: ".$file;

		$htmlfile = $dir."/".$file;
		$htmlDoc = new DOMDocument();
		$loaded = $htmlDoc->loadHTMLFile($htmlfile);
		libxml_clear_errors();
		if (!$loaded) {
			// Unreadable or unparsable file: skip it and carry on with the rest.
			continue;
		}

		$xpath = new DOMXPath($htmlDoc);
		foreach ($fieldIds as $fieldId) {
			$nodes = $xpath->query("id('".$fieldId."')");
			getDatafromHTML($nodes, $csvline, $newline);
		}
	}

	libxml_use_internal_errors($previousLibxmlSetting);
}

/**
 * Appends the text of every node in $nodelist to the line being built and
 * echoes the progress.
 *
 * @param DOMNodeList|array $nodelist Iterable of objects exposing ->nodeValue.
 * @param string            $csvline  Line under construction, passed by
 *                                    reference so additions reach the caller.
 *                                    Optional for backward compatibility.
 * @param int               $newline  0 appends each node's text followed by
 *                                    " #"; any other value only echoes the
 *                                    current line.
 * @return void
 */
function getDatafromHTML($nodelist, &$csvline = "", $newline = 0)
{
	foreach ($nodelist as $n) {
		if ($newline == 0) {
			echo " inIF  : ";
			$csvline .= $n->nodeValue." #";
			echo $csvline."<br />";
		} else {
			echo " in ELSEIF";
			echo $csvline."<br />";
		}
	}
}
/**
 * Returns the names of all entries in $dir that are not directories.
 *
 * @param string $dir Directory to list.
 * @return array Entry names (without the directory prefix); an empty array
 *               when the directory cannot be opened or holds no files.
 */
function getfileList($dir)
{
	// Start from an empty list so callers always receive an array.
	$filelist = array();

	$handle = opendir($dir);
	if ($handle === false) {
		return $filelist;
	}

	/* This is the correct way to loop over the directory. */
	while (false !== ($file = readdir($handle))) {
		// readdir() yields bare names, so the check has to be made against
		// the entry's path inside $dir rather than the working directory.
		if (!is_dir($dir."/".$file)) {
			$filelist[] = $file;
		}
	}

	// Release the handle before returning.
	closedir($handle);

	return $filelist;
}
/**
 * Turns free text into a lowercase identifier.
 *
 * Percent-encoded octets (%XX) are kept, stray '%' signs are dropped, '+' is
 * treated as a space, HTML entities and any character outside
 * [%a-z0-9 _-] are removed, and runs of whitespace or hyphens each become a
 * single underscore.
 *
 * @param string $title Text to normalise.
 * @return string Normalised text.
 */
function formattext($title)
{
	// Shield %XX octets behind a marker so the blanket '%' removal below
	// leaves them alone.
	$shielded = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);

	// Drop the remaining percent signs, then turn '+' into a space.
	$shielded = str_replace(array('%', '+'), array('', ' '), $shielded);
	$shielded = trim($shielded);

	// Put the shielded octets back and lowercase the result.
	$lowered = strtolower(
		preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $shielded)
	);

	// Applied in order: kill entities, strip disallowed characters, collapse
	// whitespace runs, collapse hyphen runs.
	$patterns = array('/&.+?;/', '/[^%a-z0-9 _-]/', '/\\s+/', '|-+|');
	$replacements = array('', '', '_', '_');

	return preg_replace($patterns, $replacements, $lowered);
}


?>