SitePoint Sponsor

User Tag List

Results 1 to 2 of 2
  1. #1
    SitePoint Evangelist
    Join Date
    Dec 2006
    Posts
    430
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)

    Parsing using simple dom

    Hi Guys,

    What I'm trying to do is parse some HTML using the simple PHP DOM class. I have already managed to parse one site, but I'm having trouble with this particular site.

    Code:

    PHP Code:
    <?php
    /**
     * Admin form handler for the "add ClickBank products" form.
     *
     * Searches the ClickBank marketplace for the submitted term, parses the
     * result rows with simple_html_dom and prints each product's name,
     * description and hop URL.
     *
     * NOTE(review): simple_html_dom (str_get_html) must already be loaded by
     * the including page; no include for it is visible in this snippet.
     */
    if (isset($_POST['submitAddClickbankProducts'])) {
        $rawSearch = (isset($_POST['searchName']) && is_string($_POST['searchName']))
            ? trim($_POST['searchName'])
            : '';
        // Category id from the form; not used further in this snippet.
        $catID = isset($_POST['searchCatID']) ? (int) $_POST['searchCatID'] : 0;

        // The term goes into a URL, not an SQL statement, so drop the double
        // quotes and URL-encode it (spaces become "+") rather than SQL-escaping.
        $searchString = urlencode(str_replace('"', '', $rawSearch));

        if ($searchString === '') {
            print "<p class=\"fcs-message-error\">You never entered a root search term.</p>";
        } else {
            $search_url = 'http://www.clickbank.com/mkplSearchResult.htm?dores=true&includeKeywords=' . $searchString;

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_USERAGENT, 'Firefox (WindowsXP) - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6');
            curl_setopt($ch, CURLOPT_URL, $search_url);
            curl_setopt($ch, CURLOPT_FAILONERROR, true);
            curl_setopt($ch, CURLOPT_AUTOREFERER, true);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 60);

            $html = curl_exec($ch);

            // Read the error details before the handle is closed.
            $curlFailed  = ($html === false || $html === '');
            $curlMessage = $curlFailed
                ? curl_error($ch) . " (Error Number " . curl_errno($ch) . ")"
                : '';
            curl_close($ch);

            // Number of result rows that were parsed and printed.
            $processedCount = 0;

            // Words in the description that get swapped for the discount link.
            // Plurals come first so a plural never leaves a stray "s" behind.
            $ignoreWords = array(
                "Commissions",
                "commissions",
                "Commission",
                "commission",
                "Affiliates",
                "affiliates",
                "Affiliate",
                "affiliate",
                "Conversions",
                "conversions",
                "Conversion",
                "conversion"
            );

            if ($curlFailed) {
                echo "<p class=\"fcs-message-error\">cURL error:" . htmlspecialchars($curlMessage, ENT_QUOTES, 'UTF-8') . "</p>";
            } elseif (stripos($html, "No results found") !== false) {
                echo "<p class=\"fcs-message-error\">No clickbank products were found.</p>";
                include("../inc/inc-footer-admin.php");
                exit;
            } else {
                // Parse the markup cURL already downloaded instead of fetching
                // the same URL a second time.
                $xpath      = str_get_html($html);
                $resultsBox = $xpath ? $xpath->find('div[class=results]', 0) : null;
                $results    = $resultsBox ? $resultsBox->find('tr[class=result]') : array();

                foreach ($results as $result) {
                    $detailsCell = $result->find('td[class=details]', 0);
                    $pdcA        = $result->find('a', 1);

                    if (!$detailsCell || !$pdcA) {
                        // Row does not have the expected cell/link; skip it
                        // rather than dereference a missing node.
                        continue;
                    }

                    // Text of the whole details cell, so the product title and
                    // its description come back together in one string.
                    $pdcT = strip_tags((string) $detailsCell);

                    // Selector restored from the line that was commented out;
                    // TODO confirm it matches the live ClickBank markup.
                    $descNode = $result->find('div[class=description]', 0);
                    $pdcD     = $descNode ? strip_tags((string) $descNode) : '';

                    // Swap the placeholder affiliate id for the real one.
                    $hopurl = trim(str_replace("zzzzz", "graham25s", $pdcA->href));

                    // Scraped text is untrusted: escape it, but leave entities
                    // already present in the source page alone.
                    $safeUrl = htmlspecialchars($hopurl, ENT_QUOTES, 'UTF-8', false);

                    echo "<b>Name:</b>  " . htmlspecialchars($pdcT, ENT_QUOTES, 'UTF-8', false);
                    echo "<br />";
                    echo "<b>Desc:</b>  " . htmlspecialchars($pdcD, ENT_QUOTES, 'UTF-8', false);
                    echo "<br />";
                    echo "<b>URLs:</b> " . $safeUrl;
                    echo "<br /><hr />";

                    // Title without the trailing price, reduced to letters,
                    // digits and spaces. Computed but not used further here.
                    $titleWithoutprice = explode('$', $pdcT);
                    $stringCleaned     = ucwords(trim(preg_replace('/[^A-Za-z0-9]/', ' ', $titleWithoutprice[0])));

                    // Description with the ignored words replaced by the
                    // discount link. Computed but not used further here.
                    $newDescription = str_replace(
                        $ignoreWords,
                        "<a href=\"" . $safeUrl . "\" target=\"_blank\"><b>CLICK HERE TO GET A SPECIAL DISCOUNT OFF THIS PRODUCT! LIMITED!</b></a>",
                        $pdcD
                    );

                    $processedCount++;
                }
            }

            // The original tested a flag that was never assigned, so the error
            // branch always ran; success now means at least one row was parsed.
            if ($processedCount > 0) {
                print "<p class=\"fcs-message-success\">You have successfully populated from clickbank.</p>";
            } else {
                print '<p class="fcs-message-error">OOPS, an error has occured please contact the site administrator.</p>';
            }
        }
    }
    ?>
    An example URL is:

    http://www.clickbank.com/mkplSearchR...ds=cure+eczema

    I'm just trying to parse the product: Name, Description & Hop URL.

    As my code stands, the "Name:" line prints the following: Name: 14 Days Eczema Cure. All Natural Cure For Curing Eczema In 14 Days. Simple Step By Step Methods To Eliminate Eczema. Massive 75% Commissions.

    That is all on one line, whereas it should be:

    14 Days Eczema Cure.

    All Natural Cure For Curing Eczema In 14 Days. Simple Step By Step Methods To Eliminate Eczema. Massive 75% Commissions.

    Both separately.

    Any help would be appreciated.

    thanks guys

    Graham

  2. #2
    SitePoint Addict Zarin Denatrose's Avatar
    Join Date
    Jan 2009
    Location
    Surrey BC, Canada
    Posts
    309
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)
    If you mean simple_html_dom, I have some code here that will get you the desired parsed parts:
    PHP Code:
    // This assumes the HTML is already loaded into a simple_html_dom object
    // called $xpath. Each td.details cell holds one product: its first link
    // carries the name and URL, and the .description element holds the text.
    $items = $xpath->find("td.details");

    foreach ($items as $item)
    {
        $item_header = $item->find("a", 0);
        if (!$item_header) {
            // No link in this cell, so there is no name or URL to report.
            continue;
        }

        $name = $item_header->plaintext;
        $link = $item_header->href;

        // find() with an index returns null when nothing matches; fall back
        // to an empty description instead of dereferencing null.
        $description_node = $item->find(".description", 0);
        $description      = $description_node ? $description_node->plaintext : '';

        echo $name, " - ", $link, "<br>", $description, "\n<br><br>\n";
    }

    You can modify it to fit your purposes.
    I hope this helps!


Bookmarks

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •