SitePoint Sponsor

User Tag List

Results 1 to 5 of 5

Thread: link extractor

  1. #1
    SitePoint Zealot amri's Avatar
    Join Date
    Dec 2002
    Posts
    133
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)

    link extractor

    I got this script from www.dynamicdrive.com
    This is the original script:
    Code:
    <script language="JavaScript1.2">
    <!--
    /**
     * Opens a new window that lists every anchor (A element) of the current document.
     *
     * The new window shows the total number of anchors, followed by one line per
     * anchor: a link whose href is the anchor's string value and whose label is
     * the anchor's innerHTML.
     *
     * Does nothing when the browser offers neither document.getElementsByTagName
     * nor document.all, or when the new window could not be opened.
     */
    function extractlinks(){
      var links;
      if (document.getElementsByTagName)
        links=document.getElementsByTagName("A");
      else if (document.all)
        links=document.all.tags("A");
      else
        return;
      var total=links.length;
      var win2=window.open("","","menubar,scrollbars,status");
      // window.open yields null when the window is refused (e.g. by a popup blocker).
      if (!win2) return;
      win2.document.write("<h2>Total Links="+total+"</h2><br>");
      // The counter is declared locally so it does not leak into the global scope.
      for (var i=0;i<total;i++){
        win2.document.write('<a href="'+links[i]+'">'+links[i].innerHTML+'</a><br>');
      }
      win2.document.close();
    }
    //-->
    </script>
    
    
    <button onClick="extractlinks()">Extract Links</button>
    It works, but it extracts every kind of link. On my page I set target="_blank" on the links that point outside my site. How can I modify the script so that it only extracts links with the target="_blank" attribute?

    Thanks

  2. #2
    ♪♪ ♪ ♪ ♪ ♪♪ ♪ ♪♪ Markdidj's Avatar
    Join Date
    Sep 2002
    Location
    Bournemouth, South UK
    Posts
    1,551
    Mentioned
    1 Post(s)
    Tagged
    0 Thread(s)
    What do you get when you alert document.getElementsByTagName("a")[i].target for each of the links?
    At a guess it's something like this, but I don't know.
    LiveScript: Putting the "Live" Back into JavaScript
    if live output_as_javascript else output_as_html end if

  3. #3
    SitePoint Wizard silver trophy
    Join Date
    May 2003
    Posts
    1,843
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)
    Here, try this:
    Code:
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" 
        "http://www.w3.org/TR/html4/loose.dtd">
    
    <html>
    <head>
    <!-- NOTE(review): the DOCTYPE's public identifier names HTML 4.0 while its
         system identifier is the html4 loose.dtd URL; confirm which version was intended. -->
    <title>Hello!</title>
    <style type="text/css">
    a:link, a:visited {font-family:tahoma;color:darkgreen;}
    </style>
    </head>
    <body>
    <!-- Sample anchors: url1, url2, url4, url6, url7 and url10 carry target="_blank";
         url3, url5, url8 and url9 have no target attribute. Some labels contain nested markup. -->
    <a href="url1.htm" target="_blank" title="Link #1">new window</a><br />
    <a href="url2.htm" target="_blank">another new window</a><br />
    <a href="url3.htm">same window</a><br />
    <a href="url4.htm" target="_blank"><strong>really</strong> new window</a><br />
    <a href="url5.htm">same window</a><br />
    <a href="url6.htm" target="_blank">no kidding, a <em>new</em> window</a><br />
    <a href="url7.htm" target="_blank">yes, yes, new window</a><br />
    <a href="url8.htm" title="Link #8">same window</a><br />
    <a href="url9.htm" title="Link #9">same window</a><br />
    <a href="url10.htm" target="_blank">damn, if it ain't a <span style="text-transform:uppercase;">new window</span></a><br />
    <br /><br />
    <!-- Loads the external script list_links.js at this position, after all the anchors. -->
    <script type="text/javascript" language="javascript" src="list_links.js"></script>
    </body>
    </html>
    [list_links.js]
    Code:
    /**
     * Opens a popup window listing the links of a document whose target is "_blank".
     *
     * For every matching link the listing shows its href, target and title (each
     * only when non-empty) and its innerHTML. The finished page is stored in the
     * global variable HTML, which the popup reads back through the
     * "javascript:opener.HTML" URL it is opened with.
     *
     * @param {Document} [doc] Document whose links collection is scanned;
     *                         defaults to the current document.
     */
    function list_links(doc)
    {
    	var source = doc || document;
    	var entries = '';
    	var count = 0;
    	for (var i = 0; i < source.links.length; i++)
    	{
    		var link = source.links[i];
    		if (link.target !== '_blank') //filter
    			continue;
    		entries += '<h5>LINK&nbsp;' + (++count) + '</h5>';
    		entries += ((link.href) ? '<b>href: </b>' + link.href + '<br>' : '');
    		entries += ((link.target) ? '<b>target: </b>' + link.target + '<br>' : '');
    		entries += ((link.title) ? '<b>title: </b>' + link.title + '<br>' : '');
    		entries += '<b>text: </b>' + link.innerHTML;
    	}
    	var header = '<html><head><style>body{font:11px verdana;}b{color:darkred;}</style></head><body>';
    	header += '<h4><u>LINK LISTING</u></h4>' + ((source.title) ? '<b>document title: </b>' + source.title : '');
    	header += '<br><b>document url: </b>' + location.href;
    	header += '<br><b>number of links (as filtered): </b>' + count + '<br>';
    	// Assigned explicitly on window: the popup evaluates opener.HTML to obtain its content.
    	window.HTML = header + entries + '</body></html>';
    
    	var w = screen.availWidth * .4;
    	var h = screen.availHeight * .8;
    	var left = screen.availWidth - w - 40;
    	// The scheme must be a literal "javascript:"; an HTML entity is not decoded inside a script string.
    	var popup = window.open('javascript:opener.HTML', 'linklist', 'width='+w+',height='+h+',left='+left+',top=20,menubar,scrollbars,resizable');
    	if (popup && !popup.closed)
    		popup.focus();
    }
    
    // Writes a button into the page at the point where this script runs; clicking it calls list_links(document).
    document.writeln('<button onclick="list_links(document)">List Them Links</button>');
    Last edited by adios; Oct 26, 2003 at 18:29.
    ::: certified wild guess :::

  4. #4
    SitePoint Zealot amri's Avatar
    Join Date
    Dec 2002
    Posts
    133
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)
    Thanks

  5. #5
    I'll take mine raw silver trophy MikeFoster's Avatar
    Join Date
    Dec 2002
    Location
    Alabama, USA
    Posts
    2,560
    Mentioned
    0 Post(s)
    Tagged
    0 Thread(s)
    Here's another one
    Code:
    // When the page has finished loading, collect the A elements whose target
    // matches "_blank" and report how many were found, followed by the list itself.
    window.onload = function ()
    {
      var blankTargetAnchors = xGetElementsByAttribute('A', 'target', '_blank');
      var summary = blankTargetAnchors.length + '\n' + blankTargetAnchors;
      alert(summary);
    };
    
    /* xGetElementsByAttribute
       Collects the sTag elements of the current document whose sAtt value matches
       the regular expression sRE (case-insensitive).

       The value is read with getAttribute(sAtt) first; when that yields nothing,
       the element property named sAtt is used instead. Only string values are
       tested, so a property that is not a string never matches.

       sTag - tag name of the elements to examine
       sAtt - attribute (or property) name to read
       sRE  - regular expression source the value is searched with

       Returns an array of the matching elements; the array is empty when nothing
       matches or when the browser offers neither document.getElementsByTagName
       nor document.all.
    */
    function xGetElementsByAttribute(sTag, sAtt, sRE)
    {
      var pattern = new RegExp(sRE, 'i');
      var matches = [];
      var candidates;
      if (document.getElementsByTagName) {
        candidates = document.getElementsByTagName(sTag);
      }
      else if (document.all) {
        candidates = document.all.tags(sTag);
      }
      if (!candidates) {
        return matches;
      }
      for (var idx = 0; idx < candidates.length; ++idx) {
        var node = candidates[idx];
        // Fall back to the property of the same name when the attribute is missing or empty.
        var value = node.getAttribute(sAtt) || node[sAtt];
        if (typeof value != 'string') {
          continue;
        }
        if (value.search(pattern) == -1) {
          continue;
        }
        matches.push(node);
      }
      return matches;
    }
    Edit:

    After some testing, I revised the above function, and provided some test results below. mf 9Dec03


    Code:
    /*** Test Results (Win2K) ***/
    // NOTE(review): each line below presumably lists the (sTag, sAtt, sRE) arguments that were
    // tried, and the trailing comment the browsers tested ("op", "moz", "ie" look like Opera,
    // Mozilla and Internet Explorer; a leading "!" appears to mark a failure) - confirm.
    
    ('div', 'id', 'Col');        // op, moz, ie
    
    ('span', 'class', 'fw');     // op, moz, !ie
    ('span', 'className', 'fw'); // op, moz, ie
    
    ('input', 'name', 'inp');    // op, moz, ie
    ('input', 'name', '[2-4]');  // op, moz, ie
    ('input', 'type', 'submit'); // op, moz, ie
    
    ('textarea', 'rows', '14');  // op, moz, !ie
    ('textarea', 'rows', '.');   // op, moz, !ie
    ('textarea', 'id', '.');     // op, moz, ie
    
    ('form', 'onsubmit', '.');   // op, moz, !ie
    ('form', 'name', '.');       // op, moz, ie
    
    ('a', 'href', '.');          // op, moz, ie
    ('a', 'target', '.');        // op, moz, ie
    Last edited by MikeFoster; Dec 9, 2003 at 11:25.


Bookmarks

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •