[code review] Inprogress css selector

Right taking a plunge in the deepend. Maybe a big ask this.

I’m having a go at writing a selector engine. It’s for personal use with my own sites, so doesn’t really need to be idiot proof.

It will be a fallback for querySelectorAll.

I started out just wanting something simple i.e. find ‘ul li.myClass’, but that wasn’t enough.

This selector is intended to be a module along with an events module, which will bolt onto a framework. The framework constructor will be along the lines of jquery with a dummy constructor, prototype.init etc.

My real question is about the design or design pattern of this particular module. It’s all getting a little bit messy and inconsistent.

There’s a line I keep reading which is ‘favour composition over inheritence’. I’m still none the wiser regarding that, but do get the impression that sticking rigidly to inheritence is problematic. Just not sure how or where to go about that.

So in conclusion pretty clueless:confused:

Rather than waffling on here’s the code. If extra comments or details are needed let me know. Also I’m not looking for exhaustive answers or re-writes, but maybe just a nudge in the right direction.

// written by Russell Gooday (c)2011
(function(doc, global){

  var Qselect, utils, splitToParts, find, filter, mapFind, child, relative,
      attribute, groupChildren, propDelete,

      hasOwn = Object.prototype.hasOwnProperty,
  
      push = Array.prototype.push,
      
      slice = Array.prototype.slice,
      
      rxSplitParts = new RegExp(
      '(\\\\w+:[-\\\\w]+(?:\\\\([^)]+\\\\))?)|'
      + '((?:\\\\w+)?\\\\[[^\\\\]]+])|'
      + '(>)|'
      + '(#[-\\\\w]+)|'
      + '((?:\\\\w+)?\\\\.[-\\\\w]+)|'
      + '(\\\\w+)', 'g'),
      
      partMatches = ['selector', 'pseudo', 'attribute', 'relative', 'id', 'className', 'tag'];

// -------------- Generic utils ----------------------------------

  utils = {
    // make Array
    slice : (function(doc){

      try {
        slice.call(doc.createElement('div'));
        return function( obj ) { return slice.call( obj ); };
      } catch(e) {
        return function( obj ) {
          var ret = [], len = obj.length;
        
          if (len) { while (len--) { ret[len] = obj[len]; } }
          return ret;
        };
      }
    }(doc)),
    // forEach
    forEach : function( arr, fn, context ) {
  
      var that = context || null,
          len = arr.length, i = 0;

      for (; i < len; i++) { fn.call(that, arr[i], i, arr); }
  
    },
    // shallow copy
    extend: function(from) {
  
      var to = arguments[1] || this, prop;
  
      for (prop in from) {
        if ( hasOwn.call(from, prop) ) { to[prop] = from[prop]; }
      }
    }
  };

// ---------------- End Generic utils ----------------------------

// ---------------- Split to Parts -------------------------------

  // returns an array of tokens. for example
  // 'ul li:first-child' becomes
  // [0] {find: 'tag', selector: 'ul'}
  // [1] {find: 'pseudo', selector: 'li:first-child'}
  // These are processed by search.
  splitToParts = function(selector) {

      // Match Types
      // ['selector',
      // 1:'pseudo', 2:'attribute', 3:'relative', 4:'id', 5:'className', 6:'tag']

      var regEx = rxSplitParts,
          matches = partMatches,
          len = matches.length,
          parts = [],
          match, i = 1;
  
      // reset regEx counter for exec	
      regEx.lastIndex = 0;

      while (match = regEx.exec(selector)) {

        // loop through match types
        for (i = 1; i < len; i++) {
          if (match[i]) {
            parts.push({find: matches[i], selector: match[0]}); break;
          }
        }
      }
    return parts;
  };

// ----------------------------------- End Split to Parts --------------------------------------------

// ----------------------------------- Qselect -------------------------------------------------------

  Qselect = function( selector, root ) {
    
    if ( !(this instanceof Qselect) ) { return new Qselect( selector, root ); }
    
    var parts,   
        root = root || doc;
        selector = selector.replace(/^[ ]|[ ]$/g, '');
  
    if( Qselect.cache[selector] ) { return Qselect.cache[selector]; }
    
    // Need to also handle where root maybe an html collection
    if (root instanceof Qselect) { 
      parts = splitToParts(root.selector.match(/([^\\s]+)$/)[1] + ' ' + selector);
      root = root.els;
    } else {
      parts = splitToParts(selector);
    }
       
    this.els = Qselect.search(parts, root);
    this.selector = selector; //Todo this is flawed. Needs to be root + selector
    Qselect.cache[selector] = this;
  };

  // Add utils as static properties of Qselect
  utils.extend(utils, Qselect);

  Qselect.extend({

    rxClasses : {

      "CLASS": /(\\w+)?\\.([-\\w]+)/,

      "PSEUDO": /(\\w+):([-\\w]+)(?:\\(([^)]+)\\))?/,

      "ATTRIBUTE": /(\\w+)?\\[/ // Todo Finish this expression

    },
  
    cache : {}, // needs work.

    // The main search method.
    search : function search( selector, root ){

      var query = selector[0],
          rest = selector.slice(1),
          results = [],
          els = (root.length) ? root : false, 
          len, i = 0;
      
      if (!els) {
        if (query.find === 'relative') {
          els = mapFind.relative(root, query.selector, rest[0]);
          rest = rest.slice(1);
        } else {
          els = mapFind[query.find](root, query.selector);
        }
      }

      for (len = els.length; i < len; i++) {
  
        if (rest[0]) { push.apply( results, search(rest, els[i]) ); }
    
        else { results.push (els[i]); }
      }
      return results;
    }
  });

  Qselect.prototype.get = function(){ return this.els; };

  filter = {

    attr : function (elems, attr, name) {

      var results = [], len = elems.length, i = 0;

      for (; i < len; i++) { 
        if (elems[i][attr] === name) { results.push(elems[i]); }
      }  
      return results;  
    }
  };

  mapFind = {

    id : function(root, selector){ return find.id(root, selector.slice(1)); },

    tag : function(root, selector){ return find.tag(root, selector); },

    className : function(root, selector){

      var match = selector.match(Qselect.rxClasses.CLASS),
          className = match[2],
          tag = match[1];
    
      return (!tag)
        ? find.className( root, className )
        : filter.attr( find.className(root, className), 'nodeName', tag.toUpperCase() );
  
    },

    pseudo : function(root, selector){

      var match = selector.match(Qselect.rxClasses.PSEUDO),
          tag = match[1], pseudo = match[2], expr = match[3];
  
      return find.pseudo( root, tag, pseudo, expr );
  
    },

    attribute : function(root, selector){
     // Todo
    },

    relative : function(root, selector, query){
      return find.relative(root, selector, query);
    }
  };
  
  find = {
    // ID
    id : function (root, id) { return [root.getElementById(id)]; },	
    // TAG
    tag : function (root, tag) { return Qselect.slice(root.getElementsByTagName(tag)); },
    // CLASSNAME
    className : (function() {

      if (doc.getElementsByClassName) {
  
        return function (root, className) {   
          return Qselect.slice(root.getElementsByClassName(className));    
        };
  
      }
  
      return function (root, className) {
  
        var toMatch = new RegExp("\\\\b" + className + "\\\\b"),
            elems = root.getElementsByTagName('*'),
            len = elems.length,
            results = [],
            elem = elems[0],
            i = 0, j = 0;
    
        for (; i < len; elem = elems[++i]) {   
          if (toMatch.test(elem.className)) { results[j++] = elem; }
        }
        return results;	
      };
    }(doc)),	
    // PSEUDO
    pseudo : function( root, tag, pseudo, expr ) {
  
      var els = root.getElementsByTagName(tag),
          filtered = [],
          len, el, i = 0, j = 0;

      if (/(first|last)-child/.test(pseudo)) {
  
        len = els.length; el = els[0];
    
        for (; i < len; el = els[++i]) { if (child[pseudo](el)) { filtered[j++] = el; } }

        return filtered;	
      }	
      // Note: May re-write pseudo to take a node and return a boolean instead.
      // That way can eliminate this extra conditional branching.
      if (/nth-child/.test(pseudo)) {
        Qselect.forEach(groupChildren(els), function(els){
          push.apply(filtered, child[pseudo](els, expr));
        });
      }	
    return filtered;
    },
    
    // only takes '>' at the moment.
    relative: function(root, selector, query) {

      var els = mapFind[query.find](root, query.selector),
          len = els.length,
          filtered = [],
          i = 0, j = 0;
        
      for (; i < len; i++) {
        if(relative[selector]( root, els[i] )) { filtered[j++] = els[i]; }
      }     
      return filtered;   
    }
  };

  child = {

    // split nth-child expression in to matches
    rxExpr : /(-(?=\\d*n\\+\\d+))?(\\d*)(?:(n)(?:([-+])(\\d+))?)?/,
  
    exprCache : {odd: 0, even: 1},
  
    "first-child" : function(el) {

      var currElem = el;

      while (currElem = currElem.previousSibling) {
        if (currElem.nodeType === 1) { return false; }
      }
      return true;
    },

    "last-child" : function(el) {

      var currElem = el;

      while (currElem = currElem.nextSibling) {
        if (currElem.nodeType === 1) { return false; }
      }
      return true;
    },

    "nth-child" : function(els, expr) {
  
      var filtered = [],
          len = els.length,
          e = {}, n = 0, i = 0, j = 0;

      // if just a quick index value e.g. (5). return that element
      if (/^\\d+$/.test(expr)){ return [els[expr]]; }

      if (/^odd|even$/.test(expr)) { n = 2; i = child.exprCache[expr]; }
  
      else {

        // if the matched/split expression isn't cached.
        if (!child.exprCache[expr]) {

          child.rxExpr.test(expr);
          // For clarity have labeled the matches
          child.exprCache[expr] = {
            oper1 : RegExp.$1, // -
            num : RegExp.$2,   // 3
            n : RegExp.$3,     // n
            oper2 : RegExp.$4, // +
            offset : RegExp.$5 // 9
          };
        }
        // n is the step i.e. 2n is a step of 2
        // i is the first element.
        e = child.exprCache[expr];
        n = parseInt(e.num || 1, 10);
        i = parseInt((e.oper2)
          ? (e.oper2 === '+')
            ? e.offset : (e.num || e.offset) - e.offset
          : (e.num || 1), 10)-1;
      }

      if (!e.oper1) { for ( ; i < len; i += n ) { filtered[j++] = els[i]; } }

      else { for (; i > -1; i -= n ) { filtered.unshift(els[i]); } }

      return filtered;	
    }	
  };

  attribute = {
  
  };
  
  relative = {
  
    ">" : function( root, node ){

      if (node.parentNode === root) { return true; }
      return false;
  
    }
  };

  // groups elements in parent->children groups
  // need this for nth-child
  groupChildren = function(els){

    var len = els.length,
        groups = [], parents = [],
        pId = 0, // unique id for parentNodes
        el, pNode, i = 0;
  
    for (; i < len; i++) {
      el = els[i];
      pNode = el.parentNode;
      if ( pNode._pId === undefined ) {
        pNode._pId = pId;
        groups[pId] = []; // add new parent group
        parents[pId++] = pNode; // store parent node for clean up later
      }
      groups[pNode._pId].push(el);
    }
    // cleanup parents and remove unique ids
    while(parents.length) { propDelete(parents.pop(), '_pId'); }

    return groups;
  };

  // delete property from element
  propDelete = (function(el, prop){
    try {
      var body = doc.body, tmpId = '_tmpId:' + new Date();	
      body[tmpId] = true;
      delete body[tmpId]; // try this
      return function(el, prop){ delete el[prop]; };
    } catch(e){
      body.removeAttribute(tmpId);
      return function(el,prop){ el.removeAttribute(prop); };
    }
  }());

  // Todo: Add to framework, rather than window.
  global.Qselect = Qselect;

}(window.document, window));

This is a simple test I’m running on the code. I’ve just added the ability to use the returned object as the root for subsequent selections. As pointed out by Raffles. Still needs work though.

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>qSelect test</title>
<style type='text/css'></style>
</head>
<body id = 'main'>
<div class = 'testLists'>
  <h2>List 1</h2>
  <ul class = 'uList1'>
    <li>Item 1</li><li>Item 2</li><li>Item 3</li>
    <li>Item 4
      <ul class = 'uList2'>
        <li>Item 1</li><li>Item 2</li><li>Item 3</li>
        <li>Item 4</li><li>Item 5</li><li>Item 6</li>
      </ul>
    </li>
    <li>Item 5</li><li>Item 6</li>
    <li>Item 7</li>
  </ul>
  <h2>List 2</h2>
  <ul class = 'uList3'>
    <li>Item 1</li><li>Item 2</li><li>Item 3</li>
    <li>Item 4</li><li>Item 5</li><li>Item 6</li>
  </ul>
</div>
<!--qSelect is intended to be a selector module, which will be added to a simple framework-->
<script type = 'text/javascript' src = 'qSelect.js'></script>
<script type='text/javascript'>

// Just a quick function to test Qselect
function testQselect(selector, root, style){
  var elems = Qselect(selector, root).get(), //get Elements here
      style = style.match(/\\s?([^:]+):\\s?(.+)/);   
  Qselect.forEach(elems, function(el){
	el.style[style[1]] = style[2];
  });
}

// A test to see if root is functioning properly
var list1 = Qselect('div.testLists ul.uList1'),
    list3 = Qselect('div.testLists ul.uList3');

testQselect('li:nth-child(odd)', list1,  'background: #ccf');
testQselect('li:nth-child(2n)', list3,  'background: #cfc');
testQselect('> li', list1, 'listStyle: none');
</script>
</body>
</html>

Thank you very much

RLM

That’s something that seems to be more relevant to classes, where you should favour using interfaces instead of sub-classing.

One of the more understandable pieces about this I’ve found over at Favour composition over inheritance: A real world OO example

Thanks for link Paul. I’ve had a quick read of that and the links attached to that article, but it’s going to need some further reading. Cheers

I read somewhere that while we can write complicated code, we only have half of the ability to test and understand that code. A natural conclusion that flows from this is that we should write easy to understand code, to improve our ability to test and understand it.

So, I’m running the code through jslint to help catch the more common issues that can get in the way of clearly understanding code.

Here are some notes that come from that:

The extend function doesn’t restrict the for…in properties.


for (prop in from) {
    ...
}

Any time a for…in loop is performed, you really do have to use hasOwnProperty to protect against inherited problems.


for (prop in from) {
    if (from.hasOwnProperty(prop)) {
        ...
    }
}

I had to stop and think for a bit about this next part of code. It wasn’t immediately clear as to what it does.


len = els.length;
el = els[0];
for (; i < len; el = els[++i]) {
    if (child[pseudo](el)) {
        filtered[j++] = el;
    }
}

When you have to stop and think about what’s going on, that’s a sign that it could be made easier to understand.

I know that the following suggestion makes for just another boring loop, but it’s instantly understandable now.


len = els.length;
for (i = 0; i < len; i += 1) {
    el = els[i];
    if (child[pseudo](el)) {
        filtered[j] = el;
        j += 1;
    }
}

Some other interesting issues were resolved too, such as the while loop in the splitToParts function. It looks to be inspired from handling PHP database rows, but with this while loop, is it checking that the assignment was capable of being made, or is it checking that the match is a truthy value?


while (match = regEx.exec(selector)) {

Another interesting area is in the realm of regular expressions.


"PSEUDO": /(\\w+):([-\\w]+)(?:\\(([color="red"][^)][/color]+)\\))?/,

It’s considered to be bad-form to use a negating class, because they can accept more garbage than you might think.

So, the question is why is the negating class used here? Clearly it’s because you don’t want it to greedily match more than it should, but is there a way for regular expressions to be explicitly told to not be greedy?

Apparently there is with the question mark (from: https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Regular_Expressions)

If used immediately after any of the quantifiers *, +, ?, or {}, makes the quantifier non-greedy (matching the minimum number of times), as opposed to the default, which is greedy (matching the maximum number of times). For example, using /\d+/ non-global match “123abc” return “123”, if using /\d+?/, only “1” will be matched.

So the following could be used instead of the negating class:
\w+?


"PSEUDO": /(\\w+):([-\\w]+)(?:\\((\\w+?)\\))?/,

Anyway after all of that, it’s been instructional to go through the jslint process. There are structural thoughts on the code that I’ve been having as well, so I’ll try some things out to see if I can make it clear to myself, before bringing it up here.

Paul,

I greatly appreciate you taking the time out to work through my code (even regex’s). Very decent of you.

In response.

Firstly I did run my code through Jslint previously. In fact I was surpirsed the thing ran at all, some of the errors it picked up on.

I read somewhere that while we can write complicated code, we only have half of the ability to test and understand that code. A natural conclusion that flows from this is that we should write easy to understand code, to improve our ability to test and understand it.

Words of wisdom. Completely agree.

The extend function doesn’t restrict the for…in properties.

Actually it does. JSLint missed that one, but it’s understandable. Note ‘hasOwn’.

var hasOwn = Object.prototype.hasOwnProperty;
.
.
.
    extend: function(from) {
  
      var to = arguments[1] || this, prop;
  
      for (prop in from) {
        if ( hasOwn.call(from, prop) ) { to[prop] = from[prop]; }
      }
    }

I had to stop and think for a bit about this next part of code. It wasn’t immediately clear as to what it does.

I know that the following suggestion makes for just another boring loop, but it’s instantly understandable now.

Again completely agreed. Come back to it even a week later and likewise you’re left scratching your head.

It’s considered to be bad-form to use a negating class, because they can accept more garbage than you might think.

I really wasn’t aware of that. ‘The Regular Expressions Cookbook’ covers the use of negative matching and how the engine works, backtracking etc. Another decent writeup is this one here Regex Tutorial - Possessive Quantifiers

What you suggest works a treat. I would like to know more about how it works behind the scenes.

I believe the usual mistake that people make is to use .+ or .*[character] not realising that ‘.’ will gobble up the whole line and then backtrack. Even if it comes up with the desired match it’s inefficient.

And thinking about it the +? has an advantages in that it’s not limiting you just to negating characters.

A few tests

var strg = 'capture the first sentence. not the second sentence. something random here';
// .+ fails here gobbling everything up and then backtracking to the first '.'
console.log(strg.match(/^.+\\./)); // capture the first sentence. not the second sentence.

// using a negating match. everything but '.' 1 or many times.
console.log(strg.match(/^[^.]+\\./)); // capture the first sentence.

// using the non greedy greedy method +?
console.log(strg.match(/^[\\w\\s]+?\\./)); // capture the first sentence.

console.log(strg.match(/^[\\w\\s]+?first/)); // capture the first

Excellent:)

By the way jslint does also pick up on this [-+] in my expressions, which I think it’s wrong to do.

[-+] is fine however [±] isn’t as the - now represents a range.

Excellent application though JSLint.

Again thanks Paul. Really informative.

Back to the design pattern books.

RLM

Ahh, I made the mistaken conclusion that the hasOwn function was somehow related to the css checking. Is this another case there doing the plain-old boring technique can help to reduce confusion? It would definitely remove the need to have hasOwn be globally available to everywhere as well.

Ahh, did you run it with “The Good Parts” turned on? I also like to enable “Assume a browser” too.

With those good parts, many many more things are found, enough to make you cry sometimes, as the catch-phrase says.

Understanding why some of them are issues can be even more difficult, but they commonly boil down to it being easier to misunderstand things when they’re done some other way.

Ahh, did you run it with “The Good Parts” turned on? I also like to enable “Assume a browser” too.

Assume a browser yes. It was about 2 or 3 in the morning when I had finished running through it. I then clicked on the ‘The b******d parts’, just to see and decided to call it a night. It’s not ‘good’ at all.

I’m doing a bit of experimentation at the moment. Concentarting on the Utils section, as that’s going to offer functionality that probably all the other modules will need.

It’s only v.rough, just thinking about design ideas at this point, rather than details.


(function(global, doc){

  var utils = (function(){
  
    var slice = Array.prototype.slice,
        push = Array.prototype.push,
        concat = Array.prototype.concat;
        
    var toString = Object.prototype.toString,
        objTypes = {
          'array' : '[object Array]',
          'object' : '[object Object]', 
          'regexp' : '[object RegExp]', 
          'function' : '[object Function]',
          'string' : '[object String]'
        },  
        hasOwn = Object.hasOwnProperty,
    
    _array = {
    
      toArray : function(obj){
        return slice.call(obj);
      },
      
      pushCall : function(obj){
        return push.apply(obj, slice.call(arguments, 1));
      }
    
    },
    
    _type = {
    
      isObj : function(obj, type) {
        return toString.call(obj) === objTypes[type.toLowerCase()];
      }
      
    },
    
    _extend = function(obj, ){
      for (var prop in obj){
        if (hasOwn.call(obj, prop)) {
          this[prop] = obj[prop];
        }
      }
    };
    
    return {
      array : _array,
      type : _type,
      extend : _extend
    };
  
  })(global, doc));

}(window, window.document))

I’m toying with the idea of being able to extend selectively. As I’m writing this I’m thinking I need to go and read up on the ‘sandbox pattern’ again maybe.

What I’m saying though is that I could extend an objects like this
the.prototype.extend = utils.extend;
the.prototype.extend(utils, {array : [‘slice’, ‘push’, ‘forEach’], types : [‘isObj’]});

maybe that’s too finely grained or over complicating. I’m really not sure on this. Here’s a first rough. i know there are certain flaws at this point.

Also know that YUI just goes with approach of adding a reference within an other module like var arr = utils.array, typ = utils.types; Might be simpler.

Need to go away and think about it.

var utils = {

  array : {
  
    slice : function(){ console.log('slice'); },
    
    pushCall : function(){ console.log('pushCall'); },
    
    forEach : function(){ console.log('forEach'); }
    
  },
  
  types : {
  
    isObj : function(){ console.log('isObj'); },

    hasOwn : function(){ console.log('hasOwn'); }   
  },
  
  extend : function(obj, selection){

    var eachProp, len = 0, i = 0;

    if ({}.toString.call(selection) === '[object Object]'){
      for (var prop in selection){
    
        if(selection.hasOwnProperty(prop) && obj[prop]){
      
          eachProp = selection[prop];
          len = eachProp.length; i = 0;
        
          for (; i < len; i += 1 ){
            this[prop] = obj[prop][eachProp[i]];
          }
        }
      }
      return;
    }
  
    for (var prop in obj){
      if (obj.hasOwnProperty(prop)) { this[prop] = obj[prop]; }
   }
  }

}

var Func = function(){};
Func.extend = Func.prototype.extend = utils.extend;

Func.prototype.extend(utils, {
  array : ['slice', 'forEach'],
  types : ['isObj']
});

console.dir(new Func());

Given that the utils part is deemed to be a separate part that has the potential to be used by multiple modules, it should be completely separated out from the qselect code itself.

Having the qselect code extend itself to add on the utils might not be useful, especially if you have 10 other blocks of code that make good use of your utils.

The usual way to handle such a situation is to kept the utils in a globally accessible place. That way the cod that relies on it can either make explicit use of it, or it can have a reference to the utils passed in, so that it can put that reference where it can be easily accessed.

This should help out then. I’ve attached the script file after working through all of those “good parts” issues that jslint found.

There are still some niggles, such as many outer (global) variables that function rely on, but for the most part it’s done.

Given that the utils part is deemed to be a separate part that has the potential to be used by multiple modules, it should be completely separated out from the qselect code itself.

Having the qselect code extend itself to add on the utils might not be useful, especially if you have 10 other blocks of code that make good use of your utils.

The usual way to handle such a situation is to kept the utils in a globally accessible place. That way the cod that relies on it can either make explicit use of it, or it can have a reference to the utils passed in, so that it can put that reference where it can be easily accessed.

Yes that sounds like a good plan.

Again a big thank you Paul, for putting yourself out. Not only for the advice, but for cleaning up my code.:tup:

RLM

That’s all right, it’s all part of what this code review thing is all about.

I’m trying to toss up whether the j variable can be completely removed. Here’s the thought process that’s involved with it.

Currently, using a separately increasing index is useful for performance:


if (/(first|last)-child/.test(pseudo)) {
    len = els.length;
    el = els[0];
    while (i < len) {
        if (child[pseudo](el)) {
            filtered[j] = el;
            j += 1;
        }
        i += 1;
        el = els[i];
    }
    return filtered;
}

but it is messy in that it introduces more for us to potentially worry about, and it gets in the way of us better understanding the purpose of the code.

One option is to use the array length


if (child[pseudo](el)) {
    filtered[filtered.length] = el;
}

which is cleaner, but there is a miniscule performance impact as it checks the array length each time.

Another option is to use the array push method:


if (child[pseudo](el)) {
    filtered.push(el);
}

but push methods are slower when used in a loop. They do have a performance benefit though when pushing a complete array. How could we do that? By using the array filter method? (compatability code for older browser is on the same page)


filtered.push(els.filter(...));

Perhaps. What are we filtering? From the if statement above, it seems that we are removing anything that is falsy in nature.

A quick web search turns up an article on remove falsy elements from array where you can just filter against the Boolean object to remove falsy values.

However, we can’t quite use that here because we’re not starting with an already processed array. So instead, we can just return what that if statement was checking for.


filtered.push(els.filter(function (el) {
    return child[pseudo](el);
}));

I don’t think we even need to push now though, what’s the updated code at this point?


if (/(first|last)-child/.test(pseudo)) {
    len = els.length;
    el = els[0];
    filtered.push(els.filter(function (el) {
        return child[pseudo](el);
    }));
    return filtered;
}

Why don’t we just return the filtered results? That would be cleaner, right?


if (/(first|last)-child/.test(pseudo)) {
    return els.filter(function (el) {
        return child[pseudo](el);
    });
}

Is that code clearer than what we started with?


if (/(first|last)-child/.test(pseudo)) {
    len = els.length;
    el = els[0];
    while (i < len) {
        if (child[pseudo](el)) {
            filtered[j] = el;
            j += 1;
        }
        i += 1;
        el = els[i];
    }
    return filtered;
}

Paul, I’ll have a good read through your comments. Thanks you.

Just a quick scan and ‘filter’ caught my eye. What a gem. I’m sure will come in v handy. Nice one.

I had a look at it in the ECMA docs as well just to try and figure out the arguments etc.

This is a cut back implementation I’ve come up with, not augmenting the prototype. Works in ie6.


var aFilter = Array.prototype.filter;

var filter = aFilter
  ? function(arr, fn, context){
      return aFilter.call(arr, fn, context);
    }
  : function(arr, fn, context) {
  
      var results = [],
          len = arr.length,
          i = 0;
    
      for (; i < len; i += 1){
        if (i in arr) {// skip the blanks
          if (fn.call(context, arr[i], i, arr)) {
            results.push(arr[i]);
          }
        }
      } 
      return results;      
    };

// Tests
var x = ['a','b','c','d'];
var y = { a : 1, b : 2, c : 'c', d : 4 };

alert(filter(x, function(i){
  if (/[ac]/.test(i)) return true;
})); // ['a','c']

// using the context of y
alert(filter(x, function(i){
  if (typeof this[i] === 'number') return true;
}, y)); // ['a','b','d']

Back to your comments.

Reading through your comments/tutorial has really put a smile on my face. That’s fantastic.

I think I’m write in saying that we’re talking about higher-order functions here. Reasonably straight forward to setup, the difficult part is knowing how and when to put them to use. It’s a way of thinking, that I hope will fall into place.

In addition I have to say there’s probably quite a lot of the newer javascript features that I’m not utilising or even aware of. I only stumbled apon String.prototype.trim today.

Console.dir(String.prototype) in chrome can come in handy here.

Hopefully the upcoming Definitive Guide will document all these features.

Thanks again Paul. A master class.

Edit:

Ignore this post, it’s complete garbage where I make some incorrect assumptions and even worse, do no test what I’m talking about.

[s]I’m going through other j indexed code, seeing what I can do to improve on them.

This part was interesting:


while (i < len) {
    if (toMatch.test(elem.className)) {
        results[j] = elem;
        j += 1;
    }
    i += 1;
    elem = elems[i];
}

Which messes with my head, until I turn it in to an easily understandable loop


for (i = 0; i < len; i += 1) {
    elem = elems[i];
    if (toMatch.test(elem.className)) {
        results[j] = elem;
        j += 1;
    }
}

So if an element has multiple classnames that are the same, for example:


<div class="bang duck bang">

you will end up with an array that has multiple references to the same element. Is that what you really want from this function?

Could the function be simplified to this, which also removes the multiple identical elements problem:


results = elem.filter(function () {
    return toMatch.test(this.className);
});

[/s]

Actually Paul this was what I had. You can see in the code pasted above. I’m guessing the while loop was something you added into the JSLint cleanup?

        var toMatch = new RegExp("\\\\b" + className + "\\\\b"),
            elems = root.getElementsByTagName('*'),
            len = elems.length,
            results = [],
            elem = elems[0],
            i = 0, j = 0;
   
        for (; i < len; elem = elems[++i]) {   
          if (toMatch.test(elem.className)) { results[j++] = elem; }
        }
        return results;  

Either way still fits in to category of ‘obfuscation’ though.

you will end up with an array that has multiple references to the same element. Is that what you really want from this function?

I don’t see how that will happen.

<body>
<div class='bang duck bang'></div>
<script type="text/javascript">
var toMatch = new RegExp("\\\\bbang\\\\b"),
    elems = document.getElementsByTagName('*'),
    len = elems.length,
    results = [],
    i = 0;

for (; i < len; i += 1) {
    if (toMatch.test(elems[i].className)) {
        results.push(elems[i]);
    }
}

console.dir(results); // ['div.bang']
</script>
</body>

Again

results = elem.filter(function () {
    return toMatch.test(this.className);
});

You’re trying to call filter on a node aren’t you?

And even if you change it to ‘elems’ then you are trying to call filter on an htmlcollection which won’t work. I also don’t understand where ‘this’ is coming from?

This is a fall back for the native document.getElementsByClassName. Which version of IE was that introduced?

We can slice that collection first, but in older versions of IE which this function is geared towards that involves falling back on looping through the collection and copying to an array as slice.call won’t work. In addition to that the native filter won’t be available either, although I can see the advantages of abstracting the loop into a function.

maybe I’ve got the wrong end of the stick, but after slice.calling the html collection, the closest I came to that was this.

results = elems.filter(function (i) {
    return toMatch.test(i.className);
});

Is that what you meant or am I off by a country mile? Wouldn’t be the first time.

Thanks

RLM

Yep, you’re spot on there. That is what I was intending to reduce things down to.

I was on completely the wrong track and getting confused (without any testing either) about what as being looped through.

ah that’s good. Cheers Paul, I thought I was starting to lose it.:smiley:

I mentioned in another thread about one drawback to top down selectors being that they can return duplicate elements.

i.e.

<div>
  <div>
    <span></span>
  </div>
</div>

‘div span’ will return 2 span elements.

John Resig explains in Secrets of the Javascript Ninja that one solution is to add unique id’s to the elements and filter out the duplicates that way.

However having a peek under the hood of sizzle it seems he’s handled it differently, by using sort and compareDocumentPosition or sourceIndex. His code baffles me as always but searching for compareDocumentPosition came up with 2 useful links.

John Resig - Comparing Document Position

and John’s source

DOM extension - getElementsByTagNames

Well now Paul’s introduced me to array.filter and I want to hit everything in site with my new hammer, I came up with this.

var sortNodes = docEl.sourceIndex
  ? function(a,b){
      return a.sourceIndex - b.sourceIndex;
    }
  : function(a,b){
      return 3 - (a.compareDocumentPosition(b) & 6);
    };

var removeDupe = function(arr){ 
  return arr.sort(sortNodes).filter(
    function(val, i , arr){
      if (val !== arr[i - 1]) return true;
    });
};

A test

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Options</title>
<style type="text/css">
body { font-size: 14px; font-family: verdana; };
</style>
</head>
<body>
<h2> Original </h2>
<ul>
  <li>List Item1</li>
  <li>List Item2</li>
  <li>List Item3</li>
</ul>
<h2> Duplicates </h2>
<ul id = 'before'></ul>
<h2> Remove Duplicates </h2>
<ul id = 'after'></ul>

<script type="text/javascript">  
var doc = window.document,
    docEl = doc.documentElement,
    aFilter = Array.prototype.filter,
    aSlice = Array.prototype.slice,
 
filter = aFilter
  ? function(arr, fn, context){
      return aFilter.call(arr, fn, context);
    }
  : function(arr, fn, context) {
      var results = [],
          len = arr.length, i = 0;
   
      for (; i < len; i += 1){
        if (i in arr) {// skip the blanks
          if (fn.call(context, arr[i], i, arr)) {
            results.push(arr[i]);
          }
        }
      }
      return results;     
    },

slice = (function () {
  try {
    aSlice.call(doc.createElement('div'));
    return function (obj) { return aSlice.call(obj); };
  } catch (e) {
    return function (obj) {
      var ret = [], 
          len = obj.length;

      if (len) {
        while (len--) { 
          ret[len] = obj[len];
        }
      }
      return ret;
    };
  }
}());    

// Bitmask
// 2 = precedes  4 = following. a and 6 matches 4 or 2
// 3 - 4 = -1(a before) or 3 - 2 = 1(a after)
var sortNodes = docEl.sourceIndex
  ? function(a,b){
      return a.sourceIndex - b.sourceIndex;
    }
  : function(a,b){
      return 3 - (a.compareDocumentPosition(b) & 6);
    };

var removeDupe = function(arr){ 
  return filter( arr.sort(sortNodes),
    function(val, i , arr){
      if (val !== arr[i - 1]) return true;
    });
};

// Test
var liItems = slice(doc.getElementsByTagName('li')),
    mix = [].concat.call(liItems, liItems); // add duplicates

function display(els, root){
  var len = els.length, i = 0, li;      
  for (; i < len; i += 1){
    li = doc.createElement('li');
    li.innerHTML = els[i].innerHTML;
    root.appendChild(li);
  }
}

display(mix, doc.getElementById('before'));
display(removeDupe(mix), doc.getElementById('after'));
</script>
</body>
</html>

I’ve only tested in firefox and IE6, but it seems to do it’s job.

RLM

Would using the array indexOf() method before adding an item to the array remove the need to then filter out duplicates?

I’m also looking at the code which handles nth-child, because I think that I can help to simplify what’s going on there.
Am I right to believe that a leading minus sign doesn’t change the outcome?

Selectors Level 3

[indent]The value a can be negative, but only the positive values of an+b, for n≥0, may represent an element in the document tree.

Example:
html|tr:nth-child(-n+6) /* represents the 6 first rows of XHTML tables */
[/indent]