As made clear in update 3 on this answer, this notation:
var hash = {};
hash[X]
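does not actually hash the object X; it actually just converts X to a string (via toString() if X is an object) and uses that string as the property key.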
My 'Map' implementation, derived from Christoph's example:
Example usage:
// Two alternative ways to construct the map; pick one (both lines declare the same `map` variable).
var map = new Map(); // Creates an "in-memory" map (no storageId, so localStorage is never touched)
var map = new Map("storageId"); // Creates a map that is loaded/persisted using html5 storage (localStorage keys are prefixed with the storageId)
/**
 * Map constructor.
 *
 * Every instance starts empty: no current item and a size of zero.
 *
 * @param {*} [storageId] When truthy, the instance additionally gets a `keys`
 *     array and has its linking features switched off via disableLinking().
 */
function Map(storageId) {
  this.current = undefined;
  this.size = 0;
  this.storageId = storageId;

  // Plain in-memory maps need no further setup.
  if (!this.storageId) {
    return;
  }

  this.keys = [];
  this.disableLinking();
}
/**
 * Shared do-nothing method.
 *
 * @returns {*} The receiver (`this`), so the call can still be chained.
 */
Map.noop = function() {
  return this;
};
/**
 * Shared method that always fails.
 *
 * @throws {Error} Always, with a message saying the operation needs linking.
 */
Map.illegal = function() {
  var reason = "illegal operation for maps without linking";
  throw new Error(reason);
};
/**
 * Builds a new Map from the enumerable properties of an existing object.
 *
 * Inherited properties are skipped unless explicitly requested: omitting
 * foreignKeys leaves it undefined (falsy), so only own properties are added.
 *
 * @param {Object} obj Source object; each property becomes a put(name, value).
 * @param {boolean} [foreignKeys] Truthy to also copy inherited properties.
 * @returns {Map} The newly created map.
 */
Map.from = function(obj, foreignKeys) {
  var map = new Map();
  // Borrow hasOwnProperty from Object.prototype: calling obj.hasOwnProperty
  // directly fails for objects created with Object.create(null) and for
  // objects that define their own "hasOwnProperty" property.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var prop in obj) {
    if (foreignKeys || hasOwn.call(obj, prop)) {
      map.put(prop, obj[prop]);
    }
  }
  return map;
};
/**
 * Switches off the linked-list features on this instance by shadowing the
 * prototype methods with instance-level replacements:
 *  - link, unlink and disableLinking become Map.noop;
 *  - next, key and value become Map.illegal.
 * removeAll is left untouched.
 *
 * @returns {Map} This map, for chaining.
 */
Map.prototype.disableLinking = function() {
  var silenced = ['link', 'unlink', 'disableLinking'];
  var forbidden = ['next', 'key', 'value'];
  var i;

  for (i = 0; i < silenced.length; i++) {
    this[silenced[i]] = Map.noop;
  }
  for (i = 0; i < forbidden.length; i++) {
    this[forbidden[i]] = Map.illegal;
  }
  return this;
};
/**
 * Computes the string under which a key is stored on the map object.
 * Overwrite in a Map instance if necessary.
 *
 * Objects and functions are identified by a numeric tag stored on the key
 * itself in its `__hash` property (assigned on first use from a running
 * counter); every other value is identified by its string form. The result
 * is always prefixed with the key's `typeof`, so 1 and "1" do not collide.
 *
 * @param {*} value The key to hash.
 * @returns {string} "<typeof value> <tag or string form>".
 */
Map.prototype.hash = function hash(value) {
  var type = typeof value;
  // typeof-based test instead of `instanceof Object`, so that objects
  // without Object.prototype in their chain are tagged as well.
  var isReference = (type === 'object' && value !== null) || type === 'function';

  if (isReference) {
    // The counter lives on this function; the function's own name is used
    // because arguments.callee throws a TypeError in strict-mode code.
    return type + ' ' + (value.__hash || (value.__hash = ++hash.current));
  }
  // String() also copes with null and undefined, which have no toString().
  return type + ' ' + String(value);
};
Map.prototype.hash.current = 0;
// --- Mapping functions
/**
 * Looks up the value stored for a key.
 *
 * An entry already held in memory is returned directly. Otherwise, for maps
 * created with a storageId, the entry is read from localStorage under
 * `storageId + key`; a stored string of "undefined" (or nothing at all)
 * counts as absent. A loaded entry is cached on the map, its key is recorded
 * in `keys` and `size` is incremented. Errors raised while loading are
 * logged and otherwise ignored.
 *
 * @param {*} key The key to look up.
 * @returns {*} The stored value, or undefined when there is no entry.
 */
Map.prototype.get = function(key) {
  var entry = this[this.hash(key)];
  if (entry !== undefined) {
    return entry.value;
  }
  if (!this.storageId) {
    return undefined;
  }

  try {
    var raw = localStorage.getItem(this.storageId + key);
    if (raw && raw !== 'undefined') {
      entry = JSON.parse(raw);
      this[this.hash(key)] = entry;
      this.keys.push(key);
      ++this.size;
    }
  } catch (e) {
    console.log(e);
  }

  return entry === undefined ? undefined : entry.value;
};
/**
 * Stores a value under a key, inserting a new entry or updating an existing
 * one.
 *
 * A new entry is linked into the map and counted in `size`. For maps created
 * with a storageId the entry is also written to localStorage under
 * `storageId + key` as JSON; write errors are logged and otherwise ignored.
 *
 * @param {*} key The key to store under.
 * @param {*} value The value to associate with the key.
 * @returns {Map} This map, for chaining.
 */
Map.prototype.put = function(key, value) {
  var hash = this.hash(key);
  var isNew = this[hash] === undefined;

  if (isNew) {
    var item = { key : key, value : value };
    this[hash] = item;
    this.link(item);
    ++this.size;
  } else {
    this[hash].value = value;
  }

  if (this.storageId) {
    // Record the key only on insertion; recording it on every update filled
    // `keys` with duplicates of the same key.
    if (isNew) {
      this.keys.push(key);
    }
    try {
      localStorage.setItem(this.storageId + key, JSON.stringify(this[hash]));
    } catch (e) {
      console.log(e);
    }
  }
  return this;
};
/**
 * Removes the entry stored for a key, if there is one.
 *
 * For maps created with a storageId the persisted entry under
 * `storageId + key` is deleted from localStorage as well; errors raised
 * while doing so are logged and otherwise ignored. The `keys` array is not
 * modified here.
 *
 * @param {*} key The key whose entry should be removed.
 * @returns {Map} This map, for chaining.
 */
Map.prototype.remove = function(key) {
  var hash = this.hash(key);
  var item = this[hash];

  if (item !== undefined) {
    // Decrement before unlinking: unlink() is called with the size the map
    // has once the item is gone.
    --this.size;
    this.unlink(item);
    delete this[hash];
  }

  if (this.storageId) {
    try {
      // removeItem really deletes the entry; setItem(key, undefined) would
      // leave the string "undefined" behind in storage.
      localStorage.removeItem(this.storageId + key);
    } catch (e) {
      console.log(e);
    }
  }
  return this;
};
/**
 * Empties the map.
 *
 * Maps created with a storageId remove every key recorded in `keys` and then
 * reset that array. All other maps rely on the iterator: they keep removing
 * the current key until `size` reaches zero.
 *
 * @returns {Map} This map, for chaining.
 */
Map.prototype.removeAll = function() {
  if (!this.storageId) {
    while (this.size) {
      this.remove(this.key());
    }
    return this;
  }

  var idx = 0;
  while (idx < this.keys.length) {
    this.remove(this.keys[idx]);
    idx++;
  }
  this.keys.length = 0;
  return this;
};
// --- Linked list helper functions
/**
 * Inserts an item into the circular doubly linked list of entries.
 *
 * Does nothing for maps created with a storageId. In an empty map the item
 * points at itself in both directions and becomes the current item;
 * otherwise it is spliced in just before the current item.
 *
 * @param {Object} item Entry to link; gains `prev` and `next` properties.
 */
Map.prototype.link = function(item) {
  if (this.storageId) {
    return;
  }

  if (this.size != 0) {
    var head = this.current;
    var tail = head.prev;
    item.prev = tail;
    tail.next = item;
    item.next = head;
    head.prev = item;
    return;
  }

  // First entry: a ring of one.
  item.prev = item;
  item.next = item;
  this.current = item;
};
/**
 * Takes an item out of the circular doubly linked list of entries.
 *
 * Does nothing for maps created with a storageId. When `size` is already
 * zero the current item is simply cleared; otherwise the item's neighbours
 * are joined to each other and, if the item was the current one, the current
 * item moves on to the item's successor.
 *
 * @param {Object} item Entry to unlink; its `prev` and `next` are read.
 */
Map.prototype.unlink = function(item) {
  if (this.storageId) {
    return;
  }

  if (this.size == 0) {
    this.current = undefined;
    return;
  }

  var before = item.prev;
  var after = item.next;
  before.next = after;
  after.prev = before;

  if (this.current === item) {
    this.current = item.next;
  }
};
// --- Iterator functions - only work if map is linked
/**
 * Advances the iterator: the current item's successor becomes the current
 * item. Returns nothing.
 */
Map.prototype.next = function() {
  var following = this.current.next;
  this.current = following;
};
/**
 * Reads the key of the current item.
 *
 * @returns {*} The current item's key, or undefined for maps created with a
 *     storageId.
 */
Map.prototype.key = function() {
  return this.storageId ? undefined : this.current.key;
};
/**
 * Reads the value of the current item.
 *
 * @returns {*} The current item's value, or undefined for maps created with
 *     a storageId.
 */
Map.prototype.value = function() {
  return this.storageId ? undefined : this.current.value;
};
In ECMAScript 6 you can use WeakMap.
Example:
// WeakMap walkthrough: keys must be objects, values can be anything.
var wm1 = new WeakMap(),
    wm2 = new WeakMap(),
    wm3 = new WeakMap();
var o1 = {},
    o2 = function(){},
    // In a browser the global window object is used as a key; elsewhere any
    // other object serves the same purpose.
    o3 = typeof window !== "undefined" ? window : {};
wm1.set(o1, 37);
wm1.set(o2, "azerty");
wm2.set(o1, o2); // A value can be anything, including an object or a function
wm2.set(o3, undefined);
wm2.set(wm1, wm2); // Keys and values can be any objects. Even WeakMaps!
wm1.get(o2); // "azerty"
wm2.get(o2); // Undefined, because there is no value for o2 on wm2
wm2.get(o3); // Undefined, because that is the set value
wm1.has(o2); // True
wm2.has(o2); // False
wm2.has(o3); // True (even if the value itself is 'undefined')
wm3.set(o1, 37);
wm3.get(o1); // 37
// WeakMap has no standard clear() method (calling it throws a TypeError);
// to drop every entry, replace the map with a fresh one.
wm3 = new WeakMap();
wm3.get(o1); // Undefined, because wm3 was replaced and there is no value for o1 anymore
wm1.has(o1); // True
wm1.delete(o1);
wm1.has(o1); // False
But:
Because of references being weak, WeakMap keys are not enumerable (i.e. there is no method giving you a list of the keys).
As of ECMAScript 2015 (ES6), standard JavaScript has a built-in Map implementation. More about it can be found here.
Basic usage:
// Built-in Map walkthrough: a string, an object and a function used as keys.
var myMap = new Map();

var keyString = "a string";
var keyObj = {};
var keyFunc = function () {};

// Store one value per key (set() returns the map, so the calls chain)
myMap
  .set(keyString, "value associated with 'a string'")
  .set(keyObj, "value associated with keyObj")
  .set(keyFunc, "value associated with keyFunc");

myMap.size; // 3

// Read the values back using the very same keys
myMap.get(keyString); // "value associated with 'a string'"
myMap.get(keyObj); // "value associated with keyObj"
myMap.get(keyFunc); // "value associated with keyFunc"
You can use ECMAScript 6 WeakMap or Map:
WeakMaps are key/value maps in which keys are objects.
Map objects are simple key/value maps. Any value (both objects and primitive values) may be used as either a key or a value.
Be aware that neither is widely supported, but you can use the ECMAScript 6 Shim (which requires native ECMAScript 5 or the ECMAScript 5 Shim) to support Map, but not WeakMap (see why).
If performance is not critical (e.g., the number of keys is relatively small) and you don't want to pollute your (or maybe not your) objects with additional fields like _hash, _id, etc., then you can make use of the fact that Array.prototype.indexOf employs strict equality. Here is a simple implementation:
/**
 * Dict: a small dictionary that accepts any value as a key.
 *
 * Keys and values live in two parallel arrays (`keys` and `values`); a key's
 * value sits at the same index as the key. Lookups scan the key array, so
 * every operation is linear in the number of entries.
 */
var Dict = (function(){
  // Internet Explorer 8 and earlier does not have any Array.prototype.indexOf
  function indexOfPolyfill(val) {
    for (var i = 0, l = this.length; i < l; ++i) {
      if (this[i] === val) {
        return i;
      }
    }
    return -1;
  }

  // Finds the position of `key` in `keys`, or -1. indexOf compares with
  // strict equality, under which NaN never equals itself, so a NaN key is
  // searched for explicitly; without this a NaN key could never be found
  // again and every set(NaN, ...) would add another entry.
  function indexOfKey(keys, key) {
    if (key === key) {
      return keys.indexOf(key);
    }
    for (var i = 0, l = keys.length; i < l; ++i) {
      if (keys[i] !== keys[i]) {
        return i;
      }
    }
    return -1;
  }

  // Creates an empty dictionary.
  function Dict(){
    this.keys = [];
    this.values = [];
    if (!this.keys.indexOf) {
      this.keys.indexOf = indexOfPolyfill;
    }
  }

  // Returns true when the key is present.
  Dict.prototype.has = function(key){
    return indexOfKey(this.keys, key) != -1;
  };

  // Returns the value stored for the key, or defaultValue when absent.
  Dict.prototype.get = function(key, defaultValue){
    var index = indexOfKey(this.keys, key);
    return index == -1 ? defaultValue : this.values[index];
  };

  // Stores a value. Returns the previous value when the key already existed,
  // undefined when a new entry was added.
  Dict.prototype.set = function(key, value){
    var index = indexOfKey(this.keys, key);
    if (index == -1) {
      this.keys.push(key);
      this.values.push(value);
    } else {
      var prevValue = this.values[index];
      this.values[index] = value;
      return prevValue;
    }
  };

  // Removes the key and returns its value (undefined when the key is absent).
  // Bracket notation keeps the definition parseable by ES3 engines, where
  // `delete` is a reserved word and cannot follow a dot.
  Dict.prototype['delete'] = function(key){
    var index = indexOfKey(this.keys, key);
    if (index != -1) {
      this.keys.splice(index, 1);
      return this.values.splice(index, 1)[0];
    }
  };

  // Removes every entry, emptying both arrays in place.
  Dict.prototype.clear = function(){
    this.keys.length = 0;
    this.values.length = 0;
  };

  return Dict;
})();
Example of usage:
// Sample keys of several kinds: plain objects, an object with a custom
// toString, a number, a string, undefined and null.
var a = {};
var b = {};
var c = { toString: function(){ return '1'; } };
var d = 1;
var s = '1';
var u = undefined;
var n = null;
var dict = new Dict();

// Any value is accepted, both as a key and as a value
dict.set(a, 'a');
dict.set(b, 'b');
dict.set(c, 'c');
dict.set(d, 'd');
dict.set(s, 's');
dict.set(u, 'u');
dict.set(n, 'n');

// Each key gives back exactly the value stored under it
dict.get(a); // 'a'
dict.get(b); // 'b'
dict.get(s); // 's'
dict.get(u); // 'u'
dict.get(n); // 'n'
// etc.
Compared to the ECMAScript 6 WeakMap, it has two issues: O(n) search time and non-weakness (i.e., it will cause a memory leak if you don't use delete or clear to release keys).
Hash your objects yourself manually, and use the resulting strings as keys for a regular JavaScript dictionary. After all, you are in the best position to know what makes your objects unique. That's what I do.
Example:
// Maps an object to the string used as its dictionary key.
var key = function(obj){
// Some unique object-dependent key
return obj.totallyUniqueEmployeeIdKey; // Just an example
};
// A plain object serves as the dictionary; its property names are the computed keys.
var dict = {};
// obj1 and obj2 are not declared in this snippet; they stand for the caller's own objects.
dict[key(obj1)] = obj1;
dict[key(obj2)] = obj2;
This way you can control indexing done by JavaScript without heavy lifting of memory allocation, and overflow handling.
Of course, if you truly want the "industrial-grade solution", you can build a class parameterized by the key function, with all the necessary container API, but … we use JavaScript and are trying to keep things simple and lightweight, so this functional solution is simple and fast.
The key function can be as simple as selecting right attributes of the object, e.g., a key, or a set of keys, which are already unique, a combination of keys, which are unique together, or as complex as using some cryptographic hashes like in DojoX encoding, or DojoX UUID. While the latter solutions may produce unique keys, personally I try to avoid them at all costs, especially, if I know what makes my objects unique.
Update in 2014: Answered back in 2008, this simple solution still requires more explanation. Let me clarify the idea in a Q&A form.
Your solution doesn't have a real hash. Where is it???
JavaScript is a high-level language. Its basic primitive (Object) includes a hash table to keep properties. This hash table is usually written in a low-level language for efficiency. Using a simple object with string keys we use an efficiently implemented hash table without any efforts on our part.
How do you know they use a hash?
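There are three major ways to keep a collection of objects addressable by a key:
- Unordered: to retrieve an object we go over all keys until we find a match, which takes O(n) comparisons.
- Ordered (e.g., a sorted array or a tree): a binary search finds the key in O(log n) comparisons.
- Hash table: a lookup takes constant time on average, i.e. O(1).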
Obviously JavaScript objects use hash tables in some form to handle general cases.
Do browser vendors really use hash tables???
Really.
Do they handle collisions?
Yes. See above. If you found a collision on unequal strings, please do not hesitate to file a bug with a vendor.
So what is your idea?
If you want to hash an object, find what makes it unique and use it as a key. Do not try to calculate a real hash or emulate hash tables — it is already efficiently handled by the underlying JavaScript object.
Use this key with JavaScript's Object to leverage its built-in hash table while steering clear of possible clashes with default properties.
Examples to get you started:
I used your suggestion and cached all objects using a user name. But some wise guy is named "toString", which is a built-in property! What should I do now?
Obviously, if it is even remotely possible that the resulting key will consist exclusively of Latin characters, you should do something about it. For example, add any non-Latin Unicode character you like at the beginning or at the end to un-clash with default properties: "#toString", "#MarySmith". If a composite key is used, separate key components using some kind of non-Latin delimiter: "name,city,state".
In general, this is the place where we have to be creative and select the easiest keys with given limitations (uniqueness, potential clashes with default properties).
Note: unique keys do not clash by definition, while potential hash clashes will be handled by the underlying Object.
Why don't you like industrial solutions?
IMHO, the best code is no code at all: it has no errors, requires no maintenance, is easy to understand, and executes instantaneously. All "hash tables in JavaScript" I saw were >100 lines of code, and involved multiple objects. Compare it with: dict[key] = value.
Another point: is it even possible to beat the performance of a primordial object written in a low-level language, using JavaScript and the very same primordial objects to implement what is already implemented?
I still want to hash my objects without any keys!
We are in luck: ECMAScript 6 (released in June 2015) defines map and set.
Judging by the definition, they can use an object's address as a key, which makes objects instantly distinct without artificial keys. OTOH, two different, yet identical objects, will be mapped as distinct.
Comparison breakdown from MDN:
Objects are similar to Maps in that both let you set keys to values, retrieve those values, delete keys, and detect whether something is stored at a key. Because of this (and because there were no built-in alternatives), Objects have been used as Maps historically; however, there are important differences that make using a Map preferable in certain cases:
- The keys of an Object are Strings and Symbols, whereas they can be any value for a Map, including functions, objects, and any primitive.
- The keys in Map are ordered while keys added to object are not. Thus, when iterating over it, a Map object returns keys in order of insertion.
- You can get the size of a Map easily with the size property, while the number of properties in an Object must be determined manually.
- A Map is an iterable and can thus be directly iterated, whereas iterating over an Object requires obtaining its keys in some fashion and iterating over them.
- An Object has a prototype, so there are default keys in the map that could collide with your keys if you're not careful. As of ES5 this can be bypassed by using map = Object.create(null), but this is seldom done.
- A Map may perform better in scenarios involving frequent addition and removal of key pairs.