I am working with d3.js to visualise families of animals (organisms) (up to 4000 at a time) as a tree graph, though the data source could just as well be a directory listing, or
Given your starting input I believe something like the following code will produce your desired output. I don't imagine this is the prettiest way to do it, but it's what came to mind at the time.
It seemed easiest to pre-process the data to first split up the initial array of strings into an array of arrays like this:
[
["Hemiptera","Miridae","Kanakamiris" ],
["Hemiptera","Miridae","Neophloeobia","incisa" ],
//etc
]
...and then process that to get a working object in a form something like this:
working = {
Hemiptera : {
Miridae : {
Kanakamiris : {},
Neophloeobia : {
incisa : {}
}
}
},
Lepidoptera : {
Nymphalidae : {
Ephinephile : {
rawnsleyi : {}
}
}
}
}
...because working with objects rather than arrays makes it easier to test whether child items already exist. Having created the above structure I then process it one last time to get your final desired output. So:
// start by remapping the data to an array of arrays
var organisms = data.organisms.map(function(v) {
return v.name.split(".");
});
// this function recursively processes the above array of arrays
// to create an object whose properties are also objects
function addToHeirarchy(val, level, heirarchy) {
if (val[level]) {
if (!heirarchy.hasOwnProperty(val[level]))
heirarchy[val[level]] = {};
addToHeirarchy(val, level + 1, heirarchy[val[level]]);
}
}
var working = {};
for (var i = 0; i < organisms.length; i++)
addToHeirarchy(organisms[i], 0, working);
// this function recursively processes the object created above
// to create the desired final structure
function remapHeirarchy(item) {
var children = [];
for (var k in item) {
children.push({
"name" : k,
"children" : remapHeirarchy(item[k])
});
}
return children;
}
var heirarchy = {
"name" : "ROOT",
"children" : remapHeirarchy(working)
};
Demo: http://jsfiddle.net/a669F/1/
The following is specific to the structure you've provided, it could be made more generic fairly easily. I'm sure the addChild function can be simplified. Hopefully the comments are helpful.
function toHeirarchy(obj) {
// Get the organisms array
var orgName, orgNames = obj.organisms;
// Make root object
var root = {name:'ROOT', children:[]};
// For each organism, get the name parts
for (var i=0, iLen=orgNames.length; i<iLen; i++) {
orgName = orgNames[i].name.split('.');
// Start from root.children
children = root.children;
// For each part of name, get child if already have it
// or add new object and child if not
for (var j=0, jLen=orgName.length; j<jLen; j++) {
children = addChild(children, orgName[j]);
}
}
return root;
// Helper function, iterates over children looking for
// name. If found, returns its child array, otherwise adds a new
// child object and child array and returns it.
function addChild(children, name) {
// Look for name in children
for (var i=0, iLen=children.length; i<iLen; i++) {
// If find name, return its child array
if (children[i].name == name) {
return children[i].children;
}
}
// If didn't find name, add a new object and
// return its child array
children.push({'name': name, 'children':[]});
return children[children.length - 1].children;
}
}
An alternative answer to my own question....In the past day I have learn't a great deal more about d3.js and in relation to this question d3.nest() with .key() and .entries() is my friend (all d3 functions). This answer involves changing the initial data, so it may not qualify as a good answer to the specific question i asked. However if someone has a similar question and can change things on the server then this is a pretty simple solution:
return the data from the database in this format:
json = {'Organisms': [
{ 'Rank_Order': 'Hemiptera',
'Rank_Family': 'Miridae',
'Rank_Genus': 'Kanakamiris',
'Rank_Species': '' },
{}, ...
]}
Then using d3.nest()
organismNest = d3.nest()
.key(function(d){return d.Rank_Order;})
.key(function(d){return d.Rank_Family;})
.key(function(d){return d.Rank_Genus;})
.key(function(d){return d.Rank_Species;})
.entries(json.Organism);
this returns:
{
key: "Hemiptera"
values: [
{
key: "Cicadidae"
values: [
{
key: "Pauropsalta "
values: [
{
key: "siccanus"
values: [
Rank_Family: "Cicadidae"
Rank_Genus: "Pauropsalta "
Rank_Order: "Hemiptera"
Rank_Species: "siccanus"
AnotherOriginalDataKey: "original data value"
etc etc, nested and lovely
This returns something very much similar to they array that I described as my desired format above in the question, with a few differences. In particular, There is no all enclosing ROOT element and also whereas they keys I originally wanted were "name" and "children" .nest() returns keys as "key" and "values" respectively. These alternatives keys are easy enough to use in d3.js by just defining appropriate data accessor functions (basic d3 concept) ... but that is getting beyond the original scope of the question ... hope that helps someone too