I have an array of arrays that looks like this:
var arrays = [[1,2,3,4,5],
[1,2,6,4,5],
[1,3,6,4,5],
[1,2,3,6,5],
If you extend the specification of `Array`, it's not actually that complex. The basic idea is to build up the tree level by level, taking one array element at a time and comparing it to the previous one. This is the code (minus extensions):
/**
 * Build one level of the label tree and, recursively, everything below it.
 *
 * Reads the global `arrays` (a list of paths) and relies on the Array
 * extensions compare(), getUnique() and max(), which are defined outside this
 * snippet -- presumably element-wise equality, de-duplication and maximum;
 * confirm against their definitions.
 *
 * @param {Array|null} prevs - partial path leading to this level, or null at the top
 * @param {number} i - index of the path element handled at this level
 * @returns {Array<Object>} nodes of the form { label } or { label, children }
 */
function process(prevs, i) {
    // Distinct values found at position i among the arrays that start with `prevs`.
    var vals = arrays.filter(function(d) { return prevs === null || d.slice(0, i).compare(prevs); })
        .map(function(d) { return d[i]; }).getUnique();
    if (vals.length === 0) {
        return [];
    }
    // Index of the deepest level in any array; it does not depend on the
    // value being processed, so compute it once per call.
    var lastLevel = arrays.map(function(d) { return d.length; }).max() - 1;
    return vals.map(function(d) {
        var ret = { label: d };
        if (i < lastLevel) {
            // Kept in a local variable so recursive calls never share state
            // through an (implicit) global.
            var children = process(prevs === null ? [d] : prevs.concat([d]), i + 1);
            // Paths that end here yield a placeholder child whose label is
            // undefined; only keep the children if at least one is real.
            if (children.some(function(child) { return child.label != undefined; })) {
                ret.children = children;
            }
        }
        return ret;
    });
}
No guarantees that it won't break for edge cases, but it seems to work fine with your data.
Complete jsfiddle here.
Some more detailed explanations:
- First, we get the arrays that are relevant for the current path by `filter`ing out those that are not the same as `prevs`, which is our current (partial) path. At the start, `prevs` is `null` and nothing is filtered.
- For these arrays, we take the values that correspond to the current level of the tree (the `i`th element). Duplicates are filtered. This is done by the `.map()` and `.getUnique()` calls.
- We then iterate over the resulting values (`vals.map()`). For each, we set the `label` attribute. The rest of the code determines whether there are children and gets them through a recursive call. To do this, we first check whether there are elements left in the arrays, i.e. whether we have not yet reached the deepest level of the tree. If so, we make the recursive call, passing in the new `prevs` that includes the element we are currently processing and the next level (`i+1`). Finally, we check the result of this recursive call for empty elements -- if there are only empty children, we don't save them. This is necessary because not all of the arrays (i.e. not all of the paths) have the same length.

Here's a more straightforward function that just uses nested `for`-loops to cycle through all the path instructions in each of your set of arrays.
To make it easier to find the child element with a given label, I have implemented `children` as a data object/associative array instead of a numbered array. If you want to be really robust, you could use a d3.map for the reasons described at that link, but if your labels are actually integers then that's not going to be a problem. Either way, it just means that when you need to access the children as an array (e.g., for the d3 layout functions), you have to specify a function to make an array out of the values of the object -- the d3.values(object) utility function does it for you.
The key code:
// Build a tree from `arrays`, a list of paths. Every node is {label: ...} and
// gains a `children` dictionary (keyed by child label) once a path continues
// below it. `root` itself is a label-less container for the top-level nodes.
var root = {},
    path, node, next, i, j, N, M;
for (i = 0, N = arrays.length; i < N; i++) {
    //for each path in the data array
    path = arrays[i];
    node = root; //start the path from the root
    for (j = 0, M = path.length; j < M; j++) {
        //follow the path through the tree
        //creating new nodes as necessary
        if (!node.children) {
            //undefined, so create it:
            //children is a dictionary (not an array) to allow named keys.
            //It has no prototype, so a label such as "constructor",
            //"toString" or "__proto__" can never be mistaken for an
            //inherited Object.prototype member.
            node.children = Object.create(null);
        }
        next = node.children[path[j]];
        //find the child node whose key matches
        //the label of this step in the path
        if (!next) {
            //undefined, so create
            next = node.children[path[j]] = {label: path[j]};
        }
        node = next;
        // step down the tree before analyzing the
        // next step in the path.
    }
}
Implemented with your sample data array and a basic cluster dendrogram charting method:
http://fiddle.jshell.net/KWc73/
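Edited to add: As mentioned in the comments, to get the output looking exactly as requested, take the data's actual root node out of the placeholder root's children, and then recurse through the tree, replacing each `children` object with an array.
Like this: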
//d3.values() lists the values stored in the current root's children object;
//only the first of them is kept and becomes the new root.
root = d3.values(root.children)[0];
//this is the root from the original data,
//assuming all paths start from one root, like in the example data
//walk the tree from node n downwards and replace every keyed
//children object with a plain array of the child nodes (in place)
function childrenToArray(n){
    //a node without children needs no conversion
    if (!n.children) {
        return;
    }
    //swap the keyed object for an array of its values
    var kids = d3.values(n.children);
    n.children = kids;
    //then do the same for each child
    for (var idx = 0; idx < kids.length; idx++) {
        childrenToArray(kids[idx]);
    }
}
//convert the children objects of root and of all its descendants into arrays, in place
childrenToArray(root);
Updated fiddle:
http://fiddle.jshell.net/KWc73/1/
Since d3-collection has been deprecated in favor of d3-array, we can use d3.groups to achieve what used to work with d3.nest:
// Sample data: each inner array is one path through the tree, listed from the
// top-level label downwards. The paths do not all have the same length.
var input = [
[1, 2, 3, 4, 5],
[1, 2, 6, 4, 5],
[1, 3, 6, 4, 5],
[1, 2, 3, 6, 5],
[1, 7, 5],
[1, 7, 3, 5]
];
/**
 * Turn a list of paths into a nested tree of { label, children } nodes.
 *
 * Paths are grouped (d3.groups) on the element at index `depth`; each group
 * becomes one node, and the paths of that group are processed again one level
 * deeper to produce its children.
 *
 * @param {Array<Array>} arrays - paths sharing the same first `depth` elements
 * @param {number} [depth=0] - index of the path element handled at this level
 * @returns {Array<Object>} nodes shaped { label } (leaf) or { label, children }
 */
function process(arrays, depth = 0) {
  // Only paths that actually have an element at this depth contribute a node;
  // a path that ended one level up must not produce an `undefined` label.
  const rows = arrays.filter(row => row.length > depth);
  return d3.groups(rows, d => d[depth]).map(([label, group]) => {
    // Decide by path length, not by truthiness of the next element, so that
    // falsy labels such as 0 or '' are kept as children.
    const children = process(group, depth + 1);
    return children.length > 0 ? { "label": label, "children": children } : { "label": label };
  });
}
// Print the nested label/children tree built from the sample paths, starting at depth 0.
console.log(process(input, 0));
<script src="https://d3js.org/d3-array.v2.min.js"></script>
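This:
- Works as a recursion on the arrays' depth.
- Each recursion step groups (`d3.groups`) its arrays on the array element whose index is equal to the depth.
- The recursion stops for a group once none of its arrays has an element at the next depth.

Here is the intermediate result produced by `d3.groups` within a recursion step (grouping arrays on their 3rd element):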
// Three paths that all start with [1, 2] and differ from index 2 onwards.
var input = [
[1, 2, 3, 4, 5],
[1, 2, 6, 4, 5],
[1, 2, 3, 6, 5]
];
// Group the paths on their third element (index 2). d3.groups returns an
// array of [key, rows] pairs, so this logs:
// [[3, [[1, 2, 3, 4, 5], [1, 2, 3, 6, 5]]], [6, [[1, 2, 6, 4, 5]]]]
console.log(d3.groups(input, d => d[2]));
<script src="https://d3js.org/d3-array.v2.min.js"></script>
Edit - fixed
Here is my solution.
Pro: It is all in one go (doesn't need objects converting to arrays like above).
Pro: It keeps the size/value count.
Pro: The output is EXACTLY the same as a d3 flare with children.
Con: It is uglier, and likely less efficient.
Big thanks to the previous comments for helping me work it out.
// Sample data: each inner array is one path, from the top-level name downwards.
var data = [[1,2,3,4,5],
[1,2,6,4,5],
[1,3,6,4,5],
[1,2,3,6,5],
[1,7,5],
[1,7,3,5]];

/**
 * Build a d3 "flare"-style hierarchy from a list of paths.
 *
 * Every path element becomes a node {name, children: [...]}; the node at which
 * a path ends carries `size`, the number of paths ending exactly there.
 * Missing elements (undefined or null) are grouped under the name "null".
 *
 * @param {Array<Array>} rows - list of paths
 * @returns {Object} root node {name: "flare", children: [...]}
 */
function buildFlare(rows) {
  const tree = {"name": "flare", "children": []};
  for (let r = 0; r < rows.length; r++) {
    const row = rows[r];
    // pointer to the node whose children are searched for the next field
    let node = tree;
    for (let j = 0; j < row.length; j++) {
      // set undefined/null to the string "null"
      const attribute = row[j] == null ? "null" : row[j];
      const isLast = j === row.length - 1;
      // single lookup: tells us both whether the field exists and where
      let child = node.children.find((c) => c.name === attribute);
      if (!child) {
        // new field: the deepest one starts with a size/value of 1,
        // any other one gets an (initially empty) list of children
        child = isLast ? {"name": attribute, "size": 1} : {"name": attribute, "children": []};
        node.children.push(child);
      } else if (isLast) {
        // another path ends on an existing node: count it, even when the
        // node already has children from a longer path
        child.size = (child.size || 0) + 1;
      } else if (!child.children) {
        // an existing leaf is extended by a longer path: give it children
        // so the remaining fields are attached below it, not to its parent
        child.children = [];
      }
      // step down before handling the next field
      node = child;
    }
  }
  return tree;
}

var root = buildFlare(data); // the output

// object here
console.log(root);
// stringified version to page; textContent because the JSON is plain text, not markup
document.getElementById('output').textContent = JSON.stringify(root, null, 1);
Working examples https://jsfiddle.net/7qaz062u/
Output
{ "name": "flare", "children": [ { "name": 1, "children": [ { "name": 2, "children": [ { "name": 3, "children": [ { "name": 4, "children": [ { "name": 5, "size": 1 } ] }, { "name": 6, "children": [ { "name": 5, "size": 1 } ] } ] }, { "name": 6, "children": [ { "name": 4, "children": [ { "name": 5, "size": 1 } ] } ] } ] }, { "name": 3, "children": [ { "name": 6, "children": [ { "name": 4, "children": [ { "name": 5, "size": 1 } ] } ] } ] }, { "name": 7, "children": [ { "name": 5, "size": 1 }, { "name": 3, "children": [ { "name": 5, "size": 1 } ] } ] } ] } ] }