I'd like a data structure to efficiently store a long sequence of numbers. The numbers should always be whole integers, let's say Longs.
The feature of the inputs
Interval trees seem to be geared toward storing overlapping intervals, while in your case that doesn't make sense. An interval tree could hold millions of small overlapping intervals, which together form only a handful of longer non-overlapping intervals.
If you want to store only non-overlapping intervals, then adding or deleting an interval may involve deleting a number of consecutive intervals that fall within the new interval. So quickly finding consecutive intervals, and efficient deletion of a potentially large number of intervals are important.
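That sounds like a job for the humble linked list. When inserting a new interval, you'd: find the interval that contains (or directly precedes) the new starting point; find the interval that contains (or directly follows) the new end point; and replace everything in between with a single interval that spans from the lowest start to the highest end, merging with neighbours that overlap or touch the new interval.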
Deleting an interval would be largely the same: you truncate the intervals that the starting point and end point are inside of, and delete all the intervals in between.
The average and worst-case number of steps for this are on the order of N/2 and N, where N is the number of intervals in the linked list. You could improve this by adding a method to avoid having to iterate over the whole list to find the starting point; if you know the range and distribution of the values, this could be something like a hash table; e.g. if the values are from 1 to X and the distribution is uniform, you'd store a table of length Y, where item k points to the last interval that starts before the value k·X/Y. When adding an interval (A,B), you'd look up table[floor(A·Y/X)] and start iterating over the linked list from there. The choice of value for Y would be determined by how much space you want to use, versus how close you want to get to the actual position of the starting point. The complexities would then drop by roughly a factor of Y.
(If you work in a language where you can short-circuit a linked list, and just leave the chain of objects you cut loose to be garbage-collected, you could find the location of the starting point and end point independently, connect them, and skip the deletion of all the intervals in between. I don't know whether this would actually increase speed in practice.)
Here's a start of a code example, with the three range functions, but without further optimisation:
/**
 * One node of the interval list: a range of whole numbers with inclusive
 * bounds, plus a link to the node that follows it.
 *
 * @constructor
 * @param {number} a - Lowest value of the range; stored as `start`.
 * @param {number} b - Highest value of the range; stored as `end`.
 * @param {?Interval} n - Following node, or null at the tail; stored as `next`.
 */
function Interval(a, b, n) {
    // Bounds first, link last: the list code only ever reads these three fields.
    this.start = a;
    this.end = b;
    this.next = n;
}
/**
 * Singly linked list of intervals. A new list is empty: `first` is null
 * until a range is added.
 *
 * @constructor
 */
function IntervalList() {
    // Head of the chain; every list operation starts walking from here.
    this.first = null;
}
/**
 * Adds every value from `a` to `b` (both inclusive) to the list, merging the
 * new range with any stored interval it overlaps or directly touches.
 * Assumes a <= b; this is not checked.
 *
 * @param {number} a - First value to add.
 * @param {number} b - Last value to add.
 */
IntervalList.prototype.addRange = function(a, b) {
    var head = this.first;

    // Empty list, or the range ends before the first interval with a gap
    // in between: the new range simply becomes the new head.
    if (!head || b < head.start - 1) {
        this.first = new Interval(a, b, head);
        return;
    }

    // Skip intervals that end before the range starts (with a gap), as long
    // as the range still reaches the interval after them.
    var node = head;
    while (a > node.end + 1) {
        var after = node.next;
        if (!after || !(b >= after.start - 1)) {
            break;
        }
        node = after;
    }

    // Still a gap after `node`, and the range does not reach its successor:
    // the range sits on its own between the two.
    if (a > node.end + 1) {
        node.next = new Interval(a, b, node.next);
        return;
    }

    // The range touches `node`: swallow every follower it reaches as well.
    var follower = node.next;
    while (follower && b >= follower.start - 1) {
        node.end = follower.end;
        follower = follower.next;
        node.next = follower; // unlink the absorbed interval
    }

    // Finally stretch `node` so it covers the new range on both sides.
    if (a < node.start) {
        node.start = a;
    }
    if (b > node.end) {
        node.end = b;
    }
};
/**
 * Removes every value from `a` to `b` (both inclusive) from the list.
 * Intervals fully inside the range are unlinked, intervals partly inside are
 * truncated, and an interval that strictly contains the range is split in two.
 * Assumes a <= b; this is not checked.
 *
 * @param {number} a - First value to remove.
 * @param {number} b - Last value to remove.
 */
IntervalList.prototype.delRange = function(a, b) {
    var head = this.first;

    // Nothing stored, or the whole range lies before the first interval.
    if (!head || b < head.start) {
        return;
    }

    // Advance to the last interval that starts before `a` (or stay on the
    // head when none does).
    var prev = head;
    while (prev.next && a > prev.next.start) {
        prev = prev.next;
    }

    // True when `prev` starts before the range, i.e. at least its first
    // value survives the deletion.
    var keepsPrevStart = a > prev.start;
    if (keepsPrevStart) {
        if (b < prev.end) {
            // Range strictly inside `prev`: cut it into a left and right part.
            prev.next = new Interval(b + 1, prev.end, prev.next);
            prev.end = a - 1;
            return;
        }
        if (a <= prev.end) {
            prev.end = a - 1; // chop off the tail of `prev`
        }
    }

    // Find the first interval that extends beyond `b`; everything skipped
    // on the way is entirely covered by the range.
    var survivor = keepsPrevStart ? prev.next : prev;
    while (survivor && b >= survivor.end) {
        survivor = survivor.next;
    }

    // Re-link around the skipped intervals (they are simply left unreferenced).
    if (a <= head.start) {
        this.first = survivor;
    } else {
        prev.next = survivor;
    }

    // The range may still cut into the front of the surviving interval.
    if (survivor && b >= survivor.start) {
        survivor.start = b + 1;
    }
};
/**
 * Tells whether every value from `a` to `b` (both inclusive) is stored,
 * i.e. whether the range fits inside one single interval of the list.
 * Assumes a <= b; this is not checked.
 *
 * @param {number} a - First value of the range to look for.
 * @param {number} b - Last value of the range to look for.
 * @returns {boolean} true when the range lies within one stored interval;
 *     false otherwise, including for an empty list.
 */
IntervalList.prototype.hasRange = function(a, b) {
    for (var node = this.first; node; node = node.next) {
        // Stop at the first interval that does not end before `a`, or at the
        // tail; only that interval can possibly contain the range.
        if (!(a > node.end) || !node.next) {
            return a >= node.start && b <= node.end;
        }
    }
    return false; // only reached when the list is empty
};
/**
 * Adds a single value to the list by treating it as the one-element
 * range from `a` to `a` and handing it to `addRange`.
 *
 * @param {number} a - Value to add.
 */
IntervalList.prototype.addValue = function(a) {
    // A dedicated single-value implementation could be faster.
    this.addRange(a, a);
};
/**
 * Removes a single value from the list by treating it as the one-element
 * range from `a` to `a` and handing it to `delRange`.
 *
 * @param {number} a - Value to remove.
 */
IntervalList.prototype.delValue = function(a) {
    // A dedicated single-value implementation could be faster.
    this.delRange(a, a);
};
/**
 * Tells whether a single value is stored, by asking `hasRange` about the
 * one-element range from `a` to `a`.
 *
 * @param {number} a - Value to look for.
 * @returns {boolean} Whatever `hasRange(a, a)` reports.
 */
IntervalList.prototype.hasValue = function(a) {
    // A dedicated single-value implementation could be faster.
    return this.hasRange(a, a);
};
/**
 * Writes the list to the page with `document.write`: one "(start-end) "
 * chunk per interval, in list order, followed by an HTML line break.
 * An empty list produces just the line break.
 *
 * Requires a browser-style global `document`.
 */
IntervalList.prototype.print = function() {
    for (var node = this.first; node; node = node.next) {
        document.write("(" + node.start + "-" + node.end + ") ");
    }
    // The original literal was split across two lines (a syntax error);
    // it is restored here as the HTML line break it evidently stood for.
    document.write("<br>");
};
// Demo: build a list, punch a hole into it and query it, writing every
// step to the page. Requires a browser-style global `document`.
var intervals = new IntervalList();

intervals.addRange(100,199);
document.write("+ (100-199) → "); intervals.print(); // (100-199)

intervals.addRange(300,399);
document.write("+ (300-399) → "); intervals.print(); // (100-199) (300-399)

// Touches both neighbours, so all three merge into one interval.
intervals.addRange(200,299);
document.write("+ (200-299) → "); intervals.print(); // (100-399)

// Strictly inside the single interval, so it is split in two.
intervals.delRange(225,275);
document.write("− (225-275) → "); intervals.print(); // (100-224) (276-399)

// The string literals below were split across two lines in the original
// (a syntax error); they are restored as HTML line breaks.
document.write("(150-200) ? " + intervals.hasRange(150,200) + "<br>"); // true
document.write("(200-300) ? " + intervals.hasRange(200,300) + "<br>"); // false: spans the hole