In my code below, 5 always prints before 4. I thought because the callback to postUsers
was in a return statement from matchAgainstAD
it would wait
This is very classic asynchronous problem in node.js. Your findUser()
function has an asynchronous response which means the callback is called sometime later. Meanwhile, the rest of your loop continues to run so that all the requests are in-flight at the same time and then the responses start coming in sometime later. Thus, you can't call postUsers()
after matchAgainstAd()
returns because the inner async operations are not yet completed and thus staleUsers
is not yet populated.
There are multiple approaches to solving this problem. In general, it is worth learning how to use promises for operations like this because they offer all sorts of very useful flow of control options when using asynchronous operations and node.js does pretty much all I/O operations as async. But, to best illustrate what's going on in this type of issue, I'll first show you a manually coded solution. In this manually coded solution, you keep track of how many operations are still remaining to complete and when all have completed, then and only then, do you call postUsers()
with the accumulated data.
Manually Coded Solution
var matchAgainstAD = function (stUsers) {
var remaining = stUsers.length;
stUsers.forEach(function (element, i) {
var sAMAccountName = stUsers[i].guiLoginName;
function checkDone() {
if (remaining === 0) {
postUsers(staleUsers);
}
}
// Find user by a sAMAccountName
var ad = new ActiveDirectory(config);
ad.findUser(sAMAccountName, function (err, user) {
--remaining;
if (err) {
console.log('ERROR: ' + JSON.stringify(err));
checkDone();
return;
}
if (!user) {
staleUsers.push(stUsers[i])
}
checkDone();
});
});
}
var postUsers = function(staleUsers) {
request.post({
headers: {'content-type' : 'application/x-www-form-urlencoded'},
url: 'http://localhost:8000/api/record/newRecord',
qs: staleUsers
}, function(err, res, body) {
// console.log(body);
})
}
The core logic here is that you initialize a counter to the number of operations that will be carried out. Then, in your loop where each operation is happening, you decrement the remaining
counter anytime one of the operations completes (calls it's completion callback). Then, after processing the result (in both the success and error code paths), you check if the remaining
count has reached 0 indicating that all requests are now done. If so, then the staleUsers
array is now fully populated and you can call postUsers(staleUsers)
to process the accumulated result.
Solution Coded Using Bluebird Promises
The idea here is that we use the control flow logic and enhanced error handling of promises to manage the async control flow. This is done by "promisifying" each asynchronous interface we are using here. "promisifying" is the process of creating a small function wrapper around any async function that follows the node.js calling convention where the last argument to the function is a callback that takes at least two arguments, the first an error and the second a value. This can be automatically turned into a wrapper function that returns a promise, allow using to use promise logic flow with any normal async operation.
Here's how this would work using the bluebird promise library.
var Promise = require('bluebird');
var request = Promise.promisifyAll(request('require'));
var matchAgainstAD = function (stUsers) {
var staleUsers = [];
var ad = new ActiveDirectory(config);
// get promisified version of findUser
var findUser = Promise.promisify(ad.findUser, ad);
return Promise.map(stUsers, function(userToSearchFor) {
var sAMAccountName = userToSearchFor.guiLoginName;
return findUser(sAMAccountName).then(function(user) {
// if no user found, then consider it a staleUser
if (!user) {
staleusers.push(userToSearchFor);
}
}, function(err) {
// purposely skip any requests we get an error on
// having an error handler that does nothing will
// stop promise propagation of the error (it will be considered "handled")
});
}).then(function() {
if (staleUsers.length) {
return postUsers(staleUsers);
}
return 0;
});
}
var postUsers = function (staleUsers) {
return request.postAsync({
headers: {
'content-type': 'application/x-www-form-urlencoded'
},
url: 'http://localhost:8000/api/record/newRecord',
qs: staleUsers
}).spread(function (res, body) {
// console.log(body);
return staleUsers.length;
})
}
matchAgainstAD(users).then(function(qtyStale) {
// success here
}, function(err) {
// error here
})
Standard ES6 Promises Version
And, here's a version that uses only the standard ES6 promises built-into node.js. The main difference here is that you have to code your own promisified versions of the async functions you want to use because you can't use the built-in promisify capabilities in Bluebird.
var request = request('require');
// make a promisified version of request.post
function requestPostPromise(options) {
return new Promise(function(resolve, reject) {
request.post(options, function(err, res, body) {
if (err) {
reject(err);
} else {
resolve([res, body]);
}
});
});
}
// make a function that gets a promisified version of ad.findUser
function getfindUserPromise(ad) {
return function(name) {
return new Promise(function(resolve, reject) {
ad.findUser(name, function(err, user) {
if (err) {
reject(err);
} else {
resolve(user);
}
});
});
}
}
var matchAgainstAD = function (stUsers) {
var staleUsers = [];
var promises = [];
var ad = new ActiveDirectory(config);
// get promisified version of findUser
var findUser = getFindUserPromise(ad);
stUsers.each(function(userToSearchFor) {
promises.push(findUser(userToSearchFor.guiLoginName).then(function(user) {
// if no user found, then consider it a staleUser
if (!user) {
staleusers.push(userToSearchFor);
}
}, function(err) {
// purposely skip any requests we get an error on
// have an error handler that does nothing will
// stop promise propagation of the error (it will be considered "handled")
}));
});
return Promise.all(promises).then(function() {
if (staleUsers.length) {
return postUsers(staleUsers);
}
return 0;
});
}
var postUsers = function (staleUsers) {
return requestPostPromise({
headers: {
'content-type': 'application/x-www-form-urlencoded'
},
url: 'http://localhost:8000/api/record/newRecord',
qs: staleUsers
}).then(function (err, results) {
var res = results[0];
var body = results[1];
// console.log(body);
return staleUsers.length;
})
}
matchAgainstAD(users).then(function(qtyStale) {
// success here
}, function(err) {
// error here
})
ad.findUser
takes a callback that contains the console.log(4)
. That function is async, and will hit your callback when the IO operation has completed.
On the other hand, postUsers
is called completely synchronously, so it will hit console.log(5)
before ad.findUser
enters your callback.
A simple way to fix this is to call postUsers
from inside of your ad.findUser
callback.
I would suggest looking into the promise pattern for JavaScript to manage dependencies between async operations. There are several popular libraries (Q and RSVSP.js being a couple of them).