This is a hard problem. My solution is at http://stuff.mit.edu/~jik/software/delete-s3-bucket.pl.txt. It describes all of the things I've determined can go wrong in a comment at the top. Here's the current version of the script (if I change it, I'll put a new version at the URL but probably not here).
#!/usr/bin/perl
# Copyright (c) 2010 Jonathan Kamens.
# Released under the GNU General Public License, Version 3.
# See <http://www.gnu.org/licenses/>.
# $Id: delete-s3-bucket.pl,v 1.3 2010/10/17 03:21:33 jik Exp $
# Deleting an Amazon S3 bucket is hard.
#
# * You can't delete the bucket unless it is empty.
#
# * There is no API for telling Amazon to empty the bucket, so you have to
# delete all of the objects one by one yourself.
#
# * If you've recently added a lot of large objects to the bucket, then they
# may not all be visible yet on all S3 servers. This means that even after the
# server you're talking to thinks all the objects are all deleted and lets you
# delete the bucket, additional objects can continue to propagate around the S3
# server network. If you then recreate the bucket with the same name, those
# additional objects will magically appear in it!
#
# It is not clear to me whether the bucket delete will eventually propagate to
# all of the S3 servers and cause all the objects in the bucket to go away, but
# I suspect it won't. I also suspect that you may end up continuing to be
# charged for these phantom objects even though the bucket they're in is no
# longer even visible in your S3 account.
#
# * If there's a CR, LF, or CRLF in an object name, then it's sent just that
# way in the XML that gets sent from the S3 server to the client when the
# client asks for a list of objects in the bucket. Unfortunately, the XML
# parser on the client will probably convert it to the local line ending
# character, and if it's different from the character that's actually in the
# object name, you then won't be able to delete it. Ugh! This is a bug in the
# S3 protocol; it should be enclosing the object names in CDATA tags or
# something to protect them from being munged by the XML parser.
#
# Note that this bug even affects the AWS Web Console provided by Amazon!
#
# * If you've got a whole lot of objects and you serialize the delete process,
# it'll take a long, long time to delete them all.
use threads;
use strict;
use warnings;
# Keys can have newlines in them, which screws up the communication
# between the parent and child processes, so use URL encoding to deal
# with that.
use CGI qw(escape unescape); # Easiest place to get this functionality.
use File::Basename;
use Getopt::Long;
use Net::Amazon::S3;
my $whoami = basename $0;
my $usage = "Usage: $whoami [--help] --access-key-id=id --secret-access-key=key
--bucket=name [--processes=#] [--wait=#] [--nodelete]
Specify --processes to indicate how many deletes to perform in
parallel. You're limited by RAM (to hold the parallel threads) and
bandwidth for the S3 delete requests.
Specify --wait to indicate seconds to require the bucket to be verified
empty. This is necessary if you create a huge number of objects and then
try to delete the bucket before they've all propagated to all the S3
servers (I've seen a huge backlog of newly created objects take *hours* to
propagate everywhere). See the comment at the top of the script for more
information about this issue.
Specify --nodelete to empty the bucket without actually deleting it.\n";
my($aws_access_key_id, $aws_secret_access_key, $bucket_name, $wait);
my $procs = 1;
my $delete = 1;
die if (! GetOptions(
"help" => sub { print $usage; exit; },
"access-key-id=s" => \$aws_access_key_id,
"secret-access-key=s" => \$aws_secret_access_key,
"bucket=s" => \$bucket_name,
"processess=i" => \$procs,
"wait=i" => \$wait,
"delete!" => \$delete,
));
die if (! ($aws_access_key_id && $aws_secret_access_key && $bucket_name));
my $increment = 0;
print "Incrementally deleting the contents of $bucket_name\n";
$| = 1;
my(@procs, $current);
for (1..$procs) {
my($read_from_parent, $write_to_child);
my($read_from_child, $write_to_parent);
pipe($read_from_parent, $write_to_child) or die;
pipe($read_from_child, $write_to_parent) or die;
threads->create(sub {
close($read_from_child);
close($write_to_child);
my $old_select = select $write_to_parent;
$| = 1;
select $old_select;
&child($read_from_parent, $write_to_parent);
}) or die;
close($read_from_parent);
close($write_to_parent);
my $old_select = select $write_to_child;
$| = 1;
select $old_select;
push(@procs, [$read_from_child, $write_to_child]);
}
my $s3 = Net::Amazon::S3->new({aws_access_key_id => $aws_access_key_id,
aws_secret_access_key => $aws_secret_access_key,
retry => 1,
});
my $bucket = $s3->bucket($bucket_name);
my $deleted = 1;
my $total_deleted = 0;
my $last_start = time;
my($start, $waited);
while ($deleted > 0) {
$start = time;
print "\nLoading ", ($increment ? "up to $increment" :
"as many as possible")," keys...\n";
my $response = $bucket->list({$increment ? ('max-keys' => $increment) : ()})
or die $s3->err . ": " . $s3->errstr . "\n";
$deleted = scalar(@{ $response->{keys} }) ;
if (! $deleted) {
if ($wait and ! $waited) {
my $delta = $wait - ($start - $last_start);
if ($delta > 0) {
print "Waiting $delta second(s) to confirm bucket is empty\n";
sleep($delta);
$waited = 1;
$deleted = 1;
next;
}
else {
last;
}
}
else {
last;
}
}
else {
$waited = undef;
}
$total_deleted += $deleted;
print "\nDeleting $deleted keys($total_deleted total)...\n";
$current = 0;
foreach my $key ( @{ $response->{keys} } ) {
my $key_name = $key->{key};
while (! &send(escape($key_name) . "\n")) {
print "Thread $current died\n";
die "No threads left\n" if (@procs == 1);
if ($current == @procs-1) {
pop @procs;
$current = 0;
}
else {
$procs[$current] = pop @procs;
}
}
$current = ($current + 1) % @procs;
threads->yield();
}
print "Sending sync message\n";
for ($current = 0; $current < @procs; $current++) {
if (! &send("\n")) {
print "Thread $current died sending sync\n";
if ($current = @procs-1) {
pop @procs;
last;
}
$procs[$current] = pop @procs;
$current--;
}
threads->yield();
}
print "Reading sync response\n";
for ($current = 0; $current < @procs; $current++) {
if (! &receive()) {
print "Thread $current died reading sync\n";
if ($current = @procs-1) {
pop @procs;
last;
}
$procs[$current] = pop @procs;
$current--;
}
threads->yield();
}
}
continue {
$last_start = $start;
}
if ($delete) {
print "Deleting bucket...\n";
$bucket->delete_bucket or die $s3->err . ": " . $s3->errstr;
print "Done.\n";
}
sub send {
my($str) = @_;
my $fh = $procs[$current]->[1];
print($fh $str);
}
sub receive {
my $fh = $procs[$current]->[0];
scalar <$fh>;
}
sub child {
my($read, $write) = @_;
threads->detach();
my $s3 = Net::Amazon::S3->new({aws_access_key_id => $aws_access_key_id,
aws_secret_access_key => $aws_secret_access_key,
retry => 1,
});
my $bucket = $s3->bucket($bucket_name);
while (my $key = <$read>) {
if ($key eq "\n") {
print($write "\n") or die;
next;
}
chomp $key;
$key = unescape($key);
if ($key =~ /[\r\n]/) {
my(@parts) = split(/\r\n|\r|\n/, $key, -1);
my(@guesses) = shift @parts;
foreach my $part (@parts) {
@guesses = (map(($_ . "\r\n" . $part,
$_ . "\r" . $part,
$_ . "\n" . $part), @guesses));
}
foreach my $guess (@guesses) {
if ($bucket->get_key($guess)) {
$key = $guess;
last;
}
}
}
$bucket->delete_key($key) or
die $s3->err . ": " . $s3->errstr . "\n";
print ".";
threads->yield();
}
return;
}