Fast cleanup of orphaned MongoDB GridFS chunks

When using GridFS, an expiration time (TTL) set by the client application applies to documents in the metadata collection (fs.files) but not to documents in the chunks collection (fs.chunks). As a result, the chunks of expired files are left behind as orphans and are never cleaned up from MongoDB.

https://jira.mongodb.org/browse/SERVER-23165



/**
 * Deletes GridFS chunks whose parent document in fs.files no longer exists.
 *
 * The first chunk (n: 0) of every file is visited in ascending _id order,
 * one batch at a time. For each of them the parent fs.files document is
 * looked up; when it is missing, every chunk with the same files_id is
 * deleted.
 *
 * Scanning continues until a batch comes back short. The fs.files document
 * count is used only as a rough progress estimate: orphaned chunks have no
 * fs.files document, so that count cannot bound the number of pages.
 *
 * NOTE(review): GridFS drivers are expected to write chunks before the
 * fs.files document, so an upload that is still in progress may look
 * orphaned. Confirm no uploads are running before using this.
 *
 * @param {object} database - Database handle exposing fs.files and fs.chunks
 *     (the mongo shell `db` object).
 * @param {object} [options]
 * @param {number} [options.batchSize=1000] - First chunks examined per page.
 * @param {number} [options.stallCheckPages=100] - Run the progress check
 *     every this many pages.
 * @param {number} [options.minRemovedPerCheck=190] - Abort when fewer chunks
 *     than this were removed since the previous progress check.
 * @param {function} [options.log=print] - Sink for progress messages.
 * @returns {{pagesScanned: number, totalRemoved: number}} Run summary.
 * @throws {Error} When a progress check finds too few removals.
 */
function cleanupOrphanedGridFsChunks(database, options) {
  const opts = options || {};
  // limit(0) means "no limit" in MongoDB, so anything below 1 falls back to the default.
  const batchSize =
    typeof opts.batchSize === 'number' && opts.batchSize >= 1 ? Math.floor(opts.batchSize) : 1000;
  const stallCheckPages =
    typeof opts.stallCheckPages === 'number' && opts.stallCheckPages >= 1
      ? Math.floor(opts.stallCheckPages)
      : 100;
  const minRemovedPerCheck =
    typeof opts.minRemovedPerCheck === 'number' && opts.minRemovedPerCheck >= 0
      ? opts.minRemovedPerCheck
      : 190;
  const log = typeof opts.log === 'function' ? opts.log : print;

  const files = database.fs.files;
  const chunks = database.fs.chunks;

  const fileCount = files.estimatedDocumentCount();
  const estimatedPages = Math.floor(fileCount / batchSize);
  log('fs.files documents (estimated): ' + fileCount);

  let lastId = null; // _id of the last first-chunk examined; null until the first page is read
  let totalRemoved = 0;
  let removedAtLastCheck = 0;
  let page = 0;

  for (;;) {
    log('page ' + page + '/~' + estimatedPages + ' oid: ' + lastId + ' deleted: ' + totalRemoved);

    const filter = lastId === null ? { n: 0 } : { _id: { $gt: lastId }, n: 0 };
    const orphanFileIds = [];
    let seen = 0;

    // The explicit sort is what makes "_id greater than the last one seen" a
    // valid way to resume: without it a page may end on an arbitrary _id and
    // skip chunks.
    chunks
      .find(filter, { data: 0 })
      .sort({ _id: 1 })
      .limit(batchSize)
      .noCursorTimeout()
      .forEach(function (chunk) {
        seen += 1;
        lastId = chunk._id;
        if (files.countDocuments({ _id: chunk.files_id }) === 0) {
          orphanFileIds.push(chunk.files_id);
        }
      });

    if (orphanFileIds.length > 0) {
      const result = chunks.deleteMany(
        { files_id: { $in: orphanFileIds } },
        { hint: 'files_id_1_n_1' }
      );
      if (result && typeof result.deletedCount === 'number') {
        totalRemoved += result.deletedCount;
      } else {
        // No count was reported for this delete; keep the total numeric.
        log('deleteMany reported no deletedCount for ' + orphanFileIds.length + ' file ids');
      }
    }

    // A short (or empty) batch means every first chunk has been examined.
    if (seen < batchSize) {
      break;
    }

    if (page > 0 && page % stallCheckPages === 0) {
      log(new Date());
      // Deliberate bail-out: stop when the scan is no longer finding enough
      // orphans to be worth continuing.
      if (totalRemoved - removedAtLastCheck < minRemovedPerCheck) {
        throw new Error('Stupid loops');
      }
      removedAtLastCheck = totalRemoved;
    }

    page += 1;
  }

  return { pagesScanned: page + 1, totalRemoved: totalRemoved };
}

// `db` is supplied by the mongo shell. The guard keeps the file loadable in
// an environment without a connection, where only the function is wanted.
if (typeof db !== 'undefined') {
  try {
    cleanupOrphanedGridFsChunks(db);
  } catch (e) {
    print(e);
  }
}


Залишити відповідь

Ваша e-mail адреса не оприлюднюватиметься. Обов’язкові поля позначені *