When using GridFS, an expiration time (TTL) set by the client application is applied to documents in the metadata collection (fs.files) but not to documents in the chunks collection (fs.chunks). As a result, the chunks of expired files are never cleaned up from MongoDB.
https://jira.mongodb.org/browse/SERVER-23165
/**
 * Deletes GridFS chunks whose parent document no longer exists in `fs.files`.
 *
 * The scan walks the first chunk (`n: 0`) of every file in ascending `_id`
 * order, one batch at a time. For each such chunk it checks whether a
 * document with `_id === chunk.files_id` exists in `fs.files`; when none
 * does, every chunk carrying that `files_id` is deleted.
 *
 * The scan ends when a batch comes back short (no more `n: 0` chunks), or
 * early when the stall guard fires (see `minRemovedPerCheck`).
 *
 * NOTE(review): an upload that is still in progress may have chunks written
 * before its `fs.files` document exists; such chunks would look orphaned to
 * this scan. Confirm against the driver in use before running this while
 * uploads are active.
 *
 * @param {object} database - Database handle exposing `fs.files` and `fs.chunks`.
 * @param {object} [options]
 * @param {number} [options.batchSize=1000] - `n: 0` chunks examined per page.
 * @param {number} [options.checkEveryPages=100] - Stall guard interval, in pages.
 * @param {number} [options.minRemovedPerCheck=190] - Minimum chunks that must be
 *   deleted between two guard checks; the scan stops early otherwise. Use 0 to
 *   disable the guard.
 * @param {string|null} [options.hint='files_id_1_n_1'] - Index hint passed to
 *   `deleteMany`; pass `null` to omit it.
 * @param {function} [options.log=print] - Sink for progress messages.
 * @returns {{pagesScanned: number, totalRemoved: number, stoppedEarly: boolean}}
 */
function removeOrphanedChunks(database, options) {
  const settings = options || {};
  const batchSize = settings.batchSize || 1000;
  const checkEveryPages = settings.checkEveryPages || 100;
  const minRemovedPerCheck =
    settings.minRemovedPerCheck === undefined ? 190 : settings.minRemovedPerCheck;
  const hint = settings.hint === undefined ? 'files_id_1_n_1' : settings.hint;
  // `print` is only looked up when no logger is supplied.
  const log = settings.log || print;

  const files = database.fs.files;
  const chunks = database.fs.chunks;

  // Informational only. The loop below does not rely on this number: orphaned
  // chunks are, by definition, the ones that have no fs.files document.
  log('fs.files documents (estimated): ' + files.estimatedDocumentCount());

  let lastId; // _id of the last chunk examined; undefined until the first page
  let totalRemoved = 0;
  let removedAtLastCheck = 0;
  let pagesScanned = 0;
  let stoppedEarly = false;

  for (;;) {
    const page = pagesScanned;
    pagesScanned += 1;
    log('page ' + page + ' oid: ' + (lastId === undefined ? '(start)' : lastId) +
        ' deleted: ' + totalRemoved);

    const query = { n: 0 };
    if (lastId !== undefined) {
      query._id = { $gt: lastId };
    }

    const orphanedFileIds = [];
    let scanned = 0;
    chunks
      .find(query, { data: 0 }) // skip the binary payload; only ids are needed
      .sort({ _id: 1 }) // keyset pagination is only correct on a sorted cursor
      .limit(batchSize)
      .noCursorTimeout()
      .forEach(function (chunk) {
        scanned += 1;
        lastId = chunk._id;
        if (files.countDocuments({ _id: chunk.files_id }) === 0) {
          orphanedFileIds.push(chunk.files_id);
        }
      });

    if (orphanedFileIds.length > 0) {
      const deleteOptions = hint ? { hint: hint } : {};
      const result = chunks.deleteMany({ files_id: { $in: orphanedFileIds } }, deleteOptions);
      // deletedCount may be absent (e.g. unacknowledged writes); count it as 0.
      totalRemoved += (result && result.deletedCount) || 0;
    }

    // A short batch means there are no more n:0 chunks past lastId.
    if (scanned < batchSize) {
      break;
    }

    // Stall guard: give up when the recent pages produced too few deletions.
    if (page > 0 && page % checkEveryPages === 0) {
      log(new Date());
      if (totalRemoved - removedAtLastCheck < minRemovedPerCheck) {
        stoppedEarly = true;
        log('Stopping early: fewer than ' + minRemovedPerCheck +
            ' chunks deleted in the last ' + checkEveryPages + ' pages');
        break;
      }
      removedAtLastCheck = totalRemoved;
    }
  }

  log('finished: pages scanned: ' + pagesScanned + ' deleted: ' + totalRemoved);
  return { pagesScanned: pagesScanned, totalRemoved: totalRemoved, stoppedEarly: stoppedEarly };
}

// Run against the shell's current database. The guard keeps the file loadable
// where no `db` global exists.
if (typeof db !== 'undefined') {
  try {
    removeOrphanedChunks(db);
  } catch (e) {
    print(e);
  }
}