1 |
3a515b92
|
cagy
|
'use strict'
|
2 |
|
|
|
3 |
|
|
const BB = require('bluebird')
|
4 |
|
|
|
5 |
|
|
const contentPath = require('./content/path')
|
6 |
|
|
const figgyPudding = require('figgy-pudding')
|
7 |
|
|
const finished = BB.promisify(require('mississippi').finished)
|
8 |
|
|
const fixOwner = require('./util/fix-owner')
|
9 |
|
|
const fs = require('graceful-fs')
|
10 |
|
|
const glob = BB.promisify(require('glob'))
|
11 |
|
|
const index = require('./entry-index')
|
12 |
|
|
const path = require('path')
|
13 |
|
|
const rimraf = BB.promisify(require('rimraf'))
|
14 |
|
|
const ssri = require('ssri')
|
15 |
|
|
|
16 |
|
|
BB.promisifyAll(fs)
|
17 |
|
|
|
18 |
|
|
// Option schema for this module, built with figgy-pudding. It declares the
// three options the functions in this file read from `opts`:
//   concurrency - passed to `BB.map` as its `concurrency` setting; default 20
//   filter      - optional predicate called with an index entry; no default
//   log         - logger whose `silly` method is called; defaults to a no-op
const VerifyOpts = figgyPudding({
  concurrency: { default: 20 },
  filter: {},
  log: {
    default: {
      silly () {}
    }
  }
})
|
27 |
|
|
|
28 |
|
|
module.exports = verify
|
29 |
|
|
// Runs a full verification pass over the cache rooted at `cache`.
//
// `opts` is normalized through VerifyOpts. BB.reduce then threads one shared
// stats object through the steps listed below, in order: whatever object a
// step resolves to is copied key-by-key onto the stats, and the elapsed
// milliseconds of that step are stored under `stats.runTime[<step name>]`.
// Resolves to the stats object after `stats.runTime.total`
// (endTime - startTime) has been filled in and logged.
function verify (cache, opts) {
  opts = VerifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime
  ]

  // Reducer: run one step, fold its result into `stats`, record its duration.
  const runStep = (stats, step, i) => {
    const label = step.name || `step #${i}`
    const startedAt = new Date()
    return BB.resolve(step(cache, opts)).then(result => {
      // Steps that have nothing to report resolve to a falsy value.
      if (result) {
        for (const key of Object.keys(result)) {
          stats[key] = result[key]
        }
      }
      const finishedAt = new Date()
      if (!stats.runTime) { stats.runTime = {} }
      stats.runTime[label] = finishedAt - startedAt
      return stats
    })
  }

  // Runs once after the last step; `.tap` keeps the stats as the result.
  const recordTotal = stats => {
    stats.runTime.total = stats.endTime - stats.startTime
    opts.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  }

  return BB.reduce(steps, runStep, {}).tap(recordTotal)
}
|
57 |
|
|
|
58 |
|
|
// Pipeline step: reports the moment it was called as `startTime`.
// Both parameters are accepted only to match the step signature.
function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}
|
61 |
|
|
|
62 |
|
|
// Pipeline step: reports the moment it was called as `endTime`.
// Both parameters are accepted only to match the step signature.
function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}
|
65 |
|
|
|
66 |
|
|
// Pipeline step: calls fixOwner.mkdirfix and then fixOwner.chownr on the
// cache root, in that order. Resolves to null so that nothing is merged
// into the verification stats.
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  // TODO - fix file permissions too
  const chownCache = () => fixOwner.chownr(cache, cache)
  const nothingToReport = () => null
  return fixOwner.mkdirfix(cache, cache).then(chownCache).then(nothingToReport)
}
|
73 |
|
|
|
74 |
|
|
// Naive mark-and-sweep garbage collection over the cache's content store.
//
// Mark:  stream every index entry, skip the ones `opts.filter` rejects, and
//        remember the integrity string of each remaining entry as "live".
// Sweep: glob every file under the content directory and rebuild its
//        integrity from the path (the last three segments joined are the hex
//        digest, the segment before them is the algorithm). Then:
//          - live content is handed to verifyContent, which deletes it when
//            its checksum does not match;
//          - content no entry refers to is rimraf'd.
//
// Resolves to { verifiedContent, reclaimedCount, reclaimedSize,
// badContentCount, keptSize }.
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')

  const liveContent = new Set()
  const indexStream = index.lsStream(cache)
  indexStream.on('data', entry => {
    const kept = !opts.filter || opts.filter(entry)
    if (kept) { liveContent.add(entry.integrity.toString()) }
  })

  // Handles a single content file, updating `stats` in place.
  const sweepFile = (file, stats) => {
    const segments = file.split(/[/\\]/)
    const algo = segments[segments.length - 4]
    const digest = segments.slice(segments.length - 3).join('')
    const integrity = ssri.fromHex(digest, algo)

    if (!liveContent.has(integrity.toString())) {
      // No entries refer to this content. We can delete.
      stats.reclaimedCount++
      return fs.statAsync(file).then(fileStat => {
        return rimraf(file).then(() => {
          stats.reclaimedSize += fileStat.size
          return stats
        })
      })
    }

    return verifyContent(file, integrity).then(info => {
      if (info.valid) {
        stats.verifiedContent++
        stats.keptSize += info.size
      } else {
        stats.reclaimedCount++
        stats.badContentCount++
        stats.reclaimedSize += info.size
      }
      return stats
    })
  }

  // The sweep only starts once the index stream has been fully consumed.
  return finished(indexStream).then(() => {
    const everything = path.join(contentPath._contentDir(cache), '**')
    return glob(everything, { follow: false, nodir: true, nosort: true })
  }).then(files => {
    const stats = {
      verifiedContent: 0,
      reclaimedCount: 0,
      reclaimedSize: 0,
      badContentCount: 0,
      keptSize: 0
    }
    return BB.map(files, file => sweepFile(file, stats), {
      concurrency: opts.concurrency
    }).then(() => stats)
  })
}
|
135 |
|
|
|
136 |
|
|
// Checks the file at `filepath` against the integrity value `sri`.
//
// Resolves to { size, valid }:
//   - checksum matches      -> { size: <stat size>, valid: true }
//   - EINTEGRITY from ssri  -> the file is rimraf'd, then
//                              { size: <stat size>, valid: false }
//   - ENOENT                -> { size: 0, valid: false }
// Any other error rejects the returned promise.
function verifyContent (filepath, sri) {
  const missing = () => ({ size: 0, valid: false })

  const check = stat => {
    const contentInfo = { size: stat.size, valid: true }
    const source = fs.createReadStream(filepath)
    return ssri.checkStream(source, sri).catch(err => {
      if (err.code === 'EINTEGRITY') {
        // Corrupt content: remove it and report it as invalid.
        return rimraf(filepath).then(() => {
          contentInfo.valid = false
        })
      }
      throw err
    }).then(() => contentInfo)
  }

  return fs.statAsync(filepath).then(check).catch({ code: 'ENOENT' }, missing)
}
|
153 |
|
|
|
154 |
|
|
// Pipeline step: rewrites every index bucket from the entries that
// `index.ls(cache)` currently reports.
//
// Entries are grouped by `index._hashKey(key)`; each group carries the bucket
// file path on a `_path` property. Entries rejected by `opts.filter` are
// counted in `rejectedEntries` and left out of their bucket, but a bucket is
// still created for them (possibly empty) so rebuildBucket processes that
// bucket file too. Buckets are rebuilt with at most `opts.concurrency` in
// flight.
//
// Resolves to { missingContent, rejectedEntries, totalEntries }; rebuildBucket
// updates these counters as it goes.
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then(entries => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0
    }
    const buckets = {}
    // The keys of `entries` are cache keys, i.e. arbitrary caller-chosen
    // strings. Iterate with Object.keys rather than calling
    // `entries.hasOwnProperty`, which an entry stored under the key
    // "hasOwnProperty" would shadow.
    for (const key of Object.keys(entries)) {
      const entry = entries[key]
      const hashed = index._hashKey(key)
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) { stats.rejectedEntries++ }
      if (!buckets[hashed]) {
        // The first key seen for a bucket decides its path, kept or not.
        buckets[hashed] = []
        buckets[hashed]._path = index._bucketPath(cache, key)
      }
      if (!excluded) { buckets[hashed].push(entry) }
    }
    return BB.map(Object.keys(buckets), hashed => {
      return rebuildBucket(cache, buckets[hashed], stats, opts)
    }, { concurrency: opts.concurrency }).then(() => stats)
  })
}
|
187 |
|
|
|
188 |
|
|
// Rewrites one index bucket: truncates the file at `bucket._path`, then
// re-inserts each entry of `bucket` whose content file can still be stat'ed.
//
// Counters on `stats` are updated in place:
//   totalEntries                      +1 per entry re-inserted
//   rejectedEntries, missingContent   +1 each per entry that hit ENOENT
function rebuildBucket (cache, bucket, stats, opts) {
  const countMissing = () => {
    stats.rejectedEntries++
    stats.missingContent++
  }

  const reinsert = entry => {
    const target = contentPath(cache, entry.integrity)
    return fs.statAsync(target)
      .then(() => index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size
      }))
      .then(() => { stats.totalEntries++ })
      .catch({ code: 'ENOENT' }, countMissing)
  }

  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  return fs.truncateAsync(bucket._path).then(() => BB.mapSeries(bucket, reinsert))
}
|
206 |
|
|
|
207 |
|
|
// Pipeline step: removes the `tmp` directory directly under the cache root.
// Returns whatever promise rimraf returns.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rimraf(tmpDir)
}
|
211 |
|
|
|
212 |
|
|
// Pipeline step: records the time of this verification run by writing the
// current epoch milliseconds to `<cache>/_lastverified`, then fixes the
// ownership of that file.
//
// The ownership fix is chained after the write. Running it from a `finally`
// around the asynchronous write (as this function used to) executes it
// before the file has been created, so a fresh verifile never got chowned.
//
// Resolves to undefined so nothing is merged into the verification stats.
// Rejects if the write or the chown fails; the chown is not attempted when
// the write fails.
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return fs.writeFileAsync(verifile, String(Date.now()))
    .then(() => fixOwner.chownr(cache, verifile))
    // Discard chownr's result: the caller merges any truthy step result
    // into its stats object.
    .then(() => {})
}
|
221 |
|
|
|
222 |
|
|
module.exports.lastRun = lastRun
|
223 |
|
|
// Resolves to a Date built from the numeric contents of
// `<cache>/_lastverified`. Rejects if that file cannot be read.
function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  return fs.readFileAsync(verifile, 'utf8').then(data => {
    const millis = Number(data)
    return new Date(millis)
  })
}
|