Projekt

Obecné

Profil

Stáhnout (6.67 KB) Statistiky
| Větev: | Revize:
1
'use strict'
2

    
3
const BB = require('bluebird')
4

    
5
const contentPath = require('./content/path')
6
const figgyPudding = require('figgy-pudding')
7
const finished = BB.promisify(require('mississippi').finished)
8
const fixOwner = require('./util/fix-owner')
9
const fs = require('graceful-fs')
10
const glob = BB.promisify(require('glob'))
11
const index = require('./entry-index')
12
const path = require('path')
13
const rimraf = BB.promisify(require('rimraf'))
14
const ssri = require('ssri')
15

    
16
BB.promisifyAll(fs)
17

    
18
// Option schema applied to the `opts` argument of `verify`.
const VerifyOpts = figgyPudding({
  // Passed as the `concurrency` option of the `BB.map` calls in this file.
  concurrency: { default: 20 },
  // Optional predicate invoked with an index entry; entries for which it
  // returns a falsy value are excluded. No default is declared.
  filter: {},
  // Logger; only `silly` is called in this file. Defaults to a no-op.
  log: { default: { silly () {} } }
})
27

    
28
module.exports = verify
29
// Runs every verification step against the cache at `cache`, one after
// another, and resolves to a single stats object.
//
// Each step is called as `step(cache, opts)`. Whatever object a step resolves
// to has its own enumerable keys copied onto the shared stats object, and the
// step's wall-clock duration (ms) is recorded under `stats.runTime[<name>]`.
// Once all steps are done, `stats.runTime.total` is set to
// `endTime - startTime`.
function verify (cache, opts) {
  opts = VerifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime
  ]

  const runStep = (stats, step, i) => {
    const label = step.name || `step #${i}`
    const startedAt = new Date()
    return BB.resolve(step(cache, opts)).then(result => {
      // Steps may resolve to nothing; only merge when there is a result.
      if (result) {
        for (const key of Object.keys(result)) {
          stats[key] = result[key]
        }
      }
      const finishedAt = new Date()
      stats.runTime = stats.runTime || {}
      stats.runTime[label] = finishedAt - startedAt
      return stats
    })
  }

  return BB.reduce(steps, runStep, {}).tap(stats => {
    stats.runTime.total = stats.endTime - stats.startTime
    opts.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  })
}
57

    
58
// Verification step: reports the current time as `startTime`.
// Both parameters are unused; they exist so the step matches the
// `step(cache, opts)` calling convention.
function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}
61

    
62
// Verification step: reports the current time as `endTime`.
// Both parameters are unused; they exist so the step matches the
// `step(cache, opts)` calling convention.
function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}
65

    
66
// Verification step: runs `fixOwner.mkdirfix` and then `fixOwner.chownr` on
// the cache directory itself. Resolves to `null` so that nothing is merged
// into the verification stats.
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  // TODO - fix file permissions too
  const chownCache = () => fixOwner.chownr(cache, cache)
  const noStats = () => null
  return fixOwner.mkdirfix(cache, cache).then(chownCache).then(noStats)
}
73

    
74
// Naive mark-and-sweep tracing garbage collector for cache content.
//
// How it works:
// 1. Stream all index entries ("pointers"), applying `opts.filter` if given.
// 2. Record the integrity string of every surviving entry as "live".
// 3. Glob every file below the content directory.
// 4. Live content has its checksum verified; `verifyContent` deletes it when
//    the check fails.
// 5. Content that no live entry refers to is removed with rimraf.
//
// Resolves to a stats object with the counters `verifiedContent`,
// `reclaimedCount`, `reclaimedSize`, `badContentCount` and `keptSize`.
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', entry => {
    const wanted = !opts.filter || opts.filter(entry)
    if (wanted) {
      liveContent.add(entry.integrity.toString())
    }
  })

  return finished(indexStream).then(() => {
    const pattern = path.join(contentPath._contentDir(cache), '**')
    return glob(pattern, { follow: false, nodir: true, nosort: true })
  }).then(files => {
    const stats = {
      verifiedContent: 0,
      reclaimedCount: 0,
      reclaimedSize: 0,
      badContentCount: 0,
      keptSize: 0
    }

    const sweepFile = file => {
      // The last three path segments joined form the hex digest; the
      // segment before them names the hash algorithm.
      const segments = file.split(/[/\\]/)
      const algo = segments[segments.length - 4]
      const digest = segments.slice(segments.length - 3).join('')
      const integrity = ssri.fromHex(digest, algo)

      if (!liveContent.has(integrity.toString())) {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        return fs.statAsync(file).then(s => {
          return rimraf(file).then(() => {
            stats.reclaimedSize += s.size
            return stats
          })
        })
      }

      return verifyContent(file, integrity).then(info => {
        if (info.valid) {
          stats.verifiedContent++
          stats.keptSize += info.size
        } else {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        }
        return stats
      })
    }

    return BB.map(files, file => sweepFile(file), {
      concurrency: opts.concurrency
    }).then(() => stats)
  })
}
135

    
136
// Checks the file at `filepath` against the integrity value `sri`.
//
// Resolves to `{ size, valid }`:
// - `valid: true` with the file's stat size when the stream check passes;
// - `valid: false` with the file's stat size when the check fails with
//   `EINTEGRITY`, in which case the file is removed first;
// - `{ size: 0, valid: false }` when an `ENOENT` error is raised.
// Any other error is propagated.
function verifyContent (filepath, sri) {
  const missing = () => ({ size: 0, valid: false })

  return fs.statAsync(filepath).then(stat => {
    const contentInfo = { size: stat.size, valid: true }
    const check = ssri.checkStream(fs.createReadStream(filepath), sri)
    return check.catch(err => {
      if (err.code === 'EINTEGRITY') {
        return rimraf(filepath).then(() => {
          contentInfo.valid = false
        })
      }
      throw err
    }).then(() => contentInfo)
  }).catch({ code: 'ENOENT' }, missing)
}
153

    
154
// Verification step: rewrites every index bucket so that it only contains
// entries that pass `opts.filter` (when given) and whose content exists.
//
// Entries returned by `index.ls` are grouped by `index._hashKey(key)`. A
// bucket is created even when its only entries were filtered out, so that
// `rebuildBucket` still processes it. Resolves to
// `{ missingContent, rejectedEntries, totalEntries }`.
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then(entries => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0
    }
    const buckets = {}
    // `entries` is keyed by cache keys, so a key such as "hasOwnProperty"
    // would shadow the builtin; Object.keys avoids calling a method on it.
    for (const key of Object.keys(entries)) {
      const hashed = index._hashKey(key)
      const entry = entries[key]
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) {
        stats.rejectedEntries++
      }
      if (!buckets[hashed]) {
        // The bucket path is taken from the first key seen for this hash.
        buckets[hashed] = []
        buckets[hashed]._path = index._bucketPath(cache, key)
      }
      if (!excluded) {
        buckets[hashed].push(entry)
      }
    }
    return BB.map(Object.keys(buckets), hashed => {
      return rebuildBucket(cache, buckets[hashed], stats, opts)
    }, { concurrency: opts.concurrency }).then(() => stats)
  })
}
187

    
188
// Truncates the bucket file at `bucket._path` and re-inserts each entry of
// `bucket` whose content file can be stat'ed.
//
// Mutates `stats`: `totalEntries` is incremented per re-inserted entry;
// `rejectedEntries` and `missingContent` are incremented when an `ENOENT`
// error is raised for an entry.
function rebuildBucket (cache, bucket, stats, opts) {
  const reinsert = entry => {
    const content = contentPath(cache, entry.integrity)
    return fs.statAsync(content)
      .then(() => index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size
      }))
      .then(() => { stats.totalEntries++ })
      .catch({ code: 'ENOENT' }, () => {
        stats.rejectedEntries++
        stats.missingContent++
      })
  }

  return fs.truncateAsync(bucket._path).then(() => {
    // This needs to be serialized because cacache explicitly
    // lets very racy bucket conflicts clobber each other.
    return BB.mapSeries(bucket, entry => reinsert(entry))
  })
}
206

    
207
// Verification step: removes the `tmp` directory inside the cache.
// Returns the promise produced by the promisified `rimraf`.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rimraf(tmpDir)
}
211

    
212
// Verification step: writes the current time (ms since the epoch, as a
// string) to `<cache>/_lastverified`, then fixes the ownership of that file.
//
// The ownership fix is chained after the write. A synchronous `finally`
// around the returned promise would run before the asynchronous write had
// happened. `.tap` passes the write's own resolution value through, so
// nothing extra is merged into the verification stats.
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return fs.writeFileAsync(verifile, String(Date.now()))
    .tap(() => fixOwner.chownr(cache, verifile))
}
221

    
222
module.exports.lastRun = lastRun
223
// Resolves to a `Date` built from the numeric timestamp stored in
// `<cache>/_lastverified`. Read errors are propagated to the caller.
function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  return fs.readFileAsync(verifile, 'utf8').then(stamp => {
    return new Date(Number(stamp))
  })
}
(3-3/3)