parse.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. 'use strict'
  2. // this[BUFFER] is the remainder of a chunk if we're waiting for
  3. // the full 512 bytes of a header to come in. We will Buffer.concat()
  4. // it to the next write(), which is a mem copy, but a small one.
  5. //
  // this[QUEUE] is a Yallist of entries that haven't been emitted
  // yet. This can only get filled up if the user keeps write()ing after
  // a write() returns false, or does a write() with more than one entry.
  9. //
  10. // We don't buffer chunks, we always parse them and either create an
  11. // entry, or push it into the active entry. The ReadEntry class knows
  12. // to throw data away if .ignore=true
  13. //
  // When the entry being read emits 'end', shift items off the queue
  // until the next entry is found, and emit 'entry' for that one.
  16. //
  17. // At any time, we're pushing body chunks into the entry at WRITEENTRY,
  18. // and waiting for 'end' on the entry at READENTRY
  19. //
  20. // ignored entries get .resume() called on them straight away
  21. const warner = require('./warn-mixin.js')
  22. const Header = require('./header.js')
  23. const EE = require('events')
  24. const Yallist = require('yallist')
  25. const maxMetaEntrySize = 1024 * 1024
  26. const Entry = require('./read-entry.js')
  27. const Pax = require('./pax.js')
  28. const zlib = require('minizlib')
  29. const { nextTick } = require('process')
  // The two magic bytes that open a gzip stream. write() compares the first
  // bytes of the input against these to decide whether to unzip.
  const gzipHeader = Buffer.from([0x1f, 0x8b])

  // --- Symbol keys for per-parser state -----------------------------------
  // Current parse state: 'begin', 'header', 'body', 'meta' or 'ignore'.
  const STATE = Symbol('state')
  // Entry currently receiving body bytes / entry currently being read.
  const WRITEENTRY = Symbol('writeEntry')
  const READENTRY = Symbol('readEntry')
  // Pending extended-header data (per-entry and global).
  const EX = Symbol('extendedHeader')
  const GEX = Symbol('globalExtendedHeader')
  // Accumulated body text of the meta entry being read.
  const META = Symbol('meta')
  // Leftover bytes waiting for more input, and the queue of pending
  // entries/events.
  const BUFFER = Symbol('buffer')
  const QUEUE = Symbol('queue')
  // Lifecycle flags.
  const ENDED = Symbol('ended')
  const EMITTEDEND = Symbol('emittedEnd')
  const CONSUMING = Symbol('consuming')
  const WRITING = Symbol('writing')
  const ABORTED = Symbol('aborted')
  // null until the first write is sniffed, then false or an Unzip stream.
  const UNZIP = Symbol('unzip')
  // What has been seen in the archive so far.
  const SAW_VALID_ENTRY = Symbol('sawValidEntry')
  const SAW_NULL_BLOCK = Symbol('sawNullBlock')
  const SAW_EOF = Symbol('sawEOF')

  // --- Symbol keys for internal methods -----------------------------------
  const NEXTENTRY = Symbol('nextEntry')
  const PROCESSENTRY = Symbol('processEntry')
  const EMITMETA = Symbol('emitMeta')
  const EMIT = Symbol('emit')
  const CONSUMECHUNK = Symbol('consumeChunk')
  const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
  const CONSUMEBODY = Symbol('consumeBody')
  const CONSUMEMETA = Symbol('consumeMeta')
  const CONSUMEHEADER = Symbol('consumeHeader')
  const BUFFERCONCAT = Symbol('bufferConcat')
  const MAYBEEND = Symbol('maybeEnd')
  const CLOSESTREAM = Symbol('closeStream')

  // --- Internal event name ------------------------------------------------
  // Emitted once when parsing is complete.
  const DONE = Symbol('onDone')

  // Default entry filter: accepts everything.
  const noop = _ => true
  62. module.exports = warner(class Parser extends EE {
  63. constructor (opt) {
  64. opt = opt || {}
  65. super(opt)
  66. this.file = opt.file || ''
  67. // set to boolean false when an entry starts. 1024 bytes of \0
  68. // is technically a valid tarball, albeit a boring one.
  69. this[SAW_VALID_ENTRY] = null
  70. // these BADARCHIVE errors can't be detected early. listen on DONE.
  71. this.on(DONE, _ => {
  72. if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) {
  73. // either less than 1 block of data, or all entries were invalid.
  74. // Either way, probably not even a tarball.
  75. this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format')
  76. }
  77. })
  78. if (opt.ondone) {
  79. this.on(DONE, opt.ondone)
  80. } else {
  81. this.on(DONE, _ => {
  82. this.emit('prefinish')
  83. this.emit('finish')
  84. this.emit('end')
  85. })
  86. }
  87. this.strict = !!opt.strict
  88. this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
  89. this.filter = typeof opt.filter === 'function' ? opt.filter : noop
  90. // have to set this so that streams are ok piping into it
  91. this.writable = true
  92. this.readable = false
  93. this[QUEUE] = new Yallist()
  94. this[BUFFER] = null
  95. this[READENTRY] = null
  96. this[WRITEENTRY] = null
  97. this[STATE] = 'begin'
  98. this[META] = ''
  99. this[EX] = null
  100. this[GEX] = null
  101. this[ENDED] = false
  102. this[UNZIP] = null
  103. this[ABORTED] = false
  104. this[SAW_NULL_BLOCK] = false
  105. this[SAW_EOF] = false
  106. this.on('end', () => this[CLOSESTREAM]())
  107. if (typeof opt.onwarn === 'function') {
  108. this.on('warn', opt.onwarn)
  109. }
  110. if (typeof opt.onentry === 'function') {
  111. this.on('entry', opt.onentry)
  112. }
  113. }
  114. [CONSUMEHEADER] (chunk, position) {
  115. if (this[SAW_VALID_ENTRY] === null) {
  116. this[SAW_VALID_ENTRY] = false
  117. }
  118. let header
  119. try {
  120. header = new Header(chunk, position, this[EX], this[GEX])
  121. } catch (er) {
  122. return this.warn('TAR_ENTRY_INVALID', er)
  123. }
  124. if (header.nullBlock) {
  125. if (this[SAW_NULL_BLOCK]) {
  126. this[SAW_EOF] = true
  127. // ending an archive with no entries. pointless, but legal.
  128. if (this[STATE] === 'begin') {
  129. this[STATE] = 'header'
  130. }
  131. this[EMIT]('eof')
  132. } else {
  133. this[SAW_NULL_BLOCK] = true
  134. this[EMIT]('nullBlock')
  135. }
  136. } else {
  137. this[SAW_NULL_BLOCK] = false
  138. if (!header.cksumValid) {
  139. this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header })
  140. } else if (!header.path) {
  141. this.warn('TAR_ENTRY_INVALID', 'path is required', { header })
  142. } else {
  143. const type = header.type
  144. if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
  145. this.warn('TAR_ENTRY_INVALID', 'linkpath required', { header })
  146. } else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath) {
  147. this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { header })
  148. } else {
  149. const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])
  150. // we do this for meta & ignored entries as well, because they
  151. // are still valid tar, or else we wouldn't know to ignore them
  152. if (!this[SAW_VALID_ENTRY]) {
  153. if (entry.remain) {
  154. // this might be the one!
  155. const onend = () => {
  156. if (!entry.invalid) {
  157. this[SAW_VALID_ENTRY] = true
  158. }
  159. }
  160. entry.on('end', onend)
  161. } else {
  162. this[SAW_VALID_ENTRY] = true
  163. }
  164. }
  165. if (entry.meta) {
  166. if (entry.size > this.maxMetaEntrySize) {
  167. entry.ignore = true
  168. this[EMIT]('ignoredEntry', entry)
  169. this[STATE] = 'ignore'
  170. entry.resume()
  171. } else if (entry.size > 0) {
  172. this[META] = ''
  173. entry.on('data', c => this[META] += c)
  174. this[STATE] = 'meta'
  175. }
  176. } else {
  177. this[EX] = null
  178. entry.ignore = entry.ignore || !this.filter(entry.path, entry)
  179. if (entry.ignore) {
  180. // probably valid, just not something we care about
  181. this[EMIT]('ignoredEntry', entry)
  182. this[STATE] = entry.remain ? 'ignore' : 'header'
  183. entry.resume()
  184. } else {
  185. if (entry.remain) {
  186. this[STATE] = 'body'
  187. } else {
  188. this[STATE] = 'header'
  189. entry.end()
  190. }
  191. if (!this[READENTRY]) {
  192. this[QUEUE].push(entry)
  193. this[NEXTENTRY]()
  194. } else {
  195. this[QUEUE].push(entry)
  196. }
  197. }
  198. }
  199. }
  200. }
  201. }
  202. }
  203. [CLOSESTREAM] () {
  204. nextTick(() => this.emit('close'))
  205. }
  206. [PROCESSENTRY] (entry) {
  207. let go = true
  208. if (!entry) {
  209. this[READENTRY] = null
  210. go = false
  211. } else if (Array.isArray(entry)) {
  212. this.emit.apply(this, entry)
  213. } else {
  214. this[READENTRY] = entry
  215. this.emit('entry', entry)
  216. if (!entry.emittedEnd) {
  217. entry.on('end', _ => this[NEXTENTRY]())
  218. go = false
  219. }
  220. }
  221. return go
  222. }
  223. [NEXTENTRY] () {
  224. do {} while (this[PROCESSENTRY](this[QUEUE].shift()))
  225. if (!this[QUEUE].length) {
  226. // At this point, there's nothing in the queue, but we may have an
  227. // entry which is being consumed (readEntry).
  228. // If we don't, then we definitely can handle more data.
  229. // If we do, and either it's flowing, or it has never had any data
  230. // written to it, then it needs more.
  231. // The only other possibility is that it has returned false from a
  232. // write() call, so we wait for the next drain to continue.
  233. const re = this[READENTRY]
  234. const drainNow = !re || re.flowing || re.size === re.remain
  235. if (drainNow) {
  236. if (!this[WRITING]) {
  237. this.emit('drain')
  238. }
  239. } else {
  240. re.once('drain', _ => this.emit('drain'))
  241. }
  242. }
  243. }
  244. [CONSUMEBODY] (chunk, position) {
  245. // write up to but no more than writeEntry.blockRemain
  246. const entry = this[WRITEENTRY]
  247. const br = entry.blockRemain
  248. const c = (br >= chunk.length && position === 0) ? chunk
  249. : chunk.slice(position, position + br)
  250. entry.write(c)
  251. if (!entry.blockRemain) {
  252. this[STATE] = 'header'
  253. this[WRITEENTRY] = null
  254. entry.end()
  255. }
  256. return c.length
  257. }
  258. [CONSUMEMETA] (chunk, position) {
  259. const entry = this[WRITEENTRY]
  260. const ret = this[CONSUMEBODY](chunk, position)
  261. // if we finished, then the entry is reset
  262. if (!this[WRITEENTRY]) {
  263. this[EMITMETA](entry)
  264. }
  265. return ret
  266. }
  267. [EMIT] (ev, data, extra) {
  268. if (!this[QUEUE].length && !this[READENTRY]) {
  269. this.emit(ev, data, extra)
  270. } else {
  271. this[QUEUE].push([ev, data, extra])
  272. }
  273. }
  274. [EMITMETA] (entry) {
  275. this[EMIT]('meta', this[META])
  276. switch (entry.type) {
  277. case 'ExtendedHeader':
  278. case 'OldExtendedHeader':
  279. this[EX] = Pax.parse(this[META], this[EX], false)
  280. break
  281. case 'GlobalExtendedHeader':
  282. this[GEX] = Pax.parse(this[META], this[GEX], true)
  283. break
  284. case 'NextFileHasLongPath':
  285. case 'OldGnuLongPath':
  286. this[EX] = this[EX] || Object.create(null)
  287. this[EX].path = this[META].replace(/\0.*/, '')
  288. break
  289. case 'NextFileHasLongLinkpath':
  290. this[EX] = this[EX] || Object.create(null)
  291. this[EX].linkpath = this[META].replace(/\0.*/, '')
  292. break
  293. /* istanbul ignore next */
  294. default: throw new Error('unknown meta: ' + entry.type)
  295. }
  296. }
  297. abort (error) {
  298. this[ABORTED] = true
  299. this.emit('abort', error)
  300. // always throws, even in non-strict mode
  301. this.warn('TAR_ABORT', error, { recoverable: false })
  302. }
  303. write (chunk) {
  304. if (this[ABORTED]) {
  305. return
  306. }
  307. // first write, might be gzipped
  308. if (this[UNZIP] === null && chunk) {
  309. if (this[BUFFER]) {
  310. chunk = Buffer.concat([this[BUFFER], chunk])
  311. this[BUFFER] = null
  312. }
  313. if (chunk.length < gzipHeader.length) {
  314. this[BUFFER] = chunk
  315. return true
  316. }
  317. for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
  318. if (chunk[i] !== gzipHeader[i]) {
  319. this[UNZIP] = false
  320. }
  321. }
  322. if (this[UNZIP] === null) {
  323. const ended = this[ENDED]
  324. this[ENDED] = false
  325. this[UNZIP] = new zlib.Unzip()
  326. this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
  327. this[UNZIP].on('error', er => this.abort(er))
  328. this[UNZIP].on('end', _ => {
  329. this[ENDED] = true
  330. this[CONSUMECHUNK]()
  331. })
  332. this[WRITING] = true
  333. const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
  334. this[WRITING] = false
  335. return ret
  336. }
  337. }
  338. this[WRITING] = true
  339. if (this[UNZIP]) {
  340. this[UNZIP].write(chunk)
  341. } else {
  342. this[CONSUMECHUNK](chunk)
  343. }
  344. this[WRITING] = false
  345. // return false if there's a queue, or if the current entry isn't flowing
  346. const ret =
  347. this[QUEUE].length ? false :
  348. this[READENTRY] ? this[READENTRY].flowing :
  349. true
  350. // if we have no queue, then that means a clogged READENTRY
  351. if (!ret && !this[QUEUE].length) {
  352. this[READENTRY].once('drain', _ => this.emit('drain'))
  353. }
  354. return ret
  355. }
  356. [BUFFERCONCAT] (c) {
  357. if (c && !this[ABORTED]) {
  358. this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
  359. }
  360. }
  361. [MAYBEEND] () {
  362. if (this[ENDED] &&
  363. !this[EMITTEDEND] &&
  364. !this[ABORTED] &&
  365. !this[CONSUMING]) {
  366. this[EMITTEDEND] = true
  367. const entry = this[WRITEENTRY]
  368. if (entry && entry.blockRemain) {
  369. // truncated, likely a damaged file
  370. const have = this[BUFFER] ? this[BUFFER].length : 0
  371. this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${
  372. entry.blockRemain} more bytes, only ${have} available)`, { entry })
  373. if (this[BUFFER]) {
  374. entry.write(this[BUFFER])
  375. }
  376. entry.end()
  377. }
  378. this[EMIT](DONE)
  379. }
  380. }
  381. [CONSUMECHUNK] (chunk) {
  382. if (this[CONSUMING]) {
  383. this[BUFFERCONCAT](chunk)
  384. } else if (!chunk && !this[BUFFER]) {
  385. this[MAYBEEND]()
  386. } else {
  387. this[CONSUMING] = true
  388. if (this[BUFFER]) {
  389. this[BUFFERCONCAT](chunk)
  390. const c = this[BUFFER]
  391. this[BUFFER] = null
  392. this[CONSUMECHUNKSUB](c)
  393. } else {
  394. this[CONSUMECHUNKSUB](chunk)
  395. }
  396. while (this[BUFFER] &&
  397. this[BUFFER].length >= 512 &&
  398. !this[ABORTED] &&
  399. !this[SAW_EOF]) {
  400. const c = this[BUFFER]
  401. this[BUFFER] = null
  402. this[CONSUMECHUNKSUB](c)
  403. }
  404. this[CONSUMING] = false
  405. }
  406. if (!this[BUFFER] || this[ENDED]) {
  407. this[MAYBEEND]()
  408. }
  409. }
  410. [CONSUMECHUNKSUB] (chunk) {
  411. // we know that we are in CONSUMING mode, so anything written goes into
  412. // the buffer. Advance the position and put any remainder in the buffer.
  413. let position = 0
  414. const length = chunk.length
  415. while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) {
  416. switch (this[STATE]) {
  417. case 'begin':
  418. case 'header':
  419. this[CONSUMEHEADER](chunk, position)
  420. position += 512
  421. break
  422. case 'ignore':
  423. case 'body':
  424. position += this[CONSUMEBODY](chunk, position)
  425. break
  426. case 'meta':
  427. position += this[CONSUMEMETA](chunk, position)
  428. break
  429. /* istanbul ignore next */
  430. default:
  431. throw new Error('invalid state: ' + this[STATE])
  432. }
  433. }
  434. if (position < length) {
  435. if (this[BUFFER]) {
  436. this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
  437. } else {
  438. this[BUFFER] = chunk.slice(position)
  439. }
  440. }
  441. }
  442. end (chunk) {
  443. if (!this[ABORTED]) {
  444. if (this[UNZIP]) {
  445. this[UNZIP].end(chunk)
  446. } else {
  447. this[ENDED] = true
  448. this.write(chunk)
  449. }
  450. }
  451. }
  452. })