DomHandler.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. //DomHandler.js
  2. var emoji; // 使用emoji补丁包时将此句改为 const emoji = require('./emoji.js');
  3. const CssHandler = require('./CssHandler.js');
  4. // #ifdef MP-WEIXIN
  5. const CanIUse = require('./api.js').versionHigherThan('2.7.1');
  6. // #endif
  /**
   * Tags that keep their own name in the generated node tree: onopentag
   * renames any tag that is neither a key of this table nor of blockTag to
   * `span`.
   *
   * The value steers DomHandler.prototype._bubbling while it walks the stack of
   * open tags from the innermost one outwards:
   *   1 - the open element is flagged with `continue = true` and the walk
   *       moves on to its parent;
   *   0 - the walk stops here and this tag's name is returned.
   */
  const trustTag = {
    a: 0, abbr: 1, ad: 0, audio: 0,
    b: 1, blockquote: 1, br: 0,
    code: 1, col: 0, colgroup: 0,
    dd: 1, del: 1, dl: 1, dt: 1, div: 1,
    em: 1,
    fieldset: 0, font: 1,
    h1: 0, h2: 0, h3: 0, h4: 0, h5: 0, h6: 0, hr: 0,
    i: 1, img: 1, ins: 1,
    label: 1, legend: 0, li: 0,
    ol: 0,
    p: 1,
    q: 1,
    source: 0, span: 1, strong: 1, sub: 0, sup: 0,
    table: 0, tbody: 0, td: 0, tfoot: 0, th: 0, thead: 0, tr: 0,
    u: 1, ul: 0,
    video: 1
  };
  /**
   * Tags that onopentag renames to `div` before the element is stored
   * (each listed name maps to `true`).
   */
  const blockTag = {};
  for (const tagName of [
    'address', 'article', 'aside', 'body', 'center', 'cite',
    'footer', 'header', 'html', 'nav', 'pre', 'section'
  ]) {
    blockTag[tagName] = true;
  }
  /**
   * Tags whose element onclosetag removes again from its parent's child list
   * when the closing tag arrives (for `title`, the text of the first child is
   * stored as the document title first). Each listed name maps to `true`.
   */
  const ignoreTag = {};
  for (const tagName of [
    'area', 'base', 'basefont', 'canvas', 'circle', 'command', 'ellipse', 'embed',
    'frame', 'head', 'iframe', 'input', 'isindex', 'keygen', 'line', 'link',
    'map', 'meta', 'param', 'path', 'polygon', 'polyline', 'rect', 'script',
    'stop', 'textarea', 'title', 'track', 'use', 'wbr'
  ]) {
    ignoreTag[tagName] = true;
  }
  // #ifdef MP-WEIXIN
  // CanIUse holds the result of versionHigherThan('2.7.1') from ./api.js.
  // When it is truthy, several extra tags are registered in trustTag, `rp` is
  // dropped from the output, and `pre` moves from blockTag to trustTag;
  // otherwise `caption` is simply treated as a block tag.
  if (CanIUse) {
    for (const tagName of ['bdi', 'bdo', 'caption', 'rt', 'ruby']) {
      trustTag[tagName] = 0;
    }
    ignoreTag.rp = true;
    for (const tagName of ['big', 'small']) {
      trustTag[tagName] = 1;
    }
    trustTag.pre = 0;
    delete blockTag.pre;
  } else {
    blockTag.caption = true;
  }
  // #endif
  /**
   * Builds a random identifier of five ASCII letters (A-Z, a-z).
   *
   * Uses Math.random, so the result is not suitable for anything
   * security-sensitive.
   *
   * @returns {string} a 5-character string of letters
   */
  function randomId() {
    let id = "";
    for (let i = 0; i < 5; i++) {
      // Math.floor rather than parseInt: parseInt converts its argument to a
      // string first, so a product below 1e-6 (printed as e.g. "5.2e-7")
      // would be read as 5 instead of 0.
      const pick = Math.floor(Math.random() * 52);
      // 0-25 -> 'A'-'Z' (char codes 65-90), 26-51 -> 'a'-'z' (char codes 97-122).
      id += String.fromCharCode(pick < 26 ? 65 + pick : 71 + pick);
    }
    return id;
  }
  /**
   * Handler that receives open-tag / text / close-tag events and builds a node
   * tree in `this.nodes`, collecting image URLs in `this.imgList` and the
   * document title in `this.title` along the way.
   *
   * @param {*} style - forwarded unchanged as the first argument of CssHandler
   * @param {Object} [options] - optional settings; the fields read here are
   *   `tagStyle` (forwarded to CssHandler), `imgMode`, `domain` and `useAnchor`.
   *   Omitting the object is treated like passing `{}`.
   */
  function DomHandler(style, options) {
    // Tolerate a missing options object instead of throwing on the first read.
    const opts = options || {};
    const domain = opts.domain;

    this.imgList = [];
    this.imgIndex = 0;
    this.nodes = [];
    this.title = "";
    this._CssHandler = new CssHandler(style, opts.tagStyle);
    this._tagStack = [];
    this._videoNum = 0;
    // #ifdef MP-BAIDU || MP-TOUTIAO || H5
    this._imgMode = opts.imgMode;
    // #endif
    this._whiteSpace = false;
    this._domain = domain;
    // Scheme of the configured domain (text before "://"), "http" when the
    // domain carries no scheme or is not set.
    this._protocol = /:\/\//.test(domain) ? domain.split(/:\/\//)[0] : "http";
    this._useAnchor = opts.useAnchor;
  }
  /**
   * Appends a node to the children of the innermost open tag, or to the root
   * list `this.nodes` when no tag is open.
   *
   * A node named `pre`, or one whose inline style contains
   * `white-space: pre...`, is flagged with `pre = true` and switches the
   * handler into whitespace-preserving mode.
   *
   * @param {Object} element - element or text node to append
   */
  DomHandler.prototype._addDomElement = function(element) {
    const keepsWhiteSpace = element.name == 'pre' ||
      (element.attrs && /white-space\s*:\s*pre/.test(element.attrs.style));
    if (keepsWhiteSpace) {
      element.pre = true;
      this._whiteSpace = true;
    }
    const innermost = this._tagStack[this._tagStack.length - 1];
    if (innermost) {
      innermost.children.push(element);
    } else {
      this.nodes.push(element);
    }
  };
  /**
   * Walks the stack of open tags from the innermost one outwards. Every
   * element whose name has a truthy entry in trustTag is flagged with
   * `continue = true`; the walk ends at the first element without such an
   * entry.
   *
   * @returns {string|undefined} the name of the element that ended the walk,
   *   or undefined when every open element was flagged (or none is open)
   */
  DomHandler.prototype._bubbling = function() {
    let depth = this._tagStack.length;
    while (depth-- > 0) {
      const ancestor = this._tagStack[depth];
      if (!trustTag[ancestor.name]) {
        return ancestor.name;
      }
      ancestor.continue = true;
    }
  };
  /**
   * Returns `text` with each ASCII letter upper-cased with probability 1/2
   * (letters are never lower-cased; other characters are copied unchanged).
   * Used by onopentag to make a repeated image URL differ as a string.
   */
  function randomizeLetterCase(text) {
    let mixed = "";
    for (const ch of text) {
      if (/[a-zA-Z]/.test(ch)) mixed += (Math.random() >= 0.5 ? ch.toUpperCase() : ch);
      else mixed += ch;
    }
    return mixed;
  }

  /**
   * Handles an opening tag: normalises presentational attributes into inline
   * style, records images and media sources, then appends the new element to
   * the tree and pushes it onto the stack of open tags.
   *
   * `source` tags are only pushed onto the stack (so the matching close event
   * can pop them); they are not added to the tree.
   *
   * @param {string} name - tag name
   * @param {Object} attrs - attribute map of the tag; modified in place and
   *   stored on the element as `element.attrs`
   */
  DomHandler.prototype.onopentag = function(name, attrs) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const element = {
      children: []
    };
    const matched = this._CssHandler.match(name, attrs, element);
    // The branches below append to attrs.style; start from a string so that a
    // missing style can never end up as the literal text "undefined".
    attrs.style = attrs.style || "";
    // Attribute handling
    switch (name) {
      case 'div':
      case 'p':
        if (attrs.align) {
          attrs.style += (';text-align:' + attrs.align);
          delete attrs.align;
        }
        break;
      case 'img': {
        if (attrs.width) {
          // A width ending in a digit is a bare number and gets a px unit.
          const unit = /[0-9]/.test(attrs.width[attrs.width.length - 1]) ? 'px' : '';
          attrs.style = 'width:' + attrs.width + unit + ';' + attrs.style;
          delete attrs.width;
        }
        if (attrs['data-src']) {
          attrs.src = attrs.src || attrs['data-src'];
          delete attrs['data-src'];
        }
        // #ifdef MP-BAIDU || MP-TOUTIAO || H5
        if (this._imgMode == "widthFix") attrs.style += ";height:auto !important;";
        // #endif
        // Resolve protocol-relative ("//host/x") and root-relative ("/x")
        // sources against the configured domain. This runs before the URL is
        // recorded so that imgList holds the resolved address, it also covers
        // images inside links, and it is skipped when there is no src at all.
        if (this._domain && attrs.src && attrs.src[0] == '/') {
          if (attrs.src[1] == '/') attrs.src = this._protocol + ":" + attrs.src;
          else attrs.src = this._domain + attrs.src;
        }
        // Images that carry an `ignore` attribute, have no source, or sit
        // inside a link are not added to the preview list.
        if (hasOwn.call(attrs, 'ignore') || !attrs.src || this._bubbling() == 'a') {
          attrs.ignore = "true";
          break;
        }
        let url = attrs.src;
        // #ifdef MP-WEIXIN || MP-QQ || MP-BAIDU || MP-TOUTIAO
        // De-duplicate: while the URL is already in imgList, vary the letter
        // case of its host (or of the data-URI media type); at most 10 tries.
        for (let attempt = 0; this.imgList.indexOf(url) != -1 && attempt < 10; attempt++) {
          // Network URL
          if (/^http/.test(url)) {
            url = url.replace(/^(https*):\/\/([\S]+?)\//, function(whole, scheme, host) {
              return (scheme + '://' + randomizeLetterCase(host) + '/');
            });
          }
          // base64 data URI
          else if (/base64/.test(url)) {
            url = url.replace(/^data:(image\/\S+);base64,/, function(whole, mediaType) {
              return ('data:' + randomizeLetterCase(mediaType) + ';base64,');
            });
          } else break;
        }
        // #endif
        element.current = this.imgList.length;
        this.imgList.push(url);
        break;
      }
      case 'font': {
        name = 'span';
        if (attrs.color) {
          attrs.style += (';color:' + attrs.color);
          delete attrs.color;
        }
        if (attrs.face) {
          attrs.style += (";font-family:" + attrs.face);
          delete attrs.face;
        }
        if (attrs.size) {
          // Pixel sizes for the legacy font sizes 1..7.
          const pixelSizes = [10, 13, 16, 18, 24, 32, 48];
          const level = parseInt(attrs.size, 10);
          // A non-numeric size is dropped instead of emitting "undefinedpx".
          if (!isNaN(level)) {
            const clamped = Math.min(7, Math.max(1, level));
            attrs.style += (";font-size:" + pixelSizes[clamped - 1] + "px");
          }
          delete attrs.size;
        }
        break;
      }
      case 'a':
      case 'ad':
        this._bubbling();
        break;
      case 'video':
      case 'audio':
        // Boolean attributes: presence of the attribute means true.
        attrs.loop = hasOwn.call(attrs, 'loop');
        attrs.controls = hasOwn.call(attrs, 'controls');
        attrs.autoplay = hasOwn.call(attrs, 'autoplay');
        if (name == 'video') {
          attrs.muted = hasOwn.call(attrs, 'muted');
          if (attrs.width) {
            attrs.style = 'width:' + parseFloat(attrs.width) + 'px;' + attrs.style;
            delete attrs.width;
          }
          if (attrs.height) {
            attrs.style = 'height:' + parseFloat(attrs.height) + 'px;' + attrs.style;
            delete attrs.height;
          }
        }
        // Every media element gets a generated id; videos are also numbered.
        attrs.id = randomId() + (name == 'video' ? ++this._videoNum : '');
        attrs.source = [];
        if (attrs.src) attrs.source.push(attrs.src);
        if (!attrs.controls && !attrs.autoplay)
          console.warn('存在没有controls属性的' + name + '标签,可能导致无法播放', attrs);
        this._bubbling();
        break;
      case 'source': {
        const media = this._tagStack[this._tagStack.length - 1];
        // Only a source that actually has a URL is added to the enclosing
        // video/audio element.
        if (attrs.src && media && (media.name == 'video' || media.name == 'audio')) {
          media.attrs.source.push(attrs.src);
          if (!media.attrs.src) media.attrs.src = attrs.src;
        }
        this._tagStack.push(element);
        return;
      }
    }
    if (this._useAnchor && attrs.id) this._bubbling();
    // Styles matched by the CSS handler go first; inline styles follow them.
    attrs.style = matched + attrs.style;
    if (blockTag[name]) name = 'div';
    else if (!hasOwn.call(trustTag, name)) name = 'span';
    element.name = name;
    element.attrs = attrs;
    this._addDomElement(element);
    this._tagStack.push(element);
  };
  // #ifndef MP-WEIXIN || MP-QQ || APP-PLUS || H5
  // Named character references that ontext decodes itself. Built once at
  // module load instead of on every text node.
  const TEXT_ENTITIES = {
    lt: "<",
    gt: ">",
    nbsp: "\u00A0",
    ensp: "\u2002",
    emsp: "\u2003",
    amp: "&",
    apos: "'",
    quot: '"',
    ndash: "–",
    mdash: "—",
    middot: "·",
    lsquo: "‘",
    rsquo: "’",
    ldquo: "“",
    rdquo: "”",
    bull: "•",
    hellip: "…",
    permil: "‰",
    copy: "©",
    reg: "®",
    trade: "™",
    times: "×",
    divide: "÷",
    cent: "¢",
    pound: "£",
    yen: "¥",
    euro: "€",
    sect: "§"
  };
  // #endif
  /**
   * Handles a run of text: collapses whitespace (unless a whitespace-preserving
   * element is open), deals with character references and appends a text node
   * `{ type: 'text', text }` to the tree.
   *
   * Outside whitespace-preserving mode, text consisting only of whitespace is
   * dropped entirely.
   *
   * @param {string} data - raw text between tags
   */
  DomHandler.prototype.ontext = function(data) {
    if (!this._whiteSpace) {
      if (!/\S/.test(data))
        return;
      data = data.replace(/\s+/g, " ");
    }
    // #ifndef MP-WEIXIN || MP-QQ || APP-PLUS || H5
    data = data.replace(/&(\S{1,8}?);/g, function(whole, ref) {
      // Own-property check so names such as "valueOf" do not resolve to
      // members inherited from Object.prototype.
      if (Object.prototype.hasOwnProperty.call(TEXT_ENTITIES, ref)) return TEXT_ENTITIES[ref];
      // Numeric references: &#169; (decimal) or &#xA9; (hexadecimal).
      const numeric = /^#(?:x([0-9a-f]+)|([0-9]+))$/i.exec(ref);
      if (numeric) {
        const codePoint = numeric[1] ? parseInt(numeric[1], 16) : parseInt(numeric[2], 10);
        if (codePoint > 0 && codePoint <= 0x10FFFF) return String.fromCodePoint(codePoint);
      }
      // Any other reference is removed from the text.
      return "";
    });
    // #endif
    const element = {
      type: 'text'
    };
    // #ifdef MP-WEIXIN || MP-QQ || H5 || APP-PLUS
    data = data.replace(/&nbsp;/g, '&#xA0;'); // works around consecutive &nbsp; not taking effect
    // Flag the node for decoding when it still contains a character reference
    // that the pattern below accepts.
    if (/&#*((?!sp|lt|gt).){2,8};/.test(data)) element.decode = true;
    // #endif
    if (emoji) data = emoji.parseEmoji(data);
    element.text = data;
    this._addDomElement(element);
  };
  /**
   * Applies a table's `border` / `cellpadding` attributes to every `th` and
   * `td` below `node` by appending to the cells' inline style.
   */
  function styleTableCells(node, tableAttrs, hasCellPadding) {
    if (node.type == 'text') return;
    if (node.name == 'th' || node.name == 'td') {
      if (tableAttrs.border)
        node.attrs.style += ";border:" + tableAttrs.border + "px solid gray;";
      if (hasCellPadding)
        node.attrs.style += ";padding:" + tableAttrs.cellpadding + "px";
    }
    for (const child of (node.children || []))
      styleTableCells(child, tableAttrs, hasCellPadding);
  }

  /**
   * Handles a closing tag: pops the innermost open element and finishes it.
   *
   * - Elements of ignored tags are removed from their parent again (the text
   *   of a `title` element is saved in `this.title` first).
   * - `border`, `cellspacing` and `cellpadding` of a table are turned into
   *   inline styles on the table and its cells.
   * - A `div` whose only child is another `div` is merged into that child to
   *   reduce nesting depth, unless their styles/ids/classes would clash.
   * - Whitespace-preserving mode is re-evaluated when a `pre` element closes.
   *
   * A close event that arrives while no tag is open is ignored.
   *
   * @param {string} name - tag name as written in the source document
   */
  DomHandler.prototype.onclosetag = function(name) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const element = this._tagStack.pop();
    // Nothing is open: there is no element to finish (and no node that may
    // safely be removed from the tree).
    if (!element) return;
    const parent = this._tagStack.length ? this._tagStack[this._tagStack.length - 1].children : this.nodes;
    // Own-property check so tag names like "constructor" are not mistaken for
    // ignored tags through Object.prototype.
    if (hasOwn.call(ignoreTag, name)) {
      if (name == 'title') {
        const first = element.children[0];
        if (first && first.type == 'text') this.title = first.text;
      }
      parent.pop();
    }
    // Table borders, spacing and cell padding
    if (name == 'table' && element.attrs) {
      const tableAttrs = element.attrs;
      const hasCellPadding = hasOwn.call(tableAttrs, "cellpadding");
      if (tableAttrs.border)
        tableAttrs.style += (";border:" + tableAttrs.border + "px solid gray;");
      if (hasOwn.call(tableAttrs, "cellspacing"))
        tableAttrs.style += (";border-spacing:" + tableAttrs.cellspacing + "px");
      if (tableAttrs.border || hasCellPadding) {
        for (const child of element.children)
          styleTableCells(child, tableAttrs, hasCellPadding);
      }
    }
    // Merge unnecessary wrapper layers to reduce node depth
    if (element.children.length == 1 && element.name == 'div') {
      const only = element.children[0];
      const outer = element.attrs;
      const mergeable = only.name == 'div' &&
        !(/padding/.test(outer.style)) &&
        !(/margin/.test(outer.style) && /margin/.test(only.attrs.style)) &&
        !(/display/.test(outer.style)) &&
        !(/display/.test(only.attrs.style)) &&
        !(outer.id && only.attrs.id) &&
        !(outer.class && only.attrs.class);
      if (mergeable) {
        if (/padding/.test(only.attrs.style))
          only.attrs.style = ";box-sizing:border-box;" + only.attrs.style;
        only.attrs.style = outer.style + ";" + only.attrs.style;
        only.attrs.id = (only.attrs.id || "") + (outer.id || "");
        only.attrs.class = (only.attrs.class || "") + (outer.class || "");
        parent[parent.indexOf(element)] = only;
      }
    }
    if (element.pre) {
      // Stay in whitespace-preserving mode only while another `pre` element
      // is still open.
      this._whiteSpace = false;
      for (const open of this._tagStack) {
        if (open.pre) {
          this._whiteSpace = true;
          break;
        }
      }
      delete element.pre;
    }
    // Multi-level style handling
    if (this._CssHandler.pop)
      this._CssHandler.pop(element);
  };
  408. module.exports = DomHandler;