worksheet-reader.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. const {EventEmitter} = require('events');
  2. const parseSax = require('../../utils/parse-sax');
  3. const _ = require('../../utils/under-dash');
  4. const utils = require('../../utils/utils');
  5. const colCache = require('../../utils/col-cache');
  6. const Dimensions = require('../../doc/range');
  7. const Row = require('../../doc/row');
  8. const Column = require('../../doc/column');
  9. class WorksheetReader extends EventEmitter {
  10. constructor({workbook, id, iterator, options}) {
  11. super();
  12. this.workbook = workbook;
  13. this.id = id;
  14. this.iterator = iterator;
  15. this.options = options || {};
  16. // and a name
  17. this.name = `Sheet${this.id}`;
  18. // column definitions
  19. this._columns = null;
  20. this._keys = {};
  21. // keep a record of dimensions
  22. this._dimensions = new Dimensions();
  23. }
  24. // destroy - not a valid operation for a streaming writer
  25. // even though some streamers might be able to, it's a bad idea.
  26. destroy() {
  27. throw new Error('Invalid Operation: destroy');
  28. }
  29. // return the current dimensions of the writer
  30. get dimensions() {
  31. return this._dimensions;
  32. }
  33. // =========================================================================
  34. // Columns
  35. // get the current columns array.
  36. get columns() {
  37. return this._columns;
  38. }
  39. // get a single column by col number. If it doesn't exist, it and any gaps before it
  40. // are created.
  41. getColumn(c) {
  42. if (typeof c === 'string') {
  43. // if it matches a key'd column, return that
  44. const col = this._keys[c];
  45. if (col) {
  46. return col;
  47. }
  48. // otherise, assume letter
  49. c = colCache.l2n(c);
  50. }
  51. if (!this._columns) {
  52. this._columns = [];
  53. }
  54. if (c > this._columns.length) {
  55. let n = this._columns.length + 1;
  56. while (n <= c) {
  57. this._columns.push(new Column(this, n++));
  58. }
  59. }
  60. return this._columns[c - 1];
  61. }
  62. getColumnKey(key) {
  63. return this._keys[key];
  64. }
  65. setColumnKey(key, value) {
  66. this._keys[key] = value;
  67. }
  68. deleteColumnKey(key) {
  69. delete this._keys[key];
  70. }
  71. eachColumnKey(f) {
  72. _.each(this._keys, f);
  73. }
  74. async read() {
  75. try {
  76. for await (const events of this.parse()) {
  77. for (const {eventType, value} of events) {
  78. this.emit(eventType, value);
  79. }
  80. }
  81. this.emit('finished');
  82. } catch (error) {
  83. this.emit('error', error);
  84. }
  85. }
  86. async *[Symbol.asyncIterator]() {
  87. for await (const events of this.parse()) {
  88. for (const {eventType, value} of events) {
  89. if (eventType === 'row') {
  90. yield value;
  91. }
  92. }
  93. }
  94. }
  95. async *parse() {
  96. const {iterator, options} = this;
  97. let emitSheet = false;
  98. let emitHyperlinks = false;
  99. let hyperlinks = null;
  100. switch (options.worksheets) {
  101. case 'emit':
  102. emitSheet = true;
  103. break;
  104. case 'prep':
  105. break;
  106. default:
  107. break;
  108. }
  109. switch (options.hyperlinks) {
  110. case 'emit':
  111. emitHyperlinks = true;
  112. break;
  113. case 'cache':
  114. this.hyperlinks = hyperlinks = {};
  115. break;
  116. default:
  117. break;
  118. }
  119. if (!emitSheet && !emitHyperlinks && !hyperlinks) {
  120. return;
  121. }
  122. // references
  123. const {sharedStrings, styles, properties} = this.workbook;
  124. // xml position
  125. let inCols = false;
  126. let inRows = false;
  127. let inHyperlinks = false;
  128. // parse state
  129. let cols = null;
  130. let row = null;
  131. let c = null;
  132. let current = null;
  133. for await (const events of parseSax(iterator)) {
  134. const worksheetEvents = [];
  135. for (const {eventType, value} of events) {
  136. if (eventType === 'opentag') {
  137. const node = value;
  138. if (emitSheet) {
  139. switch (node.name) {
  140. case 'cols':
  141. inCols = true;
  142. cols = [];
  143. break;
  144. case 'sheetData':
  145. inRows = true;
  146. break;
  147. case 'col':
  148. if (inCols) {
  149. cols.push({
  150. min: parseInt(node.attributes.min, 10),
  151. max: parseInt(node.attributes.max, 10),
  152. width: parseFloat(node.attributes.width),
  153. styleId: parseInt(node.attributes.style || '0', 10),
  154. });
  155. }
  156. break;
  157. case 'row':
  158. if (inRows) {
  159. const r = parseInt(node.attributes.r, 10);
  160. row = new Row(this, r);
  161. if (node.attributes.ht) {
  162. row.height = parseFloat(node.attributes.ht);
  163. }
  164. if (node.attributes.s) {
  165. const styleId = parseInt(node.attributes.s, 10);
  166. const style = styles.getStyleModel(styleId);
  167. if (style) {
  168. row.style = style;
  169. }
  170. }
  171. }
  172. break;
  173. case 'c':
  174. if (row) {
  175. c = {
  176. ref: node.attributes.r,
  177. s: parseInt(node.attributes.s, 10),
  178. t: node.attributes.t,
  179. };
  180. }
  181. break;
  182. case 'f':
  183. if (c) {
  184. current = c.f = {text: ''};
  185. }
  186. break;
  187. case 'v':
  188. if (c) {
  189. current = c.v = {text: ''};
  190. }
  191. break;
  192. case 'mergeCell':
  193. break;
  194. default:
  195. break;
  196. }
  197. }
  198. // =================================================================
  199. //
  200. if (emitHyperlinks || hyperlinks) {
  201. switch (node.name) {
  202. case 'hyperlinks':
  203. inHyperlinks = true;
  204. break;
  205. case 'hyperlink':
  206. if (inHyperlinks) {
  207. const hyperlink = {
  208. ref: node.attributes.ref,
  209. rId: node.attributes['r:id'],
  210. };
  211. if (emitHyperlinks) {
  212. worksheetEvents.push({eventType: 'hyperlink', value: hyperlink});
  213. } else {
  214. hyperlinks[hyperlink.ref] = hyperlink;
  215. }
  216. }
  217. break;
  218. default:
  219. break;
  220. }
  221. }
  222. } else if (eventType === 'text') {
  223. // only text data is for sheet values
  224. if (emitSheet) {
  225. if (current) {
  226. current.text += value;
  227. }
  228. }
  229. } else if (eventType === 'closetag') {
  230. const node = value;
  231. if (emitSheet) {
  232. switch (node.name) {
  233. case 'cols':
  234. inCols = false;
  235. this._columns = Column.fromModel(cols);
  236. break;
  237. case 'sheetData':
  238. inRows = false;
  239. break;
  240. case 'row':
  241. this._dimensions.expandRow(row);
  242. worksheetEvents.push({eventType: 'row', value: row});
  243. row = null;
  244. break;
  245. case 'c':
  246. if (row && c) {
  247. const address = colCache.decodeAddress(c.ref);
  248. const cell = row.getCell(address.col);
  249. if (c.s) {
  250. const style = styles.getStyleModel(c.s);
  251. if (style) {
  252. cell.style = style;
  253. }
  254. }
  255. if (c.f) {
  256. const cellValue = {
  257. formula: c.f.text,
  258. };
  259. if (c.v) {
  260. if (c.t === 'str') {
  261. cellValue.result = utils.xmlDecode(c.v.text);
  262. } else {
  263. cellValue.result = parseFloat(c.v.text);
  264. }
  265. }
  266. cell.value = cellValue;
  267. } else if (c.v) {
  268. switch (c.t) {
  269. case 's': {
  270. const index = parseInt(c.v.text, 10);
  271. if (sharedStrings) {
  272. cell.value = sharedStrings[index];
  273. } else {
  274. cell.value = {
  275. sharedString: index,
  276. };
  277. }
  278. break;
  279. }
  280. case 'str':
  281. cell.value = utils.xmlDecode(c.v.text);
  282. break;
  283. case 'e':
  284. cell.value = {error: c.v.text};
  285. break;
  286. case 'b':
  287. cell.value = parseInt(c.v.text, 10) !== 0;
  288. break;
  289. default:
  290. if (utils.isDateFmt(cell.numFmt)) {
  291. cell.value = utils.excelToDate(
  292. parseFloat(c.v.text),
  293. properties.model && properties.model.date1904
  294. );
  295. } else {
  296. cell.value = parseFloat(c.v.text);
  297. }
  298. break;
  299. }
  300. }
  301. if (hyperlinks) {
  302. const hyperlink = hyperlinks[c.ref];
  303. if (hyperlink) {
  304. cell.text = cell.value;
  305. cell.value = undefined;
  306. cell.hyperlink = hyperlink;
  307. }
  308. }
  309. c = null;
  310. }
  311. break;
  312. default:
  313. break;
  314. }
  315. }
  316. if (emitHyperlinks || hyperlinks) {
  317. switch (node.name) {
  318. case 'hyperlinks':
  319. inHyperlinks = false;
  320. break;
  321. default:
  322. break;
  323. }
  324. }
  325. }
  326. }
  327. if (worksheetEvents.length > 0) {
  328. yield worksheetEvents;
  329. }
  330. }
  331. }
  332. }
  333. module.exports = WorksheetReader;