356 lines
10 KiB
JavaScript
356 lines
10 KiB
JavaScript
// @ts-check
|
|
|
|
/* eslint no-use-before-define:0 */
|
|
'use strict'; // Import
|
|
|
|
var pathUtil = require('path');
|
|
|
|
var textExtensions = require('textextensions');
|
|
|
|
var binaryExtensions = require('binaryextensions');
|
|
/**
|
|
* WIll be `null` if `buffer` was not provided. Otherwise will be either `'utf8'` or `'binary'`.
|
|
* @typedef {'utf8'|'binary'|null} EncodingResult
|
|
*/
|
|
|
|
/**
|
|
* WIll be `null` if neither `filename` nor `buffer` were provided. Otherwise will be a boolean value with the detection result.
|
|
* @typedef {boolean|null} TextOrBinaryResult
|
|
*/
|
|
|
|
/**
|
|
* @typedef {Object} EncodingOpts
|
|
* @property {number} [chunkLength = 24]
|
|
* @property {number} [chunkBegin = 0]
|
|
*/
|
|
|
|
/**
|
|
* @callback IsTextCallback
|
|
* @param {Error?} error
|
|
* @param {TextOrBinaryResult} [isTextResult]
|
|
*/
|
|
|
|
/**
|
|
* @callback IsBinaryCallback
|
|
* @param {Error?} error
|
|
* @param {TextOrBinaryResult} [isBinaryResult]
|
|
*/
|
|
|
|
/**
|
|
* @callback GetEncodingCallback
|
|
* @param {Error?} error
|
|
* @param {EncodingResult} [encoding]
|
|
*/
|
|
|
|
/**
|
|
* Determine if the filename and/or buffer is text.
|
|
* Determined by extension checks first (if filename is available), otherwise if unknown extension or no filename, will perform a slower buffer encoding detection.
|
|
* This order is done, as extension checks are quicker, and also because encoding checks cannot guarantee accuracy for chars between utf8 and utf16.
|
|
* The extension checks are performed using the resources https://github.com/bevry/textextensions and https://github.com/bevry/binaryextensions
|
|
* In a later major release, this function will become {@link isText} so you should use that instead.
|
|
* @param {string} [filename] The filename for the file/buffer if available
|
|
* @param {Buffer} [buffer] The buffer for the file if available
|
|
* @returns {TextOrBinaryResult}
|
|
*/
|
|
|
|
|
|
function isTextSync(filename, buffer) {
|
|
// Test extensions
|
|
if (filename) {
|
|
// Extract filename
|
|
var parts = pathUtil.basename(filename).split('.').reverse(); // Cycle extensions
|
|
|
|
var _iteratorNormalCompletion = true;
|
|
var _didIteratorError = false;
|
|
var _iteratorError = undefined;
|
|
|
|
try {
|
|
for (var _iterator = parts[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
|
|
var extension = _step.value;
|
|
|
|
if (textExtensions.indexOf(extension) !== -1) {
|
|
return true;
|
|
}
|
|
|
|
if (binaryExtensions.indexOf(extension) !== -1) {
|
|
return false;
|
|
}
|
|
}
|
|
} catch (err) {
|
|
_didIteratorError = true;
|
|
_iteratorError = err;
|
|
} finally {
|
|
try {
|
|
if (!_iteratorNormalCompletion && _iterator.return != null) {
|
|
_iterator.return();
|
|
}
|
|
} finally {
|
|
if (_didIteratorError) {
|
|
throw _iteratorError;
|
|
}
|
|
}
|
|
}
|
|
} // Fallback to encoding if extension check was not enough
|
|
|
|
|
|
if (buffer) {
|
|
return getEncodingSync(buffer) === 'utf8';
|
|
} // No buffer was provided
|
|
|
|
|
|
return null;
|
|
}
|
|
/**
|
|
* Callback wrapper for {@link isTextSync}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @param {IsTextCallback} callback
|
|
* @returns {void}
|
|
*/
|
|
|
|
|
|
function isTextCallback(filename, buffer, callback) {
|
|
var result;
|
|
|
|
try {
|
|
result = isTextSync(filename, buffer);
|
|
} catch (err) {
|
|
callback(err);
|
|
}
|
|
|
|
callback(null, result);
|
|
}
|
|
/**
|
|
* Promise wrapper for {@link isTextSync}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @returns {Promise<TextOrBinaryResult>}
|
|
*/
|
|
|
|
|
|
function isTextPromise(filename, buffer) {
|
|
try {
|
|
return Promise.resolve(isTextSync(filename, buffer));
|
|
} catch (err) {
|
|
return Promise.reject(err);
|
|
}
|
|
}
|
|
/**
|
|
* Wrapper around {@link isTextSync} for sync signature and {@link isTextCallback} async signature.
|
|
* In a later major release, {@link isTextSync}.will become this function, so if you prefer the callback interface you should use {@link isTextCallback}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @param {IsTextCallback} [callback] If provided, void will be returned, as the result will provided to the callback.
|
|
* @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
|
|
*/
|
|
|
|
|
|
function isText(filename, buffer, callback) {
|
|
if (callback) {
|
|
return isTextCallback(filename, buffer, callback);
|
|
} else return isTextSync(filename, buffer);
|
|
}
|
|
/**
|
|
* Inverse wrapper for {@link isTextSync}.
|
|
* In a later major release, this function will become {@link isBinary} so you should use that instead.
|
|
* @param {string} [filename]
|
|
* @param {Buffer} [buffer]
|
|
* @returns {TextOrBinaryResult}
|
|
*/
|
|
|
|
|
|
function isBinarySync(filename, buffer) {
|
|
var text = isTextSync(filename, buffer);
|
|
if (text == null) return null;
|
|
return !text;
|
|
}
|
|
/**
|
|
* Callback wrapper for {@link isBinarySync}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @param {IsTextCallback} callback
|
|
* @returns {void}
|
|
*/
|
|
|
|
|
|
function isBinaryCallback(filename, buffer, callback) {
|
|
var result;
|
|
|
|
try {
|
|
result = isBinarySync(filename, buffer);
|
|
} catch (err) {
|
|
callback(err);
|
|
}
|
|
|
|
callback(null, result);
|
|
}
|
|
/**
|
|
* Promise wrapper for {@link isBinarySync}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @returns {Promise<TextOrBinaryResult>}
|
|
*/
|
|
|
|
|
|
function isBinaryPromise(filename, buffer) {
|
|
try {
|
|
return Promise.resolve(isBinarySync(filename, buffer));
|
|
} catch (err) {
|
|
return Promise.reject(err);
|
|
}
|
|
}
|
|
/**
|
|
* Wrapper around {@link isBinarySync} for sync signature and {@link isBinaryCallback} async signature.
|
|
* In a later major release, {@link isBinarySync}.will become this function, so if you prefer the callback interface you should use {@link isBinaryCallback}.
|
|
* @param {string?} filename
|
|
* @param {Buffer?} buffer
|
|
* @param {IsTextCallback} [callback] If provided, void will be returned, as the result will provided to the callback.
|
|
* @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
|
|
*/
|
|
|
|
|
|
function isBinary(filename, buffer, callback) {
|
|
if (callback) {
|
|
return isBinaryCallback(filename, buffer, callback);
|
|
} else return isBinarySync(filename, buffer);
|
|
}
|
|
/**
|
|
* Get the encoding of a buffer.
|
|
* Checks the start, middle, and end of the buffer for characters that are unrecognized within UTF8 encoding.
|
|
* History has shown that inspection at all three locations is necessary.
|
|
* In a later major release, this function will become {@link getEncoding} so you should use that instead.
|
|
* @param {Buffer} buffer
|
|
* @param {EncodingOpts} [opts]
|
|
* @returns {EncodingResult}
|
|
*/
|
|
|
|
|
|
function getEncodingSync(buffer, opts) {
|
|
// Check
|
|
if (!buffer) return null; // Prepare
|
|
|
|
var textEncoding = 'utf8';
|
|
var binaryEncoding = 'binary'; // Discover
|
|
|
|
if (opts == null) {
|
|
// Start
|
|
var chunkLength = 24;
|
|
var encoding = getEncodingSync(buffer, {
|
|
chunkLength: chunkLength
|
|
});
|
|
|
|
if (encoding === textEncoding) {
|
|
// Middle
|
|
var chunkBegin = Math.max(0, Math.floor(buffer.length / 2) - chunkLength);
|
|
encoding = getEncodingSync(buffer, {
|
|
chunkLength: chunkLength,
|
|
chunkBegin: chunkBegin
|
|
});
|
|
|
|
if (encoding === textEncoding) {
|
|
// End
|
|
chunkBegin = Math.max(0, buffer.length - chunkLength);
|
|
encoding = getEncodingSync(buffer, {
|
|
chunkLength: chunkLength,
|
|
chunkBegin: chunkBegin
|
|
});
|
|
}
|
|
} // Return
|
|
|
|
|
|
return encoding;
|
|
} else {
|
|
// Extract
|
|
var _opts$chunkLength = opts.chunkLength,
|
|
_chunkLength = _opts$chunkLength === void 0 ? 24 : _opts$chunkLength,
|
|
_opts$chunkBegin = opts.chunkBegin,
|
|
_chunkBegin = _opts$chunkBegin === void 0 ? 0 : _opts$chunkBegin;
|
|
|
|
var chunkEnd = Math.min(buffer.length, _chunkBegin + _chunkLength);
|
|
var contentChunkUTF8 = buffer.toString(textEncoding, _chunkBegin, chunkEnd); // Detect encoding
|
|
|
|
for (var i = 0; i < contentChunkUTF8.length; ++i) {
|
|
var charCode = contentChunkUTF8.charCodeAt(i);
|
|
|
|
if (charCode === 65533 || charCode <= 8) {
|
|
// 8 and below are control characters (e.g. backspace, null, eof, etc.)
|
|
// 65533 is the unknown character
|
|
// console.log(charCode, contentChunkUTF8[i])
|
|
return binaryEncoding;
|
|
}
|
|
} // Return
|
|
|
|
|
|
return textEncoding;
|
|
}
|
|
}
|
|
/**
|
|
* Get the encoding of a buffer.
|
|
* Uses {@link getEncodingSync} behind the scenes.
|
|
* @param {Buffer} buffer
|
|
* @param {EncodingOpts} [opts]
|
|
* @param {GetEncodingCallback} callback
|
|
* @returns {void}
|
|
*/
|
|
|
|
|
|
function getEncodingCallback(buffer, opts, callback) {
|
|
if (typeof opts === 'function' && callback == null) return getEncodingCallback(buffer, null, opts);
|
|
/** @type {EncodingResult?} */
|
|
|
|
var result;
|
|
|
|
try {
|
|
result = getEncodingSync(buffer, opts);
|
|
} catch (err) {
|
|
callback(err);
|
|
}
|
|
|
|
callback(null, result);
|
|
}
|
|
/**
|
|
* Promise wrapper for {@link getEncodingSync}.
|
|
* @param {Buffer} buffer
|
|
* @param {EncodingOpts} [opts]
|
|
* @returns {Promise<EncodingResult>}
|
|
*/
|
|
|
|
|
|
function getEncodingPromise(buffer, opts) {
|
|
try {
|
|
return Promise.resolve(getEncodingSync(buffer, opts));
|
|
} catch (err) {
|
|
return Promise.reject(err);
|
|
}
|
|
}
|
|
/**
|
|
* Wrapper around {@link getEncodingSync} for sync signature and {@link getEncodingCallback} async signature.
|
|
* In a later major release, {@link getEncodingSync}.will become this function, so if you prefer the callback interface you should use {@link getEncodingCallback}.
|
|
* @param {Buffer} buffer
|
|
* @param {EncodingOpts} [opts]
|
|
* @param {GetEncodingCallback} [callback] If provided, void will be returned, as the result will provided to the callback.
|
|
* @returns {EncodingResult|void} If no callback was provided, then the result is returned.
|
|
*/
|
|
|
|
|
|
function getEncoding(buffer, opts, callback) {
|
|
if (callback || typeof opts === 'function') {
|
|
return getEncodingCallback(buffer, opts, callback);
|
|
} else return getEncodingSync(buffer, opts);
|
|
} // Export
|
|
|
|
|
|
module.exports = {
|
|
isTextSync: isTextSync,
|
|
isTextCallback: isTextCallback,
|
|
isTextPromise: isTextPromise,
|
|
isText: isText,
|
|
isBinarySync: isBinarySync,
|
|
isBinaryCallback: isBinaryCallback,
|
|
isBinaryPromise: isBinaryPromise,
|
|
isBinary: isBinary,
|
|
getEncoding: getEncoding,
|
|
getEncodingSync: getEncodingSync,
|
|
getEncodingPromise: getEncodingPromise,
|
|
getEncodingCallback: getEncodingCallback
|
|
}; |