.38-浅析webpack源码之babel-loader转换js文件

程序员文章站 2022-03-28 19:46:50

经过非常非常长无聊的流程，只是将获取到的module信息做了一些缓存，然后生成了loaderContext对象。这里上个图整理一下这节的流程：这一节来看webpack是如何将babel-loader与js文件结合的，首先总览一下runLoaders函数：传入的4个参数都很直白： 1、待处理文件 ......

　　经过非常非常长无聊的流程，只是将获取到的module信息做了一些缓存，然后生成了loaderContext对象。

　　这里上个图整理一下这节的流程：

　　这一节来看webpack是如何将babel-loader与js文件结合的，首先总览一下runLoaders函数：

/*
    Example `options` value used throughout this walkthrough:
    options =>
    {
        resource: 'd:\\workspace\\doc\\input.js',
        loaders: [ { loader: 'd:\\workspace\\node_modules\\babel-loader\\lib\\index.js' } ],
        context: loaderContext,
        readResource: fs.readFile.bind(fs)
    }
*/
/**
 * Prepares the loader context for one resource and starts the loader pipeline.
 *
 * @param {Object} options - `resource` (request string), `loaders` (raw loader entries),
 *     `context` (object that becomes the loaderContext) and `readResource` (file reader).
 *     Every field has a fallback; the `readFile` fallback is defined outside this excerpt.
 * @param {Function} callback - called as `callback(err, info)`. `info` always carries
 *     `cacheable`, `fileDependencies` and `contextDependencies`; on success it also
 *     carries `result` and `resourceBuffer`.
 */
exports.runLoaders = function runLoaders(options, callback) {
    // read options
    var resource = options.resource || "";
    var loaders = options.loaders || [];
    var loaderContext = options.context || {};
    var readResource = options.readResource || readFile;

    // Split the resource request into its path, its query part and the directory of the path.
    // An empty resource leaves path and query undefined and the directory null.
    var splittedResource = resource && splitQuery(resource);
    var resourcePath = splittedResource ? splittedResource[0] : undefined;
    var resourceQuery = splittedResource ? splittedResource[1] : undefined;
    var contextDirectory = resourcePath ? dirname(resourcePath) : null;

    // execution state
    var requestCacheable = true;
    var fileDependencies = [];
    var contextDependencies = [];

    // prepare loader objects
    loaders = loaders.map(createLoaderObject);

    // Attach the per-run state and the helper methods to loaderContext
    loaderContext.context = contextDirectory;
    loaderContext.loaderIndex = 0;
    loaderContext.loaders = loaders;
    loaderContext.resourcePath = resourcePath;
    loaderContext.resourceQuery = resourceQuery;
    loaderContext.async = null;
    loaderContext.callback = null;
    // Only an explicit `false` turns caching off; any other argument leaves the flag untouched.
    loaderContext.cacheable = function cacheable(flag) {
        if (flag === false) {
            requestCacheable = false;
        }
    };
    // `dependency` and `addDependency` are two names for the same function.
    loaderContext.dependency = loaderContext.addDependency = function addDependency(file) {
        fileDependencies.push(file);
    };
    loaderContext.addContextDependency = function addContextDependency(context) {
        contextDependencies.push(context);
    };
    // The getters hand out copies, so callers cannot mutate the internal arrays.
    loaderContext.getDependencies = function getDependencies() {
        return fileDependencies.slice();
    };
    loaderContext.getContextDependencies = function getContextDependencies() {
        return contextDependencies.slice();
    };
    // Empties both arrays in place and resets the cacheable flag to its initial value.
    loaderContext.clearDependencies = function clearDependencies() {
        fileDependencies.length = 0;
        contextDependencies.length = 0;
        requestCacheable = true;
    };
    // Define accessor properties; `resource` is derived from resourcePath + resourceQuery
    // and assigning to it splits the value back into those two fields.
    Object.defineProperty(loaderContext, "resource", {
        enumerable: true,
        get: function() {
            if (loaderContext.resourcePath === undefined)
                return undefined;
            return loaderContext.resourcePath + loaderContext.resourceQuery;
        },
        set: function(value) {
            var splittedResource = value && splitQuery(value);
            loaderContext.resourcePath = splittedResource ? splittedResource[0] : undefined;
            loaderContext.resourceQuery = splittedResource ? splittedResource[1] : undefined;
        }
    });
    // ... (many more Object.defineProperty calls are elided in this excerpt)

    // finish loader context
    // No new properties can be added from here on; existing ones remain writable.
    if (Object.preventExtensions) {
        Object.preventExtensions(loaderContext);
    }

    // Shared with processResource, which stores the raw file content in resourceBuffer.
    var processOptions = {
        resourceBuffer: null,
        readResource: readResource
    };
    iteratePitchingLoaders(processOptions, loaderContext, function(err, result) {
        // On failure the dependency information gathered so far is still reported.
        if (err) {
            return callback(err, {
                cacheable: requestCacheable,
                fileDependencies: fileDependencies,
                contextDependencies: contextDependencies
            });
        }
        callback(null, {
            result: result,
            resourceBuffer: processOptions.resourceBuffer,
            cacheable: requestCacheable,
            fileDependencies: fileDependencies,
            contextDependencies: contextDependencies
        });
    });
};

　　传入的4个参数都很直白：

1、待处理文件绝对路径

2、文件后缀对应的loader入口文件绝对路径

3、对应的loaderContext对象

4、读取文件内容的方法（fs.readFile.bind(fs)）

　　前面所有的事都是为了生成前3个属性，在这里整合在一起开始做转换处理。

createLoaderObject

　　这里有一个需要简单看的地方，就是对loaders数组做了一层封装：

// prepare loader objects: every raw entry of the loaders array is replaced by
// the object that createLoaderObject returns for it
loaders = loaders.map(createLoaderObject);

　　简单看一下这个函数：

/**
 * Builds the non-extensible bookkeeping object used for one loader.
 *
 * @param {string|Object} loader - either a request string ("path?query") or an object
 *     such as { loader: 'd:\\workspace\\node_modules\\babel-loader\\lib\\index.js' },
 *     optionally carrying `options` and `ident`.
 * @returns {Object} object with path/query/options/ident, the slots later filled by
 *     loadLoader (normal, pitch, raw), `data`, the two "executed" flags and a
 *     `request` accessor.
 * @throws {Error} when an object is passed without a truthy `loader` property.
 */
function createLoaderObject(loader) {
    const loaderObject = {
        path: null,
        query: null,
        options: null,
        ident: null,
        normal: null,
        pitch: null,
        raw: null,
        data: null,
        pitchExecuted: false,
        normalExecuted: false
    };

    // Turns the options/ident pair of an object-style entry into its query string.
    const toQuery = function(options, ident) {
        if (options === null || options === undefined) {
            return "";
        }
        if (typeof options === "string") {
            return "?" + options;
        }
        if (ident) {
            return "??" + ident;
        }
        if (typeof options === "object" && options.ident) {
            return "??" + options.ident;
        }
        return "?" + JSON.stringify(options);
    };

    // `request` is computed from path + query; assigning to it re-derives every field.
    Object.defineProperty(loaderObject, "request", {
        enumerable: true,
        get() {
            return loaderObject.path + loaderObject.query;
        },
        set(value) {
            if (typeof value === "string") {
                const parts = splitQuery(value);
                loaderObject.path = parts[0];
                loaderObject.query = parts[1];
                loaderObject.options = undefined;
                loaderObject.ident = undefined;
                return;
            }
            // value => { loader: 'd:\\workspace\\node_modules\\babel-loader\\lib\\index.js' }
            if (!value.loader) {
                throw new Error("request should be a string or object with loader and object (" + JSON.stringify(value) + ")");
            }
            // For the walkthrough's entry only `path` ends up meaningful: there are no
            // options and no ident, so the query becomes the empty string.
            loaderObject.path = value.loader;
            loaderObject.options = value.options;
            loaderObject.ident = value.ident;
            loaderObject.query = toQuery(loaderObject.options, loaderObject.ident);
        }
    });

    // Runs the setter above
    loaderObject.request = loader;

    // Lock the shape: no further properties can be added
    if (Object.preventExtensions) {
        Object.preventExtensions(loaderObject);
    }
    return loaderObject;
}

　　最后做封装，然后返回一个obj。

　　将属性全部挂载在loaderContext上面，最后也是调用Object.preventExtensions将对象设为不可扩展，禁止添加任何新的属性（已有属性仍然可以修改，和Object.freeze的冻结不同）。

　　完成对象的安装后，最后调用了迭代器方法，这里看一下iteratePitchingLoaders方法内部实现：

/**
 * Pitching phase: walks loaderContext.loaders from index 0 upwards, loading each
 * loader module, and hands over to processResource once the index passes the last loader.
 *
 * @param {Object} options - the processOptions object created in runLoaders
 * @param {Object} loaderContext - shared context; loaderIndex is advanced in place
 * @param {Function} callback - final callback, forwarded to the next stage
 */
function iteratePitchingLoaders(options, loaderContext, callback) {
    // abort after last loader
    // loaderIndex starts at 0 (runLoaders sets it)
    if (loaderContext.loaderIndex >= loaderContext.loaders.length)
        return processResource(options, loaderContext, callback);

    // Fetch the object that createLoaderObject built for the current loader
    var currentLoaderObject = loaderContext.loaders[loaderContext.loaderIndex];

    // iterate
    // pitchExecuted starts out false; once it is true this loader's pitch step has
    // already been handled, so move on to the next loader
    if (currentLoaderObject.pitchExecuted) {
        loaderContext.loaderIndex++;
        return iteratePitchingLoaders(options, loaderContext, callback);
    }

    // load loader module
    loadLoader(currentLoaderObject, function(err) {
        // ... (callback body elided in this excerpt)
    });
}

　　取出来loader对象后，调用loadLoader来加载loader，看一眼：

module.exports = function loadLoader(loader, callback) {
    // 不知道这个System是什么环境下的变量
    // node环境是global
    // 浏览器环境是window
    if (typeof System === "object" && typeof System.import === "function") {
        // ...
    } else {
        try {
            // 直接尝试读取路径的文件
            var module = require(loader.path);
        } catch (e) {
            // it is possible for node to choke on a require if the FD descriptor
            // limit has been reached. give it a chance to recover.
            // 因为可能出现阻塞情况 所以这里会进行重试
            if (e instanceof Error && e.code === "EMFILE") {
                var retry = loadLoader.bind(null, loader, callback);
                if (typeof setImmediate === "function") {
                    // node >= 0.9.0
                    return setImmediate(retry);
                } else {
                    // node < 0.9.0
                    return process.nextTick(retry);
                }
            }
            return callback(e);
        }
        if (typeof loader !== "function" && typeof loader !== "object")
            throw new Error("Module '" + loader.path + "' is not a loader (export function or es6 module))");
        // babel-loader返回的module是一个function
        loader.normal = typeof module === "function" ? module : module.default;
        loader.pitch = module.pitch;
        loader.raw = module.raw;
        if (typeof loader.normal !== "function" && typeof loader.pitch !== "function")
            throw new Error("Module '" + loader.path + "' is not a loader (must have normal or pitch function)");
        callback();
    }
};

　　这里就涉及到loader的返回值，通过直接读取babel-loader的入口文件，最后返回了一个function，后面两个属性babel-loader并没有给，是undefined。

　　这里把babel-loader返回值挂载到loader上后，就调用了无参回调函数，如下：

// Continuation of iteratePitchingLoaders: runs after the current loader's module has been loaded.
loadLoader(currentLoaderObject, function(err) {
    if (err) return callback(err);
    // For babel-loader the article notes that no pitch function is exported, so this is undefined
    var fn = currentLoaderObject.pitch;
    // Mark the pitch step as handled; the next pass over this loader will skip it
    currentLoaderObject.pitchExecuted = true;
    if (!fn) return iteratePitchingLoaders(options, loaderContext, callback);

    // A pitch function exists: call it with (remainingRequest, previousRequest, data),
    // where `data` is a fresh object that is also stored on the loader object.
    runSyncOrAsync(
        fn,
        loaderContext, [loaderContext.remainingRequest, loaderContext.previousRequest, currentLoaderObject.data = {}],
        function(err) {
            if (err) return callback(err);
            var args = Array.prototype.slice.call(arguments, 1);
            if (args.length > 0) {
                // The pitch function passed values to the callback: step back one
                // loader and switch to the normal phase with those values
                loaderContext.loaderIndex--;
                iterateNormalLoaders(options, loaderContext, args, callback);
            } else {
                iteratePitchingLoaders(options, loaderContext, callback);
            }
        }
    );
});

　　这里把loader的一个标记置true，然后根据返回函数是否有pitch值来决定流程，很明显这里直接递归调用自身了。

　　第二次进来时，由于loader已经被加载，所以loaderIndex加1，然后再次递归。

　　第三次进来时，第一个判断中表明所有的loader都被加载完，会调用processResource方法。

processResource

　　这里的递归虽然都写成了尾调用的形式，但V8并不做尾调用优化；不过递归深度只取决于loader的数量，所以在性能上不会有问题，直接看下面的方法：

/**
 * Reads the resource file (when there is one) and starts the normal loader phase.
 *
 * @param {Object} options - holds `readResource` and receives `resourceBuffer`
 * @param {Object} loaderContext - context holding the loaders, resourcePath and addDependency
 * @param {Function} callback - receives a read error, otherwise forwarded to iterateNormalLoaders
 */
function processResource(options, loaderContext, callback) {
    // Normal loaders run from the last one towards the first
    loaderContext.loaderIndex = loaderContext.loaders.length - 1;

    const filePath = loaderContext.resourcePath;

    // No resource path: start the normal phase with a single null argument
    if (!filePath) {
        iterateNormalLoaders(options, loaderContext, [null], callback);
        return;
    }

    // Record the file in fileDependencies (addDependency pushes onto that array)
    loaderContext.addDependency(filePath);

    // readResource => fs.readFile in this walkthrough
    options.readResource(filePath, function(readError, content) {
        if (readError) return callback(readError);
        // Keep the raw content so runLoaders can return it as resourceBuffer
        options.resourceBuffer = content;
        iterateNormalLoaders(options, loaderContext, [content], callback);
    });
}

　　这个获取入口文件路径并调用fs模块进行文件内容读取，返回文件的原始buffer后调用了iterateNormalLoaders方法。

/**
 * Normal phase: runs each loader's `normal` function from the current loaderIndex
 * down to 0, feeding every loader the values produced by the previous one.
 *
 * @param {Object} options - the processOptions object, passed through unchanged
 * @param {Object} loaderContext - shared context; loaderIndex is decremented in place
 * @param {Array} args - arguments for the next loader (initially the file content)
 * @param {Function} callback - called with (null, args) when no loaders remain, or with the error
 */
function iterateNormalLoaders(options, loaderContext, args, callback) {
    for (;;) {
        // Every loader has run: report the final values
        if (loaderContext.loaderIndex < 0) {
            return callback(null, args);
        }

        const candidate = loaderContext.loaders[loaderContext.loaderIndex];

        // normalExecuted starts out false; a loader already handled in this phase is skipped
        if (candidate.normalExecuted) {
            loaderContext.loaderIndex -= 1;
            continue;
        }

        // Read the function that loadLoader stored, then flag the loader as handled
        const normalFn = candidate.normal;
        candidate.normalExecuted = true;

        // No normal function: the next pass sees the flag and moves on
        if (!normalFn) {
            continue;
        }

        /*
            Helper source quoted for reference:

            function convertArgs(args, raw) {
                if (!raw && Buffer.isBuffer(args[0]))
                    args[0] = utf8BufferToString(args[0]);
                else if (raw && typeof args[0] === "string")
                    args[0] = new Buffer(args[0], "utf-8");
            }
            function utf8BufferToString(buf) {
                var str = buf.toString("utf-8");
                if (str.charCodeAt(0) === 0xFEFF) {
                    return str.substr(1);
                } else {
                    return str;
                }
            }
        */
        // Per the helper quoted above: a non-raw loader gets the buffer decoded to a
        // utf-8 string (leading BOM dropped); a raw loader gets a string turned into a Buffer
        convertArgs(args, candidate.raw);

        runSyncOrAsync(normalFn, loaderContext, args, function(err, ...produced) {
            if (err) return callback(err);

            // Whatever the loader handed back becomes the input of the next loader
            iterateNormalLoaders(options, loaderContext, produced, callback);
        });
        return;
    }
}

　　这里的normal就是处理普通的js文件了，在读取入口文件后将其转换为utf-8的格式，然后依次获取loader，调用runSyncOrAsync。

　　源码如下：

/*
    fn => the function exported by the loader (babel-loader in this walkthrough)
    context => the loaderContext helper object; `async` and `callback` are (re)assigned on it
    args => arguments for the loader, e.g. the source string read from the entry file
*/
/**
 * Calls a loader function and reports its outcome through `callback` exactly once,
 * whichever style the loader uses:
 *   - returns a value            -> callback(null, value)
 *   - returns undefined          -> callback()
 *   - returns a thenable         -> callback(null, resolved) or callback(rejection)
 *   - calls this.callback(...)   -> callback(...) with the same arguments
 *   - calls this.async()         -> marks the run as async and returns that same callback
 *   - throws                     -> callback(error)
 *
 * @param {Function} fn - loader function, invoked with `context` as `this`
 * @param {Object} context - receives the `async` and `callback` helpers
 * @param {Array} args - arguments applied to `fn`
 * @param {Function} callback - node-style completion callback
 */
function runSyncOrAsync(fn, context, args, callback) {
    var isSync = true;
    var isDone = false;
    var isError = false; // internal error
    var reportedError = false;
    context.async = function async() {
        if (isDone) {
            if (reportedError) return; // ignore
            throw new Error("async(): The callback was already called.");
        }
        isSync = false;
        return innerCallback;
    };
    // Wrap the callback so that it can only be invoked once
    var innerCallback = context.callback = function() {
        if (isDone) {
            if (reportedError) return; // ignore
            throw new Error("callback(): The callback was already called.");
        }
        isDone = true;
        isSync = false;
        try {
            callback.apply(null, arguments);
        } catch (e) {
            // Flag errors thrown by the callback itself so the outer catch rethrows
            // them instead of reporting them as loader errors
            isError = true;
            throw e;
        }
    };
    try {
        // This is where the loader is finally executed. The named IIFE presumably exists
        // to give the call a recognizable LOADER_EXECUTION frame in stack traces — TODO confirm.
        var result = (function LOADER_EXECUTION() {
            return fn.apply(context, args);
        }());
        if (isSync) {
            isDone = true;
            if (result === undefined)
                return callback();
            // A thenable result is awaited. The rejection handler is passed as the second
            // argument of then(): chaining `.catch(callback).then(success)` would run the
            // success handler after a rejection and call `callback` a second time.
            if (result && typeof result === "object" && typeof result.then === "function") {
                return result.then(function(r) {
                    callback(null, r);
                }, callback);
            }
            return callback(null, result);
        }
    } catch (e) {
        if (isError) throw e;
        if (isDone) {
            // loader is already "done", so we cannot use the callback function
            // for better debugging we print the error on the console
            if (typeof e === "object" && e.stack) console.error(e.stack);
            else console.error(e);
            return;
        }
        isDone = true;
        reportedError = true;
        callback(e);
    }
}

　　看了那么多的垃圾代码，终于来到了最关键的方法，可以看出，本质上loader就是将读取到的字符串传入，然后返回对应的字符串或者一个Promise。

　　这里将结果一路返回到了最初的runLoaders方法中：

// Final step of runLoaders: the callback receives the outcome of the whole loader pipeline.
iteratePitchingLoaders(processOptions, loaderContext, function(err, result) {
    // On failure only the bookkeeping collected so far is reported alongside the error
    if (err) {
        return callback(err, {
            cacheable: requestCacheable,
            fileDependencies: fileDependencies,
            contextDependencies: contextDependencies
        });
    }
    /*
        Values in this walkthrough:
        result => array whose first entry is the string produced by babel-loader
        resourceBuffer => the raw buffer of the JS file
        cacheable => requestCacheable, a boolean (initialised to true in runLoaders)
        fileDependencies => ['d:\\workspace\\doc\\input.js']
        contextDependencies => []
    */
    callback(null, {
        result: result,
        resourceBuffer: processOptions.resourceBuffer,
        cacheable: requestCacheable,
        fileDependencies: fileDependencies,
        contextDependencies: contextDependencies
    });
});

　　因为案例比较简单，所以返回的东西也比较少，这里继续callback，返回到doBuild：

doBuild(options, compilation, resolver, fs, callback) {
    this.cacheable = false;
    const loaderContext = this.createLoaderContext(resolver, options, compilation, fs);
    runLoaders({
        resource: this.resource,
        loaders: this.loaders,
        context: loaderContext,
        readResource: fs.readFile.bind(fs)
    }, (err, result) => {
        // result => 上面的对象
        if (result) {
            this.cacheable = result.cacheable;
            this.fileDependencies = result.fileDependencies;
            this.contextDependencies = result.contextDependencies;
        }

        if (err) {
            const error = new ModuleBuildError(this, err);
            return callback(error);
        }
        // 获取对应的原始buffer、转换后的字符串、sourceMap
        const resourceBuffer = result.resourceBuffer;
        const source = result.result[0];
        // null
        const sourceMap = result.result[1];

        if (!Buffer.isBuffer(source) && typeof source !== "string") {
            const error = new ModuleBuildError(this, new Error("Final loader didn't return a Buffer or String"));
            return callback(error);
        }
        /*
            function asString(buf) {
                if (Buffer.isBuffer(buf)) {
                    return buf.toString("utf-8");
                }
                return buf;
            }
        */
        this._source = this.createSource(asString(source), resourceBuffer, sourceMap);
        return callback();
    });
}

　　这次获取处理完的对象属性，然后调用另外一个createSource方法：

createSource(source, resourceBuffer, sourceMap) {
    // if there is no identifier return raw source
    if (!this.identifier) {
        return new RawSource(source);
    }

    // from here on we assume we have an identifier
    // 返回下面这个东西 很久之前拼接的
    // d:\workspace\node_modules\babel-loader\lib\index.js!d:\workspace\doc\input.js
    const identifier = this.identifier();
    // 下面两个属性根本没出现过
    if (this.lineToLine && resourceBuffer) {
        return new LineToLineMappedSource(
            source, identifier, asString(resourceBuffer));
    }

    if (this.useSourceMap && sourceMap) {
        return new SourceMapSource(source, identifier, sourceMap);
    }
    // 直接进这里
    /*
        class OriginalSource extends Source {
            constructor(value, name) {
                super();
                this._value = value;
                this._name = name;
            }

            //...原型方法
        }
    */
    return new OriginalSource(source, identifier);
}

　　因为都比较简单，所以直接看注释就好了，没啥好解释的。

　　所有的new都只看看构造函数，方法那么多，又不是全用。

　　返回的对象赋值给了NormalModule对象的_source属性，然后又是callback，这次回到了build那里：

build(options, compilation, resolver, fs, callback) {
    this.buildTimestamp = Date.now();
    this.built = true;
    this._source = null;
    this.error = null;
    this.errors.length = 0;
    this.warnings.length = 0;
    this.meta = {};

    return this.doBuild(options, compilation, resolver, fs, (err) => {
        this.dependencies.length = 0;
        this.variables.length = 0;
        this.blocks.length = 0;
        this._cachedSource = null;

        // if we have an error mark module as failed and exit
        if (err) {
            this.markModuleAsErrored(err);
            return callback();
        }

        // check if this module should !not! be parsed.
        // if so, exit here;
        // undefined跳过
        const noParseRule = options.module && options.module.noParse;
        if (this.shouldPreventParsing(noParseRule, this.request)) {
            return callback();
        }

        try {
            this.parser.parse(this._source.source(), {
                current: this,
                module: this,
                compilation: compilation,
                options: options
            });
        } catch (e) {
            const source = this._source.source();
            const error = new ModuleParseError(this, source, e);
            this.markModuleAsErrored(error);
            return callback();
        }
        return callback();
    });
}

　　基本上不知道module.noParse选项哪个人会用，所以这里一般都是直接跳过然后调用那个可怕对象parser对象的parse方法，开始进行解析。

　　这节的内容就这样吧，总算是把loader跑完了，这个系列的目的也就差不多了。

　　其实总体来说过程就几步，但是代码的复杂程度真的是不想说了……

上一篇： MySQL分布式数据库架构：分库、分表、排序、分页、分组、实现教程

下一篇： C# Select

.38-浅析webpack源码之babel-loader转换js文件

.35-浅析webpack源码之babel-loader入口文件路径读取