DataDome尝试AST解混淆

提示：仅供学习交流，不得用作商业用途，如有侵权请及时联系作者删除

本文只对其混淆进行分析，不实现具体加密算法。有需要可以看公众号 => frida and so文章；本文也是基于大佬文章写的

整体代码初步分析

可以看到整体的代码结构为webpack结构，并且其中存在大量的混淆，本文的目标即是尽可能地降低分析加密算法时的难度

第一步：字符串解混淆

可以看到在源码中存在大量的十六进制转义字符串，我们首先可以对其进行还原

思路：匹配所有的StringLiteral节点，并判断是否满足我们的判断依据，然后进行替换

实现代码如下：

/**
 * Decode `\xHH` and `\uHHHH` escape sequences that appear as literal text in a string.
 *
 * Both escape forms are matched in a single pass so the output of one
 * replacement is never re-scanned by the other: `\x5cu0041` decodes to the
 * six characters `\u0041`, not to `A`.
 *
 * @param {string} str Text that may contain literal backslash escapes.
 * @returns {string} The text with every recognised escape replaced by its character.
 */
function decodeEscapeString(str) {
    return str.replace(
        /\\x([0-9a-fA-F]{2})|\\u([0-9a-fA-F]{4})/g,
        // Exactly one of the two capture groups is defined for each match.
        (_, hex2, hex4) => String.fromCharCode(parseInt(hex2 ?? hex4, 16))
    );
}

// Normalise string literals that were written with \x / \u escapes.
// NOTE(review): assumes `ast` comes from @babel/parser, where `node.value` already
// holds the decoded string and only `extra.raw` keeps the escaped source text — confirm.
traverse(ast, {
    StringLiteral(path) {
        const raw = path.node.extra && path.node.extra.raw;
        if (!raw || !/\\x|\\u/.test(raw)) return;
        // A fresh literal built from `value` carries no `extra.raw`, so the
        // generator prints the readable text. Decoding `value` a second time would
        // corrupt strings that genuinely contain a backslash followed by x/u.
        path.replaceWith(t.stringLiteral(path.node.value));
    }
});

第二步：第一层数值计算

紧接着可以看到代码中存在了大量的数值计算的封装函数调用，基本特征为：传入两个数值，在函数内部对数值进行运算，然后进行返回

思路：匹配所有满足条件的封装函数，然后在调用该函数的地方进行运算，取值，替换节点操作
函数特征：要么是第一句是return语句，要么是第一句是var定义语句，并且在return语句中的表达式全部为数值上的计算

实现代码如下：

/**
 * Decide whether a function body has the shape of an arithmetic wrapper.
 *
 * Accepted bodies consist solely of `var` declarations whose initialisers pass
 * isPureArithmeticExpression, plus exactly one `return` whose argument passes
 * the same check.
 *
 * @param {Array} statements Statements of the function body.
 * @returns {{vars: Array<{name: string, init: Object}>, expr: Object}|null}
 *          The collected declarations and the return expression, or null when
 *          the body does not match.
 */
function extractObfuscatedFunction(statements) {
    const collected = [];
    let returned = null;

    for (const node of statements) {
        if (t.isReturnStatement(node)) {
            // A second return means this is not a simple wrapper.
            if (returned) return null;
            returned = node.argument;
            continue;
        }

        const isVarDeclaration = t.isVariableDeclaration(node) && node.kind === "var";
        if (!isVarDeclaration) return null; // any other statement disqualifies the body

        for (const declarator of node.declarations) {
            const accepted =
                t.isIdentifier(declarator.id) && isPureArithmeticExpression(declarator.init);
            if (!accepted) return null;
            collected.push({ name: declarator.id.name, init: declarator.init });
        }
    }

    if (!returned) return null;
    if (!isPureArithmeticExpression(returned)) return null;

    return { vars: collected, expr: returned };
}

// Registry of arithmetic wrapper functions, keyed by function name.
// Created without a prototype so a lookup such as obfuscatedFuncs["toString"]
// or obfuscatedFuncs["constructor"] cannot resolve to an Object.prototype member.
// NOTE(review): keys are bare names, so wrappers with the same name in different
// scopes overwrite each other — confirm names are unique in the target script.
var obfuscatedFuncs = Object.create(null);

// Collect every function declaration whose body is a pure arithmetic wrapper.
traverse(ast, {
    FunctionDeclaration(path) {
        const { id, body, params } = path.node;
        if (!id || !id.name) return;
        // Only plain identifier parameters can be bound by name at the call site;
        // defaults, rest elements and patterns have no `.name`.
        if (!params.every(p => t.isIdentifier(p))) return;

        const check = extractObfuscatedFunction(body.body);
        if (!check) return;

        // Source text stored for later evaluation: the `var` prelude followed by
        // the parenthesised return expression.
        const prelude = check.vars
            .map(v => `var ${v.name} = ${generate(v.init).code};`)
            .join("\n");
        obfuscatedFuncs[id.name] = {
            params: params.map(p => p.name),
            exprCode: `${prelude}\n(${generate(check.expr).code})`,
            path: path
        };
    },
});

// Inline calls to the collected arithmetic wrappers when every argument is a
// compile-time constant.
traverse(ast, {
    CallExpression(path) {
        const callee = path.node.callee;
        if (!t.isIdentifier(callee)) return;
        // Own-property check: a callee named e.g. `toString` must not match a
        // member inherited from Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(obfuscatedFuncs, callee.name)) return;
        const obf = obfuscatedFuncs[callee.name];
        if (!obf) return;

        // Same syntactic filter as before: numbers, arrays and unary expressions.
        const argPaths = path.get("arguments");
        const looksConstant = argPaths.every(
            p => p.isNumericLiteral() || p.isArrayExpression() || p.isUnaryExpression()
        );
        if (!looksConstant) return;

        // Babel's static evaluator replaces eval(): arguments such as `-x` or
        // `[foo()]` pass the filter above but are not constants, and eval would
        // have executed or thrown on them. Non-confident results are left alone.
        const evaluated = argPaths.map(p => p.evaluate());
        if (!evaluated.every(r => r.confident)) return;

        // Bind parameters by position; parameters without an argument are undefined.
        const args = {};
        obf.params.forEach((p, idx) => {
            args[p] = idx < evaluated.length ? evaluated[idx].value : undefined;
        });
        const result = evaluateExpression(obf.exprCode, args);
        path.replaceWith(t.valueToNode(result));
    }
});

第三步：属性调用修改

在经过上面几步的处理后，会发现源码中会出现一些奇怪的属性调用方式，Object[["prop"]]，我们需要修改为：Object.prop

思路：匹配 MemberExpression节点，如果发现该节点的property 是数组类型的话，就对其进行替换为正常节点

实现代码如下：

// Rewrite `obj[["prop"]]` (a single-element array used as a property key) into
// ordinary property access.
traverse(ast, {
    MemberExpression(path) {
        const { object, property } = path.node;
        if (!t.isArrayExpression(property) || property.elements.length !== 1) return;

        const key = property.elements[0];
        if (!t.isStringLiteral(key)) return;

        // Dot notation is only valid for identifier-like names. Anything else
        // (e.g. "my-prop", "0", "") stays computed as obj["..."] so the
        // generated code remains syntactically valid.
        const replacement = t.isValidIdentifier(key.value)
            ? t.memberExpression(object, t.identifier(key.value), false)
            : t.memberExpression(object, t.stringLiteral(key.value), true);
        path.replaceWith(replacement);
    }
});

第四步：处理String.fromCharCode函数调用

经过上面几步的处理，我们发现具体的字符串加密数组也是经过混淆的，大量的调用了String.fromCharCode函数进行处理，所以我们需要对这些进行还原

思路：先找出所有被赋值为 String.fromCharCode 的变量，然后匹配所有的函数调用节点，判断其是否调用了这些变量，再计算出结果并替换该节点

实现代码如下：

// Names of variables initialised with a reference to String.fromCharCode.
var formatProps = [];

traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (!t.isIdentifier(id) || !t.isMemberExpression(init)) return;
        if (!t.isIdentifier(init.object, { name: "String" })) return;

        const { property, computed } = init;
        // Accept String.fromCharCode and String["fromCharCode"], but not
        // String[fromCharCode], where the key is a runtime variable.
        const isFromCharCode = computed
            ? t.isStringLiteral(property, { value: "fromCharCode" })
            : t.isIdentifier(property, { name: "fromCharCode" });

        if (isFromCharCode && !formatProps.includes(id.name)) {
            formatProps.push(id.name);
        }
    }
});

// Fold calls made through a String.fromCharCode alias into string literals.
traverse(ast, {
    CallExpression(path) {
        const call = path.node.callee;
        if (!t.isIdentifier(call) || !formatProps.includes(call.name)) return;

        // Only fold when every argument is a numeric literal; any other argument
        // has no statically known character code.
        const args = path.node.arguments;
        if (!args.every(arg => t.isNumericLiteral(arg))) return;

        // Spread the codes: passing the array itself coerces it to one number,
        // which yields "\u0000" for any call with more than one argument.
        const result = String.fromCharCode(...args.map(arg => arg.value));
        path.replaceWith(t.stringLiteral(result));
    }
});

第五步：字符串拼接

在经过String.fromCharCode调用处理后，我们发现其中会存在大量的字符串拼接代码

思路：匹配所有的运算表达式，判断是否符合是字符串相加的特征，然后对其进行替换处理

实现代码如下：

/**
 * Collapse a `+` expression tree into one string literal when every leaf of
 * the tree is a string literal; otherwise leave the node untouched.
 * @param {Path} path Path of the candidate expression.
 */
function foldStringConcat(path) {
    const isPlus = (node) => t.isBinaryExpression(node, { operator: "+" });
    if (!isPlus(path.node)) return;

    // Walk the tree with an explicit stack, visiting leaves left to right.
    const leaves = [];
    const pending = [path.node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (isPlus(current)) {
            // Push right first so the left operand is popped (and recorded) first.
            pending.push(current.right, current.left);
        } else {
            leaves.push(current);
        }
    }

    // Bail out as soon as a non-string leaf is found.
    let merged = "";
    for (const leaf of leaves) {
        if (!t.isStringLiteral(leaf)) return;
        merged += leaf.value;
    }

    path.replaceWith(t.stringLiteral(merged));
}


// Offer every binary expression in the tree to the string-concatenation folder.
traverse(ast, {
    BinaryExpression: (path) => {
        foldStringConcat(path);
    },
});

第六步：处理真正的解密函数调用

经过上面的处理后，我们发现真正的字符串混淆是通过n函数和i函数进行还原的，也就是经典的OB混淆。

那么 n 函数和 i函数分别使用到了刚才已经还原的两个字符串数组：o，s

思路：正常解OB混淆即可，先把解密函数拿到本地，然后在匹配所有的函数调用节点对其进行替换即可

实现代码如下：

const decodeFunctions = require('./decodeFunctions');

// Replace calls to the string-decoding functions with the decoded value.
traverse(ast, {
    CallExpression(path) {
        // Bound as `args`: declaring a name called `arguments` is a SyntaxError
        // in strict mode and ES modules.
        const { callee, arguments: args } = path.node;
        if (!t.isIdentifier(callee) || args.length !== 1) return;

        // Own-property + function check so that names such as `toString` do not
        // resolve to Object.prototype members.
        if (!Object.prototype.hasOwnProperty.call(decodeFunctions, callee.name)) return;
        if (typeof decodeFunctions[callee.name] !== "function") return;

        // Only a literal key is known statically; any other node has no usable `.value`.
        const arg = args[0];
        if (!t.isNumericLiteral(arg) && !t.isStringLiteral(arg)) return;
        const key = arg.value;

        try {
            const result = decodeFunctions[callee.name](key);
            // NOTE(review): an undefined result presumably means the key is not
            // in the string table; keep the original call in that case — confirm.
            if (result === undefined) return;
            console.log("Decode:", result, key);
            path.replaceWith(t.valueToNode(result));
        } catch (e) {
            console.log("Decode error:", e, callee.name, key);
        }
    }
});

decodeFunctions文件如下：

第七步：静态执行函数

经过上面的处理可以看到，代码中存在很多可以通过静态执行直接得出值的代码

思路：

第一：将 Q = u["Math"]["ceil"] 这种代码修改为 Q = window["Math"]["ceil"]

第二：对使用了window中的函数或者对象的调用节点进行静态执行

实现代码如下：

/**
 * Follow an alias chain until it reaches a node that is not itself an alias.
 *
 * Each alias name is followed at most once, so a self-referential or cyclic
 * mapping (e.g. `w -> w`, or `a -> b -> a`) ends at the node where the cycle is
 * detected instead of recursing until the stack overflows.
 *
 * @param {Map} aliasMap Alias name -> node it stands for.
 * @param {*} node Node to resolve.
 * @returns {*} The resolved node, or `node` itself when it is not an alias.
 */
function resolveAliasChain(aliasMap, node) {
    const followed = new Set();
    let current = node;
    while (
        t.isIdentifier(current) &&
        aliasMap.has(current.name) &&
        !followed.has(current.name)
    ) {
        followed.add(current.name);
        current = aliasMap.get(current.name);
    }
    return current;
}

/**
 * Tell whether a node is a static literal, optionally wrapped in one unary operator.
 *
 * A template literal only counts when it has no `${...}` expressions; one with
 * expressions depends on runtime values even though its node type is a literal.
 *
 * @param {Node} node Node to test.
 * @returns {boolean} True when the node's value is fixed at parse time.
 */
function isStaticLiteral(node) {
    // Look through a single unary operator such as `-1` or `!0`.
    const operand = t.isUnaryExpression(node) ? node.argument : node;
    if (t.isTemplateLiteral(operand)) {
        return operand.expressions.length === 0;
    }
    return t.isLiteral(operand);
}

/**
 * Alias map: variable name -> the node it stands for (`window`, or a member
 * chain rooted at `window`).
 */
const aliasMap = new Map();

/**
 * Rebuild a member-expression chain with its root identifier resolved through
 * aliasMap. Returns the input node itself when nothing in the chain is an alias.
 */
function resolveMemberRoot(node) {
    if (t.isIdentifier(node)) return resolveAliasChain(aliasMap, node);
    if (!t.isMemberExpression(node)) return node;
    const object = resolveMemberRoot(node.object);
    return object === node.object
        ? node
        : t.memberExpression(object, node.property, node.computed);
}

// Record every variable that is just another name for `window` or for
// something reachable from it.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (!t.isIdentifier(id)) return;

        // A variable that is written again later does not reliably stand for
        // its initialiser, so it must not become an alias.
        const binding = path.scope.getBinding(id.name);
        if (binding && !binding.constant) return;

        // e.g. var u = window;
        if (t.isIdentifier(init, { name: "window" })) {
            aliasMap.set(id.name, t.identifier("window"));
        }

        // e.g. var f = u["Math"];  or  var Q = u["Math"]["ceil"];
        // The whole chain is walked so that nested accesses are registered too.
        else if (t.isMemberExpression(init)) {
            const resolved = resolveMemberRoot(init);
            if (resolved !== init) {
                aliasMap.set(id.name, resolved);
            }
        }
    }
});

/**
 * Tell whether `node` is an identifier that should be replaced by its alias target.
 * NOTE(review): aliasMap is keyed by bare name, so this only filters out
 * shadowing bindings that are visibly not alias declarations (parameters,
 * functions, variables with another kind of initialiser) — confirm this is enough.
 */
function isAliasReference(path, node) {
    if (!t.isIdentifier(node) || !aliasMap.has(node.name)) return false;
    const binding = path.scope.getBinding(node.name);
    if (!binding) return true; // unbound name: nothing local shadows it
    if (!binding.path.isVariableDeclarator()) return false;
    const init = binding.path.node.init;
    return t.isIdentifier(init, { name: "window" }) || t.isMemberExpression(init);
}

// Substitute alias identifiers with the `window` expression they stand for.
traverse(ast, {
    // Alias used as the object of a property access.
    MemberExpression(path) {
        const object = path.node.object;
        if (!isAliasReference(path, object)) return;
        // Clone so that each use site owns its nodes; the node stored in
        // aliasMap would otherwise be shared by every site it is inserted at.
        path.node.object = t.cloneNode(resolveAliasChain(aliasMap, object), true);
    },

    // Alias used directly as the callee of a call.
    CallExpression(path) {
        const callee = path.node.callee;
        if (!isAliasReference(path, callee)) return;
        path.node.callee = t.cloneNode(resolveAliasChain(aliasMap, callee), true);
    }
});

// Global functions that are deterministic and free of side effects, and so may
// be run while transforming. The host `globalThis` is deliberately NOT exposed:
// the script being analysed is untrusted, and `window["eval"]("...")` or
// `window["setTimeout"](...)` would otherwise run with the host's globals.
const STATIC_GLOBAL_FUNCS = {
    parseInt,
    parseFloat,
    Number,
    String,
    Boolean,
    isNaN,
    isFinite,
    encodeURI,
    encodeURIComponent,
    decodeURI,
    decodeURIComponent,
    escape,
    unescape,
    // atob/btoa are not global in every Node.js version.
    ...(typeof atob === "function" && typeof btoa === "function" ? { atob, btoa } : {}),
};

// Objects whose methods may be called statically, as window["Obj"]["method"](...).
const STATIC_NAMESPACES = {
    Math,
    parseInt,
    parseFloat,
    Number,
    String,
    Boolean,
};

// Calls whose result differs between runs must never be folded into a constant.
const NON_DETERMINISTIC_CALLS = new Set(["Math.random"]);

/**
 * Run `code` in a fresh vm context that only sees `sandbox` as `window`, and
 * inline the result at `path` when it is a primitive.
 */
function foldStaticCall(path, code, sandbox, failureLabel) {
    try {
        const result = vm.runInNewContext(code, { window: sandbox }, { timeout: 1000 });
        const kind = typeof result;
        // Only primitive results are inlined; objects and functions are left alone.
        if (kind === "number" || kind === "string" || kind === "boolean") {
            path.replaceWith(t.valueToNode(result));
        }
    } catch (e) {
        console.warn(failureLabel, e.message);
    }
}

traverse(ast, {
    CallExpression(path) {
        const { callee, arguments: args } = path.node;
        if (!t.isMemberExpression(callee) || !t.isStringLiteral(callee.property)) return;
        if (!args.every(isStaticLiteral)) return;

        const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
        const argCode = args.map(a => generate(a).code).join(",");
        const method = callee.property.value;

        // window["fn"](...)
        if (t.isIdentifier(callee.object, { name: "window" })) {
            if (!hasOwn(STATIC_GLOBAL_FUNCS, method)) return;
            // JSON.stringify escapes quotes and backslashes in the name so it
            // cannot break out of the generated string literal.
            foldStaticCall(
                path,
                `window[${JSON.stringify(method)}](${argCode})`,
                STATIC_GLOBAL_FUNCS,
                "静态执行失败:"
            );
            return;
        }

        // window["Obj"]["method"](...)
        if (
            t.isMemberExpression(callee.object) &&
            t.isIdentifier(callee.object.object, { name: "window" }) &&
            t.isStringLiteral(callee.object.property)
        ) {
            const obj = callee.object.property.value;
            if (!hasOwn(STATIC_NAMESPACES, obj)) return;
            if (NON_DETERMINISTIC_CALLS.has(`${obj}.${method}`)) return;
            foldStaticCall(
                path,
                `window[${JSON.stringify(obj)}][${JSON.stringify(method)}](${argCode})`,
                STATIC_NAMESPACES,
                "静态执行 Math 等失败:"
            );
        }
    }
});

第八步：数值变量替换

在经过静态执行后，可以看到定义了大量的数值变量，并且在后续的逻辑中会用到这些变量，我们需要对其进行替换为真正的值

思路：匹配全部的数值变量，并记录其作用域，在调用的地方判断作用域并且进行替换

实现代码如下：

// Numeric variables recorded for inlining, keyed as `${name}@@${scopeUid}`.
const variables = new Map();

/**
 * Tell whether the identifier at `path` is a read of a variable, i.e. a
 * position where it can be swapped for a literal value.
 *
 * Positions where the identifier is a declaration, a write target, or merely
 * a name (property name, method name, label) are rejected: replacing them
 * would change meaning or produce invalid code such as `obj.5`.
 *
 * @param {Path} path Identifier path.
 * @returns {boolean} True when replacement is safe.
 */
function isSafeToReplace(path) {
    const parent = path.parent;
    const key = path.parentKey;
    const isPlainKey = key === 'key' && !parent.computed;

    // Variable declaration, e.g. const a = 1;
    if (t.isVariableDeclarator(parent) && key === 'id') return false;

    // Function parameter, e.g. function func(a, b) { ... }
    if (t.isFunction(parent) && parent.params.includes(path.node)) return false;

    // Function name, e.g. function func() { ... } / const f = function func() { ... };
    if ((t.isFunctionDeclaration(parent) || t.isFunctionExpression(parent)) && key === 'id') return false;

    // Assignment target, e.g. a = 1;
    if (t.isAssignmentExpression(parent) && key === 'left') return false;

    // Update target, e.g. ++a / a--
    if (t.isUpdateExpression(parent) && key === 'argument') return false;

    // Object property key, e.g. { a: 1 } or const { a } = obj;
    if (t.isObjectProperty(parent) && isPlainKey) return false;

    // Property name in obj.a / obj?.a — a name, not a variable read.
    if (
        (t.isMemberExpression(parent) || t.isOptionalMemberExpression(parent)) &&
        key === 'property' &&
        !parent.computed
    ) return false;

    // Method / class member name, e.g. { a() {} } or class C { a = 1; a() {} }
    if (
        (t.isObjectMethod(parent) || t.isClassMethod(parent) || t.isClassProperty(parent)) &&
        isPlainKey
    ) return false;

    // Statement label, e.g. a: for (;;) { break a; }
    if (
        (t.isLabeledStatement(parent) || t.isBreakStatement(parent) || t.isContinueStatement(parent)) &&
        key === 'label'
    ) return false;

    // Loop target, e.g. for (a in obj) / for (a of list)
    if (t.isForXStatement(parent) && key === 'left') return false;

    // Catch binding, e.g. catch (a) { ... }
    if (t.isCatchClause(parent) && key === 'param') return false;

    return true;
}

// Record variables that are initialised with a numeric constant and never
// written again.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (!t.isIdentifier(id)) return;

        // A variable that is reassigned, updated (`i++`) or re-declared does not
        // hold its initial value at every read, so it must not be inlined.
        const binding = path.scope.getBinding(id.name);
        if (!binding || !binding.constant) return;

        let value = null;
        if (t.isNumericLiteral(init)) {
            value = init.value;
        } else if (
            t.isUnaryExpression(init) &&
            init.operator === '-' &&
            t.isNumericLiteral(init.argument)
        ) {
            value = -init.argument.value;
        }

        if (value !== null) {
            // Key on the scope that owns the binding: a `var` declared inside a
            // block belongs to the enclosing function scope, which is the scope
            // a later binding lookup reports.
            variables.set(`${id.name}@@${binding.scope.uid}`, {
                value,
                scope: binding.scope,
            });
        }
    }
});

// Replace reads of the recorded numeric variables with their value.
traverse(ast, {
    Identifier(path) {
        const name = path.node.name;

        if (!isSafeToReplace(path)) return;

        // Unbound names have no recorded scope, and a variable that is written
        // after its declaration does not hold its initial value at every read.
        const binding = path.scope.getBinding(name);
        if (!binding || !binding.constant) return;

        const entry = variables.get(`${name}@@${binding.scope.uid}`);
        if (!entry || typeof entry.value !== "number") return;

        // valueToNode builds a negative number as unary minus over a positive
        // literal, which keeps the AST valid.
        path.replaceWith(t.valueToNode(entry.value));
    }
});

第九步：表达式静态计算

在替换完数值调用后，我们发现原本不支持静态计算的表达式现在支持了，所以我们需要对这种表达式进行静态执行并且替换

思路：匹配全部节点都为数值操作的表达式节点，进行静态执行然后替换

实现代码如下：


/**
 * Replace the expression at `path` with its numeric value when Babel can
 * evaluate it with confidence.
 *
 * Negative results are emitted as unary minus over a positive literal, and
 * non-finite results (NaN, ±Infinity) are left as written, because a
 * NumericLiteral node cannot represent either form.
 *
 * @param {Path} path Path of the expression to fold.
 */
function tryEvaluateExpression(path) {
    const { node } = path;
    // `-5` is already in folded form; skipping it keeps the fold idempotent.
    if (t.isUnaryExpression(node, { operator: "-" }) && t.isNumericLiteral(node.argument)) return;

    try {
        const { confident, value } = path.evaluate();
        if (!confident || typeof value !== 'number' || !Number.isFinite(value)) return;

        const magnitude = t.numericLiteral(Math.abs(value));
        const isNegative = value < 0 || Object.is(value, -0);
        path.replaceWith(isNegative ? t.unaryExpression("-", magnitude) : magnitude);
    } catch (err) {
        // Best effort: an expression that cannot be evaluated is left untouched.
        console.warn("表达式静态计算失败:", err.message);
    }
}

// Try to fold every binary, unary, logical and conditional expression.
// One handler serves all four node types via a `|`-separated visitor key.
traverse(ast, {
    "BinaryExpression|UnaryExpression|LogicalExpression|ConditionalExpression"(path) {
        tryEvaluateExpression(path);
    },
});

删除无用节点

在全部处理完后，存在了很多无用节点，我们对其进行删除处理

实现代码如下：

/**
 * Tell whether dropping `init` together with its declarator loses no behaviour.
 * NOTE(review): plain property reads such as window["Math"]["ceil"] are treated
 * as removable, which assumes no getter with side effects — confirm for the target.
 */
function isRemovableInit(scope, init) {
    if (!init) return true;
    if (scope.isPure(init)) return true;

    // Member chain made only of static property reads on an identifier / `this`.
    let node = init;
    while (t.isMemberExpression(node)) {
        const staticKey =
            !node.computed ||
            t.isStringLiteral(node.property) ||
            t.isNumericLiteral(node.property);
        if (!staticKey) return false;
        node = node.object;
    }
    return t.isIdentifier(node) || t.isThisExpression(node);
}

// Remove variable declarators that are never used.
// NOTE(review): `binding.referenced` comes from Babel's scope data, which may
// not reflect replacements made by earlier passes over the same AST — confirm
// whether the scopes need re-crawling (or the code re-parsing) before this pass.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (!t.isIdentifier(id)) return;

        const binding = path.scope.getBinding(id.name);
        // Must be neither read nor written after its declaration: removing the
        // declaration of a variable that is only assigned to (`a = 2`) would
        // turn that assignment into a write to an undeclared name.
        if (!binding || binding.referenced || !binding.constant) return;

        // The initialiser disappears with the declarator, so it must not do work.
        if (!isRemovableInit(path.scope, init)) return;

        const declaration = path.parentPath;
        // `for (var k in obj)` / `for (var v of list)` need their declaration
        // even when the loop variable itself is unused.
        if (declaration.key === "left" && t.isForXStatement(declaration.parent)) return;

        if (declaration.node.declarations.length === 1) {
            // Sole declarator: remove the whole declaration statement.
            declaration.remove();
        } else {
            // Otherwise remove just this declarator.
            path.remove();
        }
    }
});

// Remove function declarations that are never referenced.
traverse(ast, {
    FunctionDeclaration(path) {
        const { id } = path.node;
        // `export default function () {}` is a declaration without a name.
        if (!id) return;

        // The function's name is bound in the scope that encloses the
        // declaration. A lookup from the function's own scope could instead
        // find a parameter or local variable with the same name.
        const binding = path.parentPath.scope.getBinding(id.name);

        if (binding && !binding.referenced) {
            path.remove();
        }
    }
});

结语

作者也是小白，本文章仅是为了记录学习过程，其中可能存在很严重的BUG或可优化点，仅仅提供我在做的时候的想法，不一定对，所以不要过度相信本文以及本文中的代码👍👍