千家信息网

PHP怎么实现词法分析与自定义语言

发表于:2024-11-26 作者:千家信息网编辑
千家信息网最后更新 2024年11月26日,本文小编为大家详细介绍"PHP怎么实现词法分析与自定义语言",内容详细,步骤清晰,细节处理妥当,希望这篇"PHP怎么实现词法分析与自定义语言"文章能帮助大家解决疑惑,下面跟着小编的思路慢慢深入,一起来学习新知识吧。
千家信息网最后更新 2024年11月26日PHP怎么实现词法分析与自定义语言

本文小编为大家详细介绍"PHP怎么实现词法分析与自定义语言",内容详细,步骤清晰,细节处理妥当,希望这篇"PHP怎么实现词法分析与自定义语言"文章能帮助大家解决疑惑,下面跟着小编的思路慢慢深入,一起来学习新知识吧。

之前项目有一个需求,业务人员使用中文编写一些自定义公式,然后需要我们后台执行将结果返回到界面上,于是就基于有限状态机写了这个词法分析器,比较简单,希望能够抛砖引玉。

一、分析需求

输入中文公式,返回结果,比如:

现有薪资=10000;
个税起点=3000;
当前年份=2021;
如果(当前年份=2022){
    个税起点=5000;
}
返回 (现有薪资-个税起点) * 0.2;

二、实现需求

最初的想法是使用字符串替换的方式,将中文关键字替换成PHP的关键字,然后调用eval执行。这样确实可行,但总觉得不够优雅,并且不能实现动态解析。于是想着自己实现一个简单的词法分析器,再结合AST(抽象语法树)将词法单元转换成PHP代码执行,岂不快哉。当前版本还没有用到抽象语法树来生成代码,全部使用字符串拼接。

/**
 * Hand-written lexer for a small formula language whose identifiers and
 * keywords may be written in Chinese, plus a naive token-to-PHP converter.
 *
 * Typical use: construct with the source text, register the keyword map with
 * setKeywordList(), call parseInput() to get the token list, then convert()
 * to turn the tokens into a PHP source string.
 *
 * Every token is an array with three keys:
 *   - "type": one of the public type constants below ("eof" is used internally),
 *   - "char": the token text,
 *   - "pos":  a zero-based character offset. It is the offset of the token's
 *             last character for operators, numbers, strings (closing quote)
 *             and keywords, and the offset just past the word for variables.
 *
 * NOTE(review): the opening of this listing (class header, the $keywordList
 * declaration and the $operatorList entries before ">") was lost when the
 * article was extracted. It is reconstructed here from how the rest of the
 * class and the usage example refer to it; confirm against the original.
 */
class Lexer {
    // Keyword map: source-language word => PHP keyword (see setKeywordList()).
    private $keywordList = [];

    // Single-character operators and punctuation recognised by the lexer.
    // Multi-character operators such as "==" or ">=" are lexed as consecutive
    // single-character tokens and come out adjacent again in convert().
    private $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";"
    ];

    // Source text being tokenised.
    private $input;

    // Character under the cursor, or $eof once the input is exhausted.
    private $currChar;

    // Zero-based character (not byte) offset of the cursor.
    private $currCharPos = 0;

    // Sentinel stored in $currChar at end of input. It is three characters
    // long, so it can never be confused with a real (single) input character.
    private $eof = "eof";

    // Encoding passed to the multibyte string functions.
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW  = "keyword";
    public const OPR = "operator";
    // Used for every numeric literal, including decimals such as "0.2".
    public const INT = "integer";
    public const NIL = "null";

    /**
     * Lexer constructor.
     *
     * @param string $input source text to tokenise
     */
    public function __construct(string $input) {
        $this->input    = $input;
        $this->currChar = $this->charAt($this->currCharPos);
    }

    /**
     * Registers the keyword map used by both the lexer and convert().
     *
     * @param array $keywordList map of source-language word => PHP keyword,
     *                           e.g. ["如果" => "if", "返回" => "return"]
     */
    public function setKeywordList($keywordList) {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenises the remaining input.
     *
     * A NIL token holding a single space is appended after every keyword so
     * that convert() emits "return $x" rather than "return$x".
     *
     * @return array list of tokens (the terminating eof token is not included)
     * @throws \Exception when the input is empty, a string literal is not
     *                    terminated, or an unsupported character is met
     */
    public function parseInput() {
        if ($this->input === "") {
            throw new \Exception("code can not be empty");
        }
        $tokens = [];
        while (true) {
            $token = $this->nextToken();
            if ($token["type"] === $this->eof) {
                break;
            }
            $tokens[] = $token;
            if ($token["type"] === static::KW) {
                $tokens[] = $this->makeToken(static::NIL, " ");
            }
        }
        return $tokens;
    }

    /**
     * Reads the next token at the cursor and advances past it.
     *
     * @return array the token; its type is "eof" once the input is exhausted
     * @throws \Exception on an unterminated string literal or a character
     *                    that does not start any known token
     */
    public function nextToken() {
        $this->skipBlankChar();

        if ($this->currChar === $this->eof) {
            // Do not advance: repeated calls keep returning the same eof token.
            return $this->makeToken($this->eof, $this->currChar);
        }

        if ($this->isCnLetter()) {
            // A run of Chinese characters is tested as a keyword on its own,
            // so a keyword directly followed by a Latin identifier still works.
            $word = $this->matchUntilNextCharIsNotCn();
            if ($this->isKeyword($word)) {
                return $this->keywordToken($word);
            }
            // Everything that is not a keyword is a variable; keep absorbing
            // letters so mixed names stay one token instead of two variables.
            return $this->makeToken(static::VAR, $this->matchVar($word));
        }

        if ($this->isEnLetter()) {
            $word = $this->matchVar();
            if ($this->isKeyword($word)) {
                return $this->keywordToken($word);
            }
            return $this->makeToken(static::VAR, $word);
        }

        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }

        if ($this->isNumber()) {
            return $this->matchNumber();
        }

        if ($this->isStr()) {
            return $this->matchCompleteStr();
        }

        // Reject anything else instead of emitting it as a "$<char>" variable,
        // which would put arbitrary characters into the generated PHP.
        throw new \Exception(sprintf(
            "unexpected character \"%s\" at position %d",
            $this->currChar,
            $this->currCharPos
        ));
    }

    /**
     * Converts a token list into a PHP source string by plain concatenation.
     *
     * Rules: keywords are mapped through the keyword list, variables get a "$"
     * prefix, strings become escaped double-quoted literals, "+" is resolved
     * by convertPlus(), and everything else is emitted verbatim. "=" is not
     * rewritten, so a single "=" inside a condition stays an assignment.
     *
     * @param array $tokens tokens as returned by parseInput()
     * @return string generated PHP code
     */
    public function convert(array $tokens) {
        // Re-index so neighbour lookups by offset are valid for any input keys.
        $tokens = array_values($tokens);
        $code   = "";
        foreach ($this->lexerIterator($tokens) as $index => $token) {
            $char = $token["char"];
            switch ($token["type"]) {
                case static::KW:
                    $code .= $this->keywordList[$char];
                    break;
                case static::VAR:
                    $code .= '$' . $char;
                    break;
                case static::OPR:
                    $code .= $char === "+"
                        ? $this->convertPlus($tokens[$index - 1] ?? null, $tokens[$index + 1] ?? null)
                        : $char;
                    break;
                case static::STR:
                    // Escape backslash, "$" and the quote so the text cannot
                    // end the literal early or trigger interpolation when the
                    // generated code is evaluated.
                    $code .= '"' . addcslashes($char, '\\$"') . '"';
                    break;
                default:
                    // Numbers, the NIL spacer and unknown types pass through.
                    $code .= $char;
            }
        }
        return $code;
    }

    /**
     * Decides how a "+" token is written in the generated PHP.
     *
     * The language uses "+" for both addition and string concatenation, and
     * the converter has no type information, so it looks at the two adjacent
     * tokens only:
     *   - a string literal on either side => concatenation ("."),
     *   - otherwise a numeric literal on either side => addition ("+"),
     *   - otherwise (variables, parentheses) => "." as before.
     *
     * @param array|null $prev token before the "+", or null at the start
     * @param array|null $next token after the "+", or null at the end
     * @return string PHP operator text
     */
    private function convertPlus(?array $prev, ?array $next): string {
        $neighbourTypes = [$prev["type"] ?? null, $next["type"] ?? null];
        $hasString      = in_array(static::STR, $neighbourTypes, true);
        $hasNumber      = in_array(static::INT, $neighbourTypes, true);

        if ($hasString) {
            // Pad with spaces next to a number: PHP would otherwise read
            // 1."a" as the float literal "1." followed by a string.
            return $hasNumber ? " . " : ".";
        }
        return $hasNumber ? "+" : ".";
    }

    /**
     * Yields the tokens one by one, keyed by their list index.
     *
     * @param array $tokens
     * @return \Generator
     */
    private function lexerIterator(array $tokens) {
        foreach ($tokens as $index => $token) {
            yield $index => $token;
        }
    }

    /**
     * Builds a keyword token for a word that has just been consumed.
     *
     * The cursor already sits on the character after the word, so the recorded
     * position is one less: the offset of the keyword's last character.
     *
     * @param string $word
     * @return array
     */
    private function keywordToken(string $word): array {
        return $this->makeToken(static::KW, $word, $this->currCharPos - 1);
    }

    /**
     * Consumes a run of Chinese/Latin letters and returns it.
     *
     * @param string $input text already consumed that the run is appended to
     * @return string
     */
    private function matchVar(string $input = ""): string {
        $word = $input;
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }
        return $word;
    }

    /**
     * Consumes a run of Chinese characters and returns it.
     *
     * @return string
     */
    private function matchUntilNextCharIsNotCn(): string {
        $word = "";
        while ($this->isCnLetter()) {
            $word .= $this->currChar;
            $this->nextChar();
        }
        return $word;
    }

    /**
     * Consumes a numeric literal (digits with an optional single fractional
     * part) and returns it as one INT token.
     *
     * A "." is taken as part of the number only when a digit follows it.
     *
     * @return array
     */
    private function matchNumber(): array {
        $literal = "";
        while (true) {
            $literal .= $this->currChar;

            $next       = $this->charAt($this->currCharPos + 1);
            $isFraction = $next === "."
                && strpos($literal, ".") === false
                && $this->isDigit($this->charAt($this->currCharPos + 2));

            if (!$this->isDigit($next) && !$isFraction) {
                break;
            }
            $this->nextChar();
        }
        // The cursor is still on the literal's last character, so currToken()
        // records that offset and then steps past it.
        return $this->currToken(static::INT, $literal);
    }

    /**
     * Consumes a double-quoted string literal and returns it as a STR token.
     *
     * No escape sequences are recognised: the text runs to the next quote.
     *
     * @return array
     * @throws \Exception when the input ends before the closing quote
     */
    private function matchCompleteStr(): array {
        $start = $this->currCharPos;
        $this->nextChar(); // step over the opening quote

        $content = "";
        while ($this->currChar !== "\"") {
            if ($this->currChar === $this->eof) {
                throw new \Exception(sprintf(
                    "unterminated string literal starting at position %d",
                    $start
                ));
            }
            $content .= $this->currChar;
            $this->nextChar();
        }
        // The cursor is on the closing quote; currToken() consumes it.
        return $this->currToken(static::STR, $content);
    }

    /**
     * Skips spaces, tabs and line breaks.
     */
    private function skipBlankChar(): void {
        while (in_array($this->currChar, [" ", "\t", "\n", "\r"], true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token at the current position, then advances one character.
     *
     * @param string $type
     * @param string $word
     * @return array
     */
    private function currToken(string $type, string $word): array {
        $token = $this->makeToken($type, $word);
        $this->nextChar();
        return $token;
    }

    /**
     * Builds a token array.
     *
     * @param string   $type
     * @param string   $char token text
     * @param int|null $pos  offset to record; defaults to the cursor position
     * @return array
     */
    private function makeToken(string $type, string $char, ?int $pos = null): array {
        return ["type" => $type, "char" => $char, "pos" => $pos ?? $this->currCharPos];
    }

    /**
     * Moves the cursor one character forward.
     */
    private function nextChar(): void {
        $this->currCharPos += 1;
        $this->currChar    = $this->charAt($this->currCharPos);
    }

    /**
     * Returns the character at the given offset, or the eof sentinel when the
     * offset is past the end of the input.
     *
     * @param int $pos zero-based character offset
     * @return string
     */
    private function charAt(int $pos): string {
        $char = mb_substr($this->input, $pos, 1, $this->currEncode);
        return $char === "" ? $this->eof : $char;
    }

    /**
     * Whether the current character can be part of an identifier.
     *
     * @return bool
     */
    private function isVar(): bool {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Whether the current character is an ASCII letter (a-z or A-Z).
     *
     * @return bool
     */
    private function isEnLetter(): bool {
        return preg_match('/^[A-Za-z]$/D', $this->currChar) === 1;
    }

    /**
     * Whether the current character is in the range U+4E00..U+9FA5.
     *
     * @return bool
     */
    private function isCnLetter(): bool {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $this->currChar) === 1;
    }

    /**
     * Whether the current character is an ASCII digit.
     *
     * @return bool
     */
    private function isNumber(): bool {
        return $this->isDigit($this->currChar);
    }

    /**
     * Whether the given character is an ASCII digit.
     *
     * @param string $char
     * @return bool
     */
    private function isDigit(string $char): bool {
        return preg_match('/^[0-9]$/D', $char) === 1;
    }

    /**
     * Whether the current character opens a string literal.
     *
     * @return bool
     */
    private function isStr(): bool {
        return $this->currChar === "\"";
    }

    /**
     * Whether the current character is one of the registered operators.
     *
     * @return bool
     */
    private function isOperator(): bool {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * Whether the word is mapped to a non-empty value in the keyword list.
     *
     * @param string $input
     * @return bool
     */
    private function isKeyword(string $input): bool {
        return (string) ($this->keywordList[$input] ?? "") !== "";
    }
}

三、如何使用

require __DIR__ . "/vendor/autoload.php";

// Define a piece of source code in the custom language.
// NOTE(review): the heredoc opening and the statements before "如果" were lost
// when the article was extracted; they are reconstructed from the token dump
// shown in the article, which also shows a ";" after "艾欧尼亚". Confirm
// against the original source.
$code = <<<EOF
姓名= "腕豪";
问候="你好啊";
地址=(1+2) * 3;
如果(地址 > 3){
    地址=1;
}否则{
    地址="艾欧尼亚";
}
说话 = ("我"+"爱")+"你";
返回 姓名+年龄;
EOF;

$lexer = new Lexer($code);

// Define your own keywords: source-language word => PHP keyword.
$kwMap = [
    "如果" => "if", "否则" => "else", "返回" => "return", "否则如果" => "elseif"
];
$lexer->setKeywordList($kwMap);

// The generated tokens.
$tokens = $lexer->parseInput();

// Convert the tokens to PHP. You could also build an AST with php-parser and
// print PHP from that; this is plain string concatenation.
var_dump($lexer->convert($tokens));

生成词

[
    {"type": "variable", "char": "姓名", "pos": 2},
    {"type": "operator", "char": "=", "pos": 2},
    {"type": "string", "char": "腕豪", "pos": 7},
    {"type": "operator", "char": ";", "pos": 8},
    {"type": "variable", "char": "问候", "pos": 13},
    {"type": "operator", "char": "=", "pos": 13},
    {"type": "string", "char": "你好啊", "pos": 17},
    {"type": "operator", "char": ";", "pos": 18},
    {"type": "variable", "char": "地址", "pos": 23},
    {"type": "operator", "char": "=", "pos": 23},
    {"type": "operator", "char": "(", "pos": 24},
    {"type": "integer", "char": "1", "pos": 25},
    {"type": "operator", "char": "+", "pos": 26},
    {"type": "integer", "char": "2", "pos": 27},
    {"type": "operator", "char": ")", "pos": 28},
    {"type": "operator", "char": "*", "pos": 30},
    {"type": "integer", "char": "3", "pos": 32},
    {"type": "operator", "char": ";", "pos": 33},
    {"type": "keyword", "char": "如果", "pos": 37},
    {"type": "null", "char": " ", "pos": 38},
    {"type": "operator", "char": "(", "pos": 38},
    {"type": "variable", "char": "地址", "pos": 41},
    {"type": "operator", "char": ">", "pos": 42},
    {"type": "integer", "char": "3", "pos": 44},
    {"type": "operator", "char": ")", "pos": 45},
    {"type": "operator", "char": "{", "pos": 46},
    {"type": "variable", "char": "地址", "pos": 55},
    {"type": "operator", "char": "=", "pos": 55},
    {"type": "integer", "char": "1", "pos": 56},
    {"type": "operator", "char": ";", "pos": 57},
    {"type": "operator", "char": "}", "pos": 60},
    {"type": "keyword", "char": "否则", "pos": 62},
    {"type": "null", "char": " ", "pos": 63},
    {"type": "operator", "char": "{", "pos": 63},
    {"type": "variable", "char": "地址", "pos": 72},
    {"type": "operator", "char": "=", "pos": 72},
    {"type": "string", "char": "艾欧尼亚", "pos": 78},
    {"type": "operator", "char": ";", "pos": 79},
    {"type": "operator", "char": "}", "pos": 82},
    {"type": "variable", "char": "说话", "pos": 87},
    {"type": "operator", "char": "=", "pos": 88},
    {"type": "operator", "char": "(", "pos": 90},
    {"type": "string", "char": "我", "pos": 93},
    {"type": "operator", "char": "+", "pos": 94},
    {"type": "string", "char": "爱", "pos": 97},
    {"type": "operator", "char": ")", "pos": 98},
    {"type": "operator", "char": "+", "pos": 99},
    {"type": "string", "char": "你", "pos": 102},
    {"type": "operator", "char": ";", "pos": 103},
    {"type": "keyword", "char": "返回", "pos": 107},
    {"type": "null", "char": " ", "pos": 108},
    {"type": "variable", "char": "姓名", "pos": 111},
    {"type": "operator", "char": "+", "pos": 111},
    {"type": "variable", "char": "年龄", "pos": 114},
    {"type": "operator", "char": ";", "pos": 114}
]

输出:

$姓名="腕豪";$问候="你好啊";$地址=(1.2)*3;if ($地址>3){$地址=1;}else {$地址="艾欧尼亚";}$说话=("我"."爱")."你";return $姓名.$年龄;

能执行吗?当然能。还存在一些小bug,不想改了。

四、使用场景

什么,居然有人说没什么用?oa系统总有用到的时候。

读到这里,这篇"PHP怎么实现词法分析与自定义语言"文章已经介绍完毕,想要掌握这篇文章的知识点还需要大家自己动手实践使用过才能领会,如果想了解更多相关内容的文章,欢迎关注行业资讯频道。

字符 地址 词法 分析 关键 关键字 姓名 字符串 生成 语言 代码 变量 年龄 文章 起点 需求 尼亚 中文 你好 公式 数据库的安全要保护哪些东西 数据库安全各自的含义是什么 生产安全数据库录入 数据库的安全性及管理 数据库安全策略包含哪些 海淀数据库安全审计系统 建立农村房屋安全信息数据库 易用的数据库客户端支持安全管理 连接数据库失败ssl安全错误 数据库的锁怎样保障安全 我市首个网络安全实训基地 小程序服务器到期不续费会怎样 android中的数据库 苏州web前端软件开发服务费 数据库参照关系 网络安全责任追究制度执行 宁波互联网智慧工地软件开发 ict软件开发 服务器组装好后怎么装系统 2021祥云杯网络安全大赛 计算机网络技术研究生 列举数据库的不安全因素 软件开发质量保证大纲 幼师学网络安全法心得体会 以网络安全手册为主题的手抄报 网络安全手抄报复杂精美 争当好网民网络安全法制培训 安全探针服务器开源 国家十二部门网络安全审查 生财有道视频软件开发 驾驶证吊销在数据库找得到信息吗 中国网络安全最新研究 图片名称怎么存储到数据库 家用电脑改装服务器 武清区辅助网络技术不二之选 更改网络安全钥匙 卓翼科技工业互联网签约 app软件开发人员有哪些 基础地理信息城市数据库 h2-3光猫打印机服务器
0