;;; matlab-syntax.el --- Manage MATLAB syntax tables and buffer parsing.
;;
;; Copyright (C) 2021 Eric Ludlam
;;
;; Author:
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see https://www.gnu.org/licenses/.

;;; Commentary:
;;
;; Manage syntax handling for `matlab-mode'.
;; Matlab's syntax for comments and strings can't be handled by a standard
;; Emacs syntax table.  This code handles the syntax table, and special
;; scanning needed to augment a buffer's syntax for all our special cases.
;;
;; This file also handles all the special parsing needed to support indentation,
;; block scanning, and the line.

(require 'matlab-compat)

;;; Code:

(defvar matlab-syntax-support-command-dual nil
  "Non-nil means to support command dual for indenting and syntax highlight.
Does not work well in classes with properties with datatypes.")
(make-variable-buffer-local 'matlab-syntax-support-command-dual)
(put 'matlab-syntax-support-command-dual 'safe-local-variable #'booleanp)

(defvar matlab-syntax-table
  (let ((st (make-syntax-table (standard-syntax-table))))
    ;; Comment Handling:
    ;;   Multiline comments:   %{ text %}
    ;;   Single line comments: % text (single char start)
    ;;   Ellipsis comments:    ... text (comment char is 1st char after 3rd dot)
    ;;     ^ handled in `matlab--syntax-propertize'
    (modify-syntax-entry ?% "< 13" st)
    (modify-syntax-entry ?{ "(} 2c" st)
    (modify-syntax-entry ?} "){ 4c" st)
    (modify-syntax-entry ?\n ">" st)

    ;; String Handling:
    ;;   Character vector: 'text'
    ;;   String:           "text"
    ;; These next syntaxes are handled with `matlab--syntax-propertize'
    ;;   Transpose:           varname'
    ;;   Quoted quotes:       ' don''t ' or " this "" "
    ;;   Unterminated Char V: ' text
    (modify-syntax-entry ?' "\"" st)
    (modify-syntax-entry ?\" "\"" st)

    ;; Words and Symbols:
    (modify-syntax-entry ?_ "_" st)

    ;; Punctuation:
    (modify-syntax-entry ?\\ "." st)
    (modify-syntax-entry ?\t " " st)
    (modify-syntax-entry ?+ "." st)
    (modify-syntax-entry ?- "." st)
    (modify-syntax-entry ?* "." st)
    (modify-syntax-entry ?/ "." st)
    (modify-syntax-entry ?= "." st)
    (modify-syntax-entry ?< "." st)
    (modify-syntax-entry ?> "." st)
    (modify-syntax-entry ?& "." st)
    (modify-syntax-entry ?| "." st)

    ;; Parenthetical blocks:
    ;;   Note: these are in standard syntax table, repeated here for completeness.
    (modify-syntax-entry ?\( "()" st)
    (modify-syntax-entry ?\) ")(" st)
    (modify-syntax-entry ?\[ "(]" st)
    (modify-syntax-entry ?\] ")[" st)
    ;;(modify-syntax-entry ?{ "(}" st) - Handled as part of comments
    ;;(modify-syntax-entry ?} "){" st)

    st)
  "MATLAB syntax table")

(defvar matlab-navigation-syntax-table
  (let ((st (copy-syntax-table matlab-syntax-table)))
    ;; Make _ a part of words so we can skip them better
    (modify-syntax-entry ?_ "w" st)
    st)
  "The syntax table used when navigating blocks.")

(defmacro matlab-navigation-syntax (&rest forms)
  "Set the current environment for syntax-navigation and execute FORMS.
While FORMS run, the current buffer uses `matlab-navigation-syntax-table'
and `case-fold-search' is bound to nil.  The previous syntax table is
restored in the buffer that was current on entry, even if FORMS exit
non-locally or leave a different buffer current.
The value is the value of the last form in FORMS."
  (declare (indent 0))
  ;; Uninterned symbols keep the expansion from capturing (or being
  ;; captured by) variables that FORMS happen to use.
  (let ((old-table (make-symbol "old-table"))
        (old-buffer (make-symbol "old-buffer")))
    `(let ((,old-table (syntax-table))
           (,old-buffer (current-buffer))
           (case-fold-search nil))
       (unwind-protect
           (progn
             (set-syntax-table matlab-navigation-syntax-table)
             ,@forms)
         ;; Restore in the buffer whose table we changed, not in
         ;; whatever buffer FORMS left current.
         (when (buffer-live-p ,old-buffer)
           (with-current-buffer ,old-buffer
             (set-syntax-table ,old-table)))))))

(add-hook 'edebug-setup-hook
          (lambda ()
            (def-edebug-spec matlab-navigation-syntax def-body)))

;;; Buffer Scanning for Syntax Table Augmentation
;;
;; To support all our special syntaxes via syntax-ppss (parse partial
;; sexp), we need to scan the buffer for patterns, and then leave
;; behind the hints pps needs to do the right thing.
;;
;; Support is broken up in these functions:
;;   * matlab--put-char-category - Apply a syntax category to a character
;;   * matlab--syntax-symbol - Create a syntax category symbol
;;   * matlab--syntax-propertize - Used as `syntax-propertize-function' for
;;                         doing the buffer scan to augment syntaxes.
;;   * matlab--scan-line-* - Scan for specific types of syntax occurrences.

(defun matlab--put-char-category (pos category)
  "At character POS, put text CATEGORY.
CATEGORY is a symbol stored as the `category' text property of the
character at POS; the character is also tagged with the `mcm'
property.  Does nothing when POS is at or beyond the end of the
accessible portion of the buffer, since there is no character there."
  (when (< pos (point-max))
    (add-text-properties pos (1+ pos) (list 'category category 'mcm t))))

(defmacro matlab--syntax-symbol (symbol syntax doc)
  "Create a new SYMBOL used as a text property category with SYNTAX.
SYNTAX is a form evaluating to a raw syntax descriptor.  It becomes
both the value of SYMBOL and its `syntax-table' property, so a
character whose `category' is SYMBOL inherits that syntax.
DOC is the docstring for the variable SYMBOL."
  (declare (indent defun))
  `(progn (defvar ,symbol ,syntax ,doc)
          (set ',symbol ,syntax) ;; So you can re-eval it.
          (put ',symbol 'syntax-table ,symbol)
          ))

(matlab--syntax-symbol matlab--command-dual-syntax
  '(15 . nil) ;; Generic string
  "Syntax placed on the start and end-of-line of command dual text.")
(put 'matlab--command-dual-syntax 'command-dual t) ;; Font-lock cookie

(matlab--syntax-symbol matlab--unterminated-string-syntax
  '(15 . nil) ;; Generic string end
  "Syntax placed on end-of-line for unterminated strings.")
(put 'matlab--unterminated-string-syntax 'unterminated t) ;; Font-lock cookie

(matlab--syntax-symbol matlab--ellipsis-syntax
  (string-to-syntax "< ") ;; comment char
  "Syntax placed on ellipsis to treat them as comments.")

(matlab--syntax-symbol matlab--not-block-comment-syntax
  (string-to-syntax "(}") ;; Just a regular open brace
  "Syntax placed on the { of an invalid %{ so it is just an open brace.")

(defun matlab--syntax-propertize (&optional start end)
  "Scan region between START and END for unterminated strings.
Only scans whole-lines, as MATLAB is a line-based language.
If region is not specified, scan the whole buffer.
See `matlab--scan-line-for-ellipsis', `matlab--scan-line-bad-blockcomment',
and `matlab--scan-line-for-unterminated-string' for specific details."
  (save-match-data ;; avoid 'Syntax Checking transmuted the match-data'
    (save-excursion
      ;; Scan region, but always expand to beginning of line
      (goto-char (or start (point-min)))
      (beginning-of-line)
      ;; Clear old properties
      (remove-text-properties (point)
                              (save-excursion
                                (goto-char (or end (point-max)))
                                (end-of-line)
                                (point))
                              '(category nil mcm nil))
      ;; Apply properties
      (while (and (not (>= (point) (or end (point-max)))) (not (eobp)))

        (when matlab-syntax-support-command-dual
          ;; Command line dual comes first to prevent wasting time
          ;; in later checks.
          (beginning-of-line)
          (when (matlab--scan-line-for-command-dual)
            (matlab--put-char-category (point) 'matlab--command-dual-syntax)
            (end-of-line)
            (matlab--put-char-category (point) 'matlab--command-dual-syntax)))

        ;; Multiple ellipsis can be on a line.  Find them all
        (beginning-of-line)
        (while (matlab--scan-line-for-ellipsis)
          ;; Mark ellipsis as if a comment.
          (matlab--put-char-category (point) 'matlab--ellipsis-syntax)
          (forward-char 3))

        ;; Multiple invalid block comment starts possible.  Find them all
        (beginning-of-line)
        (while (matlab--scan-line-bad-blockcomment)
          ;; Mark 2nd char as just open brace, not punctuation.
          (matlab--put-char-category (point) 'matlab--not-block-comment-syntax))

        ;; Look for an unterminated string.  Only one possible per line.
        (beginning-of-line)
        (when (matlab--scan-line-for-unterminated-string)
          ;; Mark this one char plus EOL as end of string.
          (matlab--put-char-category (point) 'matlab--unterminated-string-syntax)
          (end-of-line)
          (matlab--put-char-category (point) 'matlab--unterminated-string-syntax))

        (beginning-of-line)
        (forward-line 1)))))

(declare-function matlab-keyword-p "matlab-scan")

(defun matlab--scan-line-for-command-dual (&optional debug)
  "Scan this line for command line duality strings.
Point must be at the beginning of the line.  If the line looks like
\"command arg\" and the command is not a keyword, move point to the
first character of the argument and return non-nil.
DEBUG is unused."
  ;; Note - add \s$ b/c we'll add that syntax to the first letter, and it
  ;; might still be there during an edit!
  ;; NOTE(review): the category applied is generic-string (class 15,
  ;; regexp `\\s|'), while `\\s$' matches paired delimiters - confirm
  ;; which one was intended.
  (when (looking-at
         "^\\s-*\\([a-zA-Z_]\\(?:\\w\\|\\s_\\)*\\)\\s-+\\(\\s$\\|\\w\\|\\s_\\)")
    ;; This is likely command line dual for a function.
    (when (not (matlab-keyword-p 1))
      (goto-char (match-beginning 2)))))

(matlab--syntax-symbol matlab--transpose-syntax
  '(3 . nil) ;; 3 = symbol
  "Treat ' as non-string when used as transpose.")

(matlab--syntax-symbol matlab--quoted-string-syntax
  '(9 . nil) ;; 9 = escape in a string
  "Treat '' or \"\" as not string delimiters when inside a string.")

(defun matlab--scan-line-for-unterminated-string (&optional debug)
  "Scan this line for an unterminated string, leave cursor on starting string char.
Along the way, mark transpose quotes and doubled quotes inside strings
with their special syntax categories.
Return t when scanning signals an error, which is how an unterminated
string shows up, and nil when the whole line scans cleanly.
DEBUG is unused."
  ;; First, scan over all the string chars.
  (save-restriction
    ;; Narrow to this line so a missing close quote errors out at EOL
    ;; instead of pairing with a quote on a later line.
    (narrow-to-region (line-beginning-position) (line-end-position))
    (beginning-of-line)
    (condition-case nil
        (while (re-search-forward "\\s\"\\|\\s<" nil t)
          (let ((start-str (match-string 0))
                (start-char (match-beginning 0)))
            (forward-char -1)
            (if (looking-at "\\s<")
                (forward-comment 1)
              ;; Else, check for valid string
              (if (or (bolp)
                      (string= start-str "\"")
                      (save-excursion
                        (forward-char -1)
                        (not (looking-at "\\(\\w\\|\\s_\\|\\s)\\|\"\\|\\.\\)"))))
                  (progn
                    ;; Valid string start, try to skip the string
                    (forward-sexp 1)
                    ;; If we just finished and we have a double of ourselves,
                    ;; convert those doubles into punctuation.
                    (when (looking-at start-str)
                      (forward-char -1)
                      (matlab--put-char-category (point) 'matlab--quoted-string-syntax)
                      ;; and try again.
                      (goto-char start-char)))
                (when (string= start-str "'")
                  ;; If it isn't valid string, it's just transpose or something.
                  ;; convert to a symbol - as a VAR'', the second ' needs to think it
                  ;; is not after punctuation.
                  (matlab--put-char-category (point) 'matlab--transpose-syntax))
                ;; Move forward 1.
                (forward-char 1))))
          nil)
      (error t))))

(defun matlab--scan-line-bad-blockcomment ()
  "Scan this line for invalid block comment starts.
A block comment start must be alone on its line (see
`matlab-block-comment-start-re'), so a \"%{\" with any other text
before or after it on the line is invalid.
If an invalid start is found after point, leave point on its \"{\"
and return t.  Otherwise return nil."
  (when (re-search-forward "%{" (line-end-position) t)
    ;; Capture positions now; the checks below must not be trusted to
    ;; leave the match data alone.
    (let ((percent-pos (match-beginning 0))
          (brace-pos (1- (match-end 0))))
      (when (or (not (looking-at "\\s-*$"))
                (save-excursion
                  (goto-char percent-pos)
                  (skip-syntax-backward "-" (line-beginning-position))
                  (not (bolp))))
        (goto-char brace-pos)
        t))))

(defun matlab--scan-line-for-ellipsis ()
  "Scan this line for an ellipsis.
If one is found after point, leave point on its first dot and
return t.  Otherwise return nil."
  (when (re-search-forward "\\.\\.\\." (line-end-position) t)
    (goto-char (match-beginning 0))
    t))

;;; Font Lock Support:
;;
;; The syntax specific font-lock support handles comments and strings.
;;
;; We'd like to support multiple kinds of strings and comments.  To do
;; that we overload `font-lock-syntactic-face-function' with our own.
;; This does the same job as the original, except we scan the start
;; for special cookies left behind by `matlab--syntax-propertize' and
;; use that to choose different fonts.

(defun matlab--font-lock-syntactic-face (pps)
  "Return the face to use for the syntax specified in PPS.
PPS is a parse state as returned by `syntax-ppss'."
  ;; From the default in font-lock.
  ;; (if (nth 3 state) font-lock-string-face font-lock-comment-face)
  (let ((start (nth 8 pps)))
    (if (nth 3 pps)
        ;; This is a string.  Check the start char to see if it was
        ;; marked as an unterminated string.  The cookies live on the
        ;; category symbol and are found through the `category' property.
        (cond ((get-text-property start 'unterminated)
               'matlab-unterminated-string-face)
              ((get-text-property start 'command-dual)
               'matlab-commanddual-string-face)
              (t
               'font-lock-string-face))

      ;; Not a string, must be a comment.  Check to see if it is a
      ;; cellbreak comment.  `char-after' is nil when the comment starts
      ;; on the last character of the buffer, so compare with `eq'.
      (cond ((eq (char-after (1+ start)) ?\%)
             'matlab-cellbreak-face)
            (t
             'font-lock-comment-face)))))

;;; SETUP
;;
;; Connect our special logic into a running MATLAB Mode
;; replacing existing mechanics.
;;
;; Delete this if/when it becomes a permanent part of `matlab-mode'.

(defun matlab-syntax-setup ()
  "Integrate our syntax handling into a running `matlab-mode' buffer.
Safe to use in `matlab-mode-hook'."
  ;; Syntax Table support
  (set-syntax-table matlab-syntax-table)
  (setq-local syntax-propertize-function #'matlab--syntax-propertize)

  ;; Comment handlers
  (setq-local comment-start "%")
  (setq-local comment-end "")
  (setq-local comment-start-skip "%\\s-+")
  (setq-local page-delimiter "^\\(\f\\|%%\\(\\s-\\|\n\\)\\)")

  ;; Other special regexps handling different kinds of syntax.
  (setq-local paragraph-start (concat "^$\\|" page-delimiter))
  (setq-local paragraph-separate paragraph-start)
  (setq-local paragraph-ignore-fill-prefix t)

  ;; Font lock
  (setq-local font-lock-syntactic-face-function
              #'matlab--font-lock-syntactic-face))

;;; Syntax Testing for Strings and Comments
;;
;; These functions detect syntactic context based on the syntax table.

(defsubst matlab-cursor-in-string-or-comment ()
  "Return non-nil if the cursor is in a valid MATLAB comment or string."
  (nth 8 (syntax-ppss (point))))

(defsubst matlab-cursor-in-comment ()
  "Return t if the cursor is in a valid MATLAB comment."
  (nth 4 (syntax-ppss (point))))

(defsubst matlab-cursor-in-string (&optional incomplete)
  "Return t if the cursor is in a valid MATLAB character vector or string scalar.
Note: INCOMPLETE is now obsolete
If the optional argument INCOMPLETE is non-nil, then return t if we
are in what could be a an incomplete string. (Note: this is also the default)"
  (nth 3 (syntax-ppss (point))))

(defun matlab-cursor-comment-string-context (&optional bounds-sym)
  "Return the comment/string context of cursor for the current line.
Return `comment' if in a comment.
Return `string' if in a string.
Return `charvector' if in a character vector.
Return `commanddual' if in command dual text.
Return `ellipsis' if after an ... ellipsis.
Return nil if none of the above.
Scans from the beginning of line to determine the context.
If optional BOUNDS-SYM is specified, set that symbol value to the
bounds of the string or comment the cursor is in."
  (let* ((pps (syntax-ppss (point)))
         (start (nth 8 pps))
         (syntax nil))
    ;; Else, inside something if 'start' is set.
    (when start
      (save-match-data
        (save-excursion
          (goto-char start) ;; Prep for extra checks.
          (setq syntax
                (cond ((eq (nth 3 pps) t)
                       ;; Generic string delimiter: look at the start
                       ;; char to tell what kind of string it was.
                       (cond ((= (following-char) ?') 'charvector)
                             ((= (following-char) ?\") 'string)
                             (t 'commanddual)))
                      ((eq (nth 3 pps) ?') 'charvector)
                      ((eq (nth 3 pps) ?\") 'string)
                      ((nth 4 pps)
                       (if (= (following-char) ?\%)
                           'comment
                         'ellipsis))
                      (t nil)))

          ;; compute the bounds
          (when (and syntax bounds-sym)
            (if (memq syntax '(charvector string))
                ;;(forward-sexp 1) - overridden - need primitive version
                (goto-char (scan-sexps (point) 1))
              (forward-comment 1)
              (if (bolp) (forward-char -1)))
            (set bounds-sym (list start (point)))))))

    ;; Return the syntax
    syntax))

(defsubst matlab-beginning-of-string-or-comment ()
  "If the cursor is in a string or comment, move to the beginning.
Returns non-nil if the cursor moved."
  (let* ((pps (syntax-ppss (point))))
    (when (nth 8 pps)
      (goto-char (nth 8 pps)))))

(defun matlab-end-of-string-or-comment ()
  "If the cursor is in a string or comment, move to the end.
Returns non-nil if the cursor moved."
  (let* ((pps (syntax-ppss (point))))
    (when (nth 8 pps)
      ;; syntax-ppss doesn't have the end, so go to the front
      ;; and then skip forward.
      (goto-char (nth 8 pps))
      (if (nth 3 pps)
          (goto-char (scan-sexps (point) 1))
        (forward-comment 1)))))

;;; Block Comment handling
;;
;; Old version block comments were handled in a special way.
;; Can we simplify with syntax tables?

(defconst matlab-block-comment-start-re "^\\s-*%{\\s-*$"
  "Regexp that matches the beginning of a block comment.
Block comment indicators must be on a line by themselves.")

(defconst matlab-block-comment-end-re "^\\s-*%}\\s-*$"
  "Regexp that matches the end of a block comment.
Block comment indicators must be on a line by themselves.")

(defun matlab-ltype-block-comment-start ()
  "Return non-nil if the current line is a block comment start."
  (save-excursion
    (beginning-of-line)
    (looking-at matlab-block-comment-start-re)))

(defun matlab-ltype-block-comment-end ()
  "Return non-nil if the current line is a block comment end."
  (save-excursion
    (beginning-of-line)
    (looking-at matlab-block-comment-end-re)))

(defun matlab-block-comment-bounds (&optional linebounds)
  "Return start and end positions of block comment if we are in one.
The value is a cons cell (START . END), or nil when point is not in a
block comment.
Optional LINEBOUNDS specifies if returned limits are line based instead
of character based."
  (let* ((pps (syntax-ppss (point)))
         (start (nth 8 pps))
         (end 0))
    ;; 4 is comment flag.  7 is '2' if block comment
    (when (and (nth 4 pps) (eq (nth 7 pps) 2))
      (save-excursion
        (goto-char start)
        (forward-comment 1)
        (setq end (point)))
      (if linebounds
          ;; Bounds expanded to beginning/end of the line
          (cons (save-excursion
                  (goto-char start)
                  (line-beginning-position))
                (save-excursion
                  (goto-char end)
                  (line-end-position)))
        ;; Just the bounds
        (cons start end)))))

;;; Navigating Lists
;;
;; MATLAB's lists are (), {}, [].
;; We used to need to do special stuff, but now I think this
;; is just a call straight to up-list.

(defun matlab-up-list (count)
  "Move forwards or backwards up a list by COUNT.
When travelling backward, use `syntax-ppss' counted paren
starts to navigate upward.
When travelling forward, use `up-list' directly, but disable
comment and string crossing.
Signal an error when travelling backward and fewer than COUNT
enclosing lists exist."
  (save-restriction
    (matlab-beginning-of-string-or-comment)
    (if (< count 0)
        (let ((pps (syntax-ppss)))
          (when (< (nth 0 pps) (abs count))
            (error "Cannot navigate up %d lists" (abs count)))
          ;; When travelling in reverse, we can just use pps'
          ;; parsed paren list in slot 9.
          (let ((posn (reverse (nth 9 pps)))) ;; Location of parens
            (goto-char (nth (1- (abs count)) posn))))
      ;; Else - travel forward
      (up-list count nil t)) ;; will this correctly ignore comments, etc?
    ))

(defsubst matlab-beginning-of-outer-list ()
  "If the cursor is in a list, move to the beginning of outermost list.
Returns non-nil if the cursor moved."
  (let* ((pps (syntax-ppss (point))))
    (when (nth 9 pps)
      (goto-char (car (nth 9 pps))))))

(defun matlab-end-of-outer-list ()
  "If the cursor is in a list, move to the end of the outermost list.
Returns non-nil if the cursor moved."
  (let* ((pps (syntax-ppss (point))))
    (when (nth 9 pps)
      ;; syntax-ppss doesn't have the end, so go to the front
      ;; and then skip forward.
      (goto-char (car (nth 9 pps)))
      (goto-char (scan-sexps (point) 1)))))

;;; Syntax Compat functions
;;
;; Left over old APIs.  Delete these someday.

(defun matlab-move-simple-sexp-backward-internal (count)
  "Move backward COUNT number of MATLAB sexps."
  (forward-sexp (- count)))

(defun matlab-move-simple-sexp-internal (count)
  "Move over one MATLAB sexp COUNT times.
If COUNT is negative, travel backward."
  (forward-sexp count))

(provide 'matlab-syntax)

;;; matlab-syntax.el ends here