Complete re-write of charvec/string highlighting support.

Regex version had too many mismatch conditions that could go subtly wrong.
New version philosophy:
* A single font-locker for comment, charvec, and comments.
* After matching a start for one of the above, identify the end
  location, custom for each type.
This prevents interaction between multiple font lock matchers.
For predicates like matlab-cursor-in-comment, same idea to avoid same problem.
* add a new fcn 'matlab-cursor-comment-string-context' which starts at
  beginning of line, and scans to point, identifying where the cursor
  is in.
* all predicates now use this new fcn.
Also, fix elipsis as comment highlighter.
This commit is contained in:
Eric Ludlam 2019-09-18 23:05:56 -04:00
parent 350f063933
commit 9664fb88ea

370
matlab.el
View file

@ -724,125 +724,119 @@ If font lock is not loaded, lay in wait."
km) km)
"The keymap used in `matlab-mode'.") "The keymap used in `matlab-mode'.")
;;; Font locking keywords ===================================================== ;;; Font Lock : Character Vectors and Strings and Comments ================================
;;
;;; Font Lock Chracter Vector constant support ;; Combine these, but do all the matching internally instead of using regexp
;; because it's just too complex for a regular expression.
(defvar matlab-string-start-regexp "\\(^\\|[^]})a-zA-Z0-9_.'\"]\\)" (defvar matlab-string-start-regexp "\\(^\\|[^]})a-zA-Z0-9_.'\"]\\)"
"Regexp used to represent the character before the char vector or string scalars. "Regexp used to represent the character before the char vector or string scalars.
The ' character has restrictions on what starts a string which is needed The ' character has restrictions on what starts a string which is needed
when attempting to understand the current context.") when attempting to understand the current context.")
;; To quote a quote, put two in a row, thus we need an anchored (defvar matlab-font-lock-string-start-regexp (concat matlab-string-start-regexp "\\(['\"]\\)")
;; first quote. In addition, we don't want to color strings in comments. "Regexp used by font lock to find the beginning of a char vector or string scalar.")
(defvar matlab-charvec-end-regexp "[^'\n]*\\(''[^'\n]*\\)*'"
"Regexp used to represent the character pattern for ending a character vector.
The ' character can be used as a transpose, and can transpose transposes.
Therefore, to end, we must check all that goop.")
(defun matlab-font-lock-charvec-match-normal (limit) (defvar matlab-font-lock-string-and-comment-start-regexp (concat matlab-font-lock-string-start-regexp
"\\|\\(%\\)")
"Starting matcher for allstring comment font lock")
(defun test-ml-asc ()
(interactive)
;(beginning-of-line)
(matlab-font-lock-allstring-comment-match-normal (point-max))
(goto-char (match-beginning 0))
(call-interactively 'set-mark-command)
(goto-char (match-end 0)))
(defun matlab-font-lock-allstring-comment-match-normal (limit)
"When font locking strings, call this function for normal character vectors. "When font locking strings, call this function for normal character vectors.
Argument LIMIT is the maximum distance to scan." Argument LIMIT is the maximum distance to scan."
(matlab-font-lock-charvec-match-here (when (and (< (point) limit)
(concat matlab-string-start-regexp (re-search-forward matlab-font-lock-string-and-comment-start-regexp limit t))
"\\('" matlab-charvec-end-regexp "\\)"
"\\([^']\\|$\\)")
limit))
(defun matlab-font-lock-charvec-match-unterminated (limit) ;; We might match a comment, string or unterminated string.
"When font locking character vectors, call this function for normal char vectors. (let ((strchar (preceding-char))
Argument LIMIT is the maximum distance to scan." (b0 (or (match-beginning 2) (match-beginning 3)))
(matlab-font-lock-charvec-match-here (bs nil)
(concat matlab-string-start-regexp "\\('[^'\n]*\\(''[^'\n]*\\)*\\)$") (es nil)
limit)) (bu nil)
(eu nil)
(bc nil)
(ec nil)
(done nil)
(searchlim (point-at-eol)))
(defun matlab-font-lock-charvec-match-here (regex limit) ;; Identify the thing we just found, and do different things based on that.
"When font-locking character vectors, call this function to determine a match. (if (eq strchar ?%)
Argument REGEX is the expression to scan for. Match 2 must be the string.
Argument LIMIT is the maximum distance to scan."
(let (e)
(while (and (re-search-forward regex limit t)
(progn (progn
;; This gets us out of a comment after the string. ;; Comments go to the end of the line, making this part simple
(setq e (match-end 2)) (setq bc b0
(goto-char (match-beginning 2)) ec (point-at-eol))
(prog1 )
(or (matlab-cursor-in-comment)
(if (bolp) nil ;; Not a comment, must be a string
(save-excursion
(forward-char -1) ;; Scan to end of string by looking at every matching
(matlab-cursor-in-string)))) ;; string character and deciding what it means.
(goto-char e)))) (while (and (not done)
(setq e nil)) (re-search-forward "['\"]" searchlim t))
(if (not e) (goto-char (match-end 0))
nil (if (eq (preceding-char) strchar)
(goto-char e) ;; Same type of string
(if (eq (following-char) strchar)
;; This is a quoted quote. Skip it and keep going.
(forward-char 1)
;; solo quote, end of string
(setq bs b0
es (point)
done t))
;; The other type of string - just keep going.
nil))
;; If not done, unterminated
(if (not done)
(setq bu b0
eu searchlim))
)
;; Fake out some match data
(set-match-data
(list
b0 (or es eu ec)
bs es ; matched string
bu eu ; unterminated string
bc ec ; comment
))
;; Move to the end
(goto-char (or es eu ec))
;; Successful string
t))) t)))
;;; Font Lock String Scalar Constants
;; To quote a quote, put two in a row, thus we need an anchored
;; first quote. In addition, we don't want to color strings in comments.
(defvar matlab-stringscalar-end-regexp "[^\"\n]*\\(\"\"[^\"\n]*\\)*\""
"Regexp used to represent the character pattern for ending a string scalar.
Two quotes in a row quote in a quote to the string scalar.")
(defun matlab-font-lock-stringscalar-match-normal (limit)
"When font locking strings, call this function for normal character vectors.
Argument LIMIT is the maximum distance to scan."
(matlab-font-lock-stringscalar-match-here
(concat matlab-string-start-regexp
"\\(\"" matlab-stringscalar-end-regexp "\\)"
"\\([^\"]\\|$\\)")
limit))
(defun matlab-font-lock-stringscalar-match-unterminated (limit)
"When font locking character vectors, call this function for normal char vectors.
Argument LIMIT is the maximum distance to scan."
(matlab-font-lock-stringscalar-match-here
(concat matlab-string-start-regexp "\\(\"[^\"\n]*\\(\"\"[^\"\n]*\\)*\\)$")
limit))
(defun matlab-font-lock-stringscalar-match-here (regex limit)
"When font-locking character vectors, call this function to determine a match.
Argument REGEX is the expression to scan for. Match 2 must be the string.
Argument LIMIT is the maximum distance to scan."
(let (e)
(while (and (re-search-forward regex limit t)
(progn
;; This gets us out of a comment after the string.
(setq e (match-end 2))
(goto-char (match-beginning 2))
(prog1
(or (matlab-cursor-in-comment)
(if (bolp) nil
(save-excursion
(forward-char -1)
(matlab-cursor-in-string))))
(goto-char e))))
(setq e nil))
(if (not e)
nil
(goto-char e)
t)))
;;; Font Lock Comment and Unreachable Code Matchers ;;; Font Lock Comment and Unreachable Code Matchers
;;
(defun matlab-font-lock-comment-match (limit) ;(defun matlab-font-lock-comment-match (limit)
"When font-locking comments, call this function to determine a match. ; "When font-locking comments, call this function to determine a match.
Argument LIMIT is the maximum distance to scan." ;Argument LIMIT is the maximum distance to scan."
(let (e) ; (let (e)
(while (and (re-search-forward "\\(%[^%\n]*\\)" limit t) ; (while (and (re-search-forward "\\(%[^%\n]*\\)" limit t)
(progn ; (progn
(setq e (match-end 1)) ; (setq e (match-end 1))
(member (get-text-property (match-beginning 0) 'face) ; (member (get-text-property (match-beginning 0) 'face)
'(font-lock-string-face ; '(font-lock-string-face
matlab-unterminated-string-face)))) ; matlab-unterminated-string-face))))
(setq e nil)) ; (setq e nil))
(if (not e) ; (if (not e)
nil ; nil
(goto-char e) ; (goto-char e)
t))) ; t)))
(defun matlab-find-block-comments (limit) (defun matlab-find-block-comments (limit)
"Find code that is commented out with %{ until %}. "Find code that is commented out with %{ until %}.
@ -1005,23 +999,19 @@ Customizing this variable is only useful if `regexp-opt' is available."
;; font-lock keywords ;; font-lock keywords
(defvar matlab-font-lock-keywords (defvar matlab-font-lock-keywords
(list (list
;; charvec quote chars are also used as transpose, but only if directly ;; charvec and string quote chars are also used as transpose, but only if directly
;; after characters, numbers, underscores, or closing delimiters. ;; after characters, numbers, underscores, or closing delimiters.
'(matlab-font-lock-charvec-match-normal 2 font-lock-string-face) '(matlab-font-lock-allstring-comment-match-normal
;; A charvec with no termination is not currently highlighted. (1 font-lock-string-face nil t)
(2 matlab-unterminated-string-face nil t)
(3 font-lock-comment-face nil t))
;; A charvec and string with no termination is not currently highlighted.
;; This will show that the string needs some attention. ;; This will show that the string needs some attention.
'(matlab-font-lock-charvec-match-unterminated ;'(matlab-font-lock-allstring-match-unterminated
2 matlab-unterminated-string-face) ; 2 matlab-unterminated-string-face)
;; Stringscalar quote chars are also used as transpose, but only if directly
;; after characters, numbers, underscores, or closing delimiters.
'(matlab-font-lock-stringscalar-match-normal 2 font-lock-string-face)
;; A stringscalar with no termination is not currently highlighted.
;; This will show that the string needs some attention.
'(matlab-font-lock-stringscalar-match-unterminated
2 matlab-unterminated-string-face)
;; Comments must occur after the string, that way we can check to see ;; Comments must occur after the string, that way we can check to see
;; if the comment start char has occurred inside our string. (EL) ;; if the comment start char has occurred inside our string. (EL)
'(matlab-font-lock-comment-match 1 font-lock-comment-face) ;'(matlab-font-lock-comment-match 1 font-lock-comment-face)
;; Various pragmas should be in different colors. ;; Various pragmas should be in different colors.
;; I think pragmas are always lower case? ;; I think pragmas are always lower case?
'("%#\\([a-z]+\\)" (1 'bold prepend)) '("%#\\([a-z]+\\)" (1 'bold prepend))
@ -1134,7 +1124,7 @@ ui\\(cont\\(ext\\(\\|menu\\)\\|rol\\)\\|menu\\|\
;; Since it's a math language, how bout dem symbols? ;; Since it's a math language, how bout dem symbols?
'("\\([<>~]=?\\|\\.[/*^']\\|==\\|\\<xor\\>\\|[-!^&|*+\\/~:]\\)" '("\\([<>~]=?\\|\\.[/*^']\\|==\\|\\<xor\\>\\|[-!^&|*+\\/~:]\\)"
1 font-lock-type-face) 1 font-lock-type-face)
'("[]A-Za-z0-9_}']\\('+\\)" 1 font-lock-type-face) '("[]A-Za-z0-9_\"})']\\('+\\)" 1 font-lock-type-face)
;; How about references in the HELP text. ;; How about references in the HELP text.
(list (concat "^" matlab-comment-line-s "\\s-*" (list (concat "^" matlab-comment-line-s "\\s-*"
"\\(\\([A-Z]+\\s-*=\\s-+\\|\\[[^]]+]\\s-*=\\s-+\\|\\)" "\\(\\([A-Z]+\\s-*=\\s-+\\|\\[[^]]+]\\s-*=\\s-+\\|\\)"
@ -1149,7 +1139,7 @@ ui\\(cont\\(ext\\(\\|menu\\)\\|rol\\)\\|menu\\|\
'(1 font-lock-reference-face prepend)) '(1 font-lock-reference-face prepend))
;; continuation ellipsis. ;; continuation ellipsis.
'("[^.]\\(\\.\\.\\.+\\)\\([^\n]*\\)" (1 'underline) '("[^.]\\(\\.\\.\\.+\\)\\([^\n]*\\)" (1 'underline)
(2 font-lock-comment-face)) (2 font-lock-comment-face prepend))
;; How about debugging statements? ;; How about debugging statements?
;;'("\\<\\(db\\sw+\\)\\>" 1 'bold) ;;'("\\<\\(db\\sw+\\)\\>" 1 'bold)
(list (list
@ -2266,114 +2256,73 @@ line."
(match-string 1)) (match-string 1))
(t nil)))) (t nil))))
(defun matlab-cursor-in-string-or-comment () (defun matlab-cursor-comment-string-context ()
"Return t if the cursor is in a valid MATLAB comment or string." "Return the comment/string context of cursor for the current line.
;; comment and string depend on each other. Here is one test Return 'comment if in a comment.
;; that does both. Return 'string if in a string.
Return 'charvector if in a character vector
Return nil if none of the above.
Scans from the beginning of line to determine the contenxt."
(save-match-data
(save-restriction (save-restriction
(narrow-to-region (matlab-point-at-bol) (matlab-point-at-eol)) (narrow-to-region (matlab-point-at-bol) (matlab-point-at-eol))
(let ((p (1+ (point))) (let ((p (1+ (point)))
(returnme nil) (returnme nil)
(sregex (concat matlab-string-start-regexp "\\('\\|\"\\)"))) (sregex (concat "\\(%\\)\\|" matlab-string-start-regexp "\\('\\|\"\\)"))
(insregex "\\('\\|\"\\)"))
(save-excursion (save-excursion
(goto-char (point-min)) (goto-char (point-min))
(while (and (re-search-forward (while (and (not (eq returnme 'comment))
(concat "'\\|\"\\|%\\|" (regexp-quote matlab-elipsis-string)) (re-search-forward (if (not returnme) sregex insregex) nil t)
nil t)
(<= (point) p)) (<= (point) p))
(if (or (= ?% (preceding-char)) (cond ((eq returnme nil)
(= ?. (preceding-char))) (cond ((= (preceding-char) ?%)
;; Here we are in a comment for the rest of it. (setq returnme 'comment))
(progn ((= (preceding-char) ?')
(goto-char p) (setq returnme 'charvector))
(setq returnme t)) ((= (preceding-char) ?\")
;; Here, we could be a string start, or transpose... (setq returnme 'string))))
(if (or (= (current-column) 1) ;; ((eq returnme comment) % not possible)
(save-excursion (forward-char -2) ((eq returnme 'string)
(looking-at sregex))) (cond ((= (preceding-char) ?%) nil) ; ok
;; a valid string start, find the end ((= (preceding-char) ?') nil) ; ok
(let ((f (if (= (preceding-char) ?') ((= (preceding-char) ?\")
;; We need to know what kind of string we are finishing to (if (looking-at "\"")
;; pick the right end construct. (forward-char 1) ; skip quoted quote
(re-search-forward matlab-charvec-end-regexp nil t) (setq returnme nil))))) ; end of string
(re-search-forward matlab-stringscalar-end-regexp nil t)))) ((eq returnme 'charvector)
(if f (cond ((= (preceding-char) ?%) nil) ; ok
(setq returnme (> (point) p)) ((= (preceding-char) ?\") nil) ; ok
(setq returnme t))) ((= (preceding-char) ?')
;; Ooops, a transpose, keep going. (if (looking-at "'")
)))) (forward-char 1) ; skip quoted quote
returnme))) (setq returnme nil))))) ; end of charvec
(t (message "Bug in `matlab-cursor-comment-string-context'"))))
;; Return the identified context.
returnme)))))
(defun matlab-cursor-in-string-or-comment ()
"Return t if the cursor is in a valid MATLAB comment or string."
;; comment and string depend on each other. Here is one test
;; that does both.
(let ((ctxt (matlab-cursor-comment-string-context)))
(if (not ctxt)
nil
t)))
(defun matlab-cursor-in-comment () (defun matlab-cursor-in-comment ()
"Return t if the cursor is in a valid MATLAB comment." "Return t if the cursor is in a valid MATLAB comment."
(save-match-data (let ((ctxt (matlab-cursor-comment-string-context)))
(save-restriction (eq ctxt 'comment)))
(narrow-to-region (matlab-point-at-bol) (matlab-point-at-eol))
(save-excursion
(let ((prev-match nil))
(while (and (re-search-backward
(concat "%\\|" (regexp-quote matlab-elipsis-string) "+")
nil t)
(not (matlab-cursor-in-string)))
(setq prev-match (point)))
(if (and prev-match (matlab-cursor-in-string))
(goto-char prev-match))
(and (looking-at (concat "%\\|"
(regexp-quote matlab-elipsis-string)))
(not (matlab-cursor-in-string))))))))
(defun matlab-cursor-in-string (&optional incomplete) (defun matlab-cursor-in-string (&optional incomplete)
"Return t if the cursor is in a valid MATLAB character vector or string scalar. "Return t if the cursor is in a valid MATLAB character vector or string scalar.
Note: INCOMPLETE is now obsolete
If the optional argument INCOMPLETE is non-nil, then return t if we If the optional argument INCOMPLETE is non-nil, then return t if we
are in what could be a an incomplete string." are in what could be a an incomplete string. (Note: this is also the default)"
(let ((m (match-data)) (let ((ctxt (matlab-cursor-comment-string-context)))
(returnme nil)) (or (eq ctxt 'string) (eq ctxt 'charvector))))
(save-restriction
(narrow-to-region (matlab-point-at-bol) (matlab-point-at-eol))
(let ((p (1+ (point)))
(sregex (concat matlab-string-start-regexp "\\('\\|\"\\)" ))
(instring nil))
(save-excursion
;; Comment hunters need strings to not call the comment
;; identifiers. Thus, this routines must be savvy of comments
;; without recursing to them.
(goto-char (point-min))
(while (or (and instring (looking-at "['\"]"))
(and (re-search-forward
(concat "'\\|\"\\|%\\|"
(regexp-quote matlab-elipsis-string))
nil t)
(<= (point) p)
;; Short circuit to fix this.
(progn (setq instring nil) t)))
;; The next line emulates re-search-foward
(if instring (goto-char (match-end 0)))
(if (or (= ?% (preceding-char))
(= ?. (preceding-char)))
;; Here we are in a comment for the rest of it.
;; thus returnme is a force-false.
(goto-char p)
;; Here, we could be in a string start, or transpose...
(if (or (= (current-column) 1)
instring
(save-excursion (forward-char -2)
(looking-at sregex)))
;; a valid string start, find the end
(let ((f (if (= (preceding-char) ?')
;; We need to know what kind of string we are finishing to
;; pick the right end construct.
(re-search-forward matlab-charvec-end-regexp nil t)
(re-search-forward matlab-stringscalar-end-regexp nil t))))
(if (and (not f) incomplete)
(setq returnme t)
(setq returnme (> (point) p))
(setq instring t)))
;; Ooops, a transpose, keep going.
))))))
(set-match-data m)
returnme))
(defun matlab-comment-on-line () (defun matlab-comment-on-line ()
"Place the cursor on the beginning of a valid comment on this line. "Place the cursor on the beginning of a valid comment on this line.
@ -3801,8 +3750,9 @@ Argument BEGIN and END mark the region boundary."
(goto-char begin) (goto-char begin)
;; Here we use the font lock function for finding strings. ;; Here we use the font lock function for finding strings.
;; Its cheap, fast, and accurate. ;; Its cheap, fast, and accurate.
(while (and (matlab-font-lock-charvec-match-normal end) ;; NOTE: This now also does comments
(ispell-region (match-beginning 2) (match-end 2)))))) (while (and (matlab-font-lock-allstring-comment-match-normal end)
(ispell-region (match-beginning 0) (match-end 0))))))
(defun matlab-ispell-strings () (defun matlab-ispell-strings ()
"Spell check valid strings in the current buffer with Ispell. "Spell check valid strings in the current buffer with Ispell.