-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcharset.lisp
170 lines (156 loc) · 6.61 KB
/
charset.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
(in-package #:ctype)
;;;; a lot of the code in this file is cribbed from sbcl's character-set types.
(defmethod ctypep (object (ct charset))
(and (characterp object)
(loop with code = (char-code object)
for (begin . end) in (charset-pairs ct)
when (<= begin code end)
return t
finally (return nil))))
(defmethod subctypep ((ct1 charset) (ct2 charset))
(values
(flet ((subrangep (pair1 pair2)
(let ((low1 (car pair1)) (high1 (cdr pair1))
(low2 (car pair2)) (high2 (cdr pair2)))
(and (>= low1 low2) (<= high1 high2)))))
(loop with pairs2 = (charset-pairs ct2)
for pair1 in (charset-pairs ct1)
always (position pair1 pairs2 :test #'subrangep)))
t))
(defmethod ctype= ((ct1 charset) (ct2 charset))
(values (equal (charset-pairs ct1) (charset-pairs ct2)) t))
(defmethod disjointp ((ct1 charset) (ct2 charset))
(values
(flet ((overlap-p (pair1 pair2)
(let ((low1 (car pair1)) (high1 (cdr pair1))
(low2 (car pair2)) (high2 (cdr pair2)))
(and (<= low1 high2) (<= low2 high1)))))
(loop with pairs2 = (charset-pairs ct2)
for pair1 in (charset-pairs ct1)
never (position pair1 pairs2 :test #'overlap-p)))
t))
(defmethod conjointp ((ct1 charset) (ct2 charset)) (values nil t))
(defmethod cofinitep ((ct charset)) (values nil t))
(defun negate-charset-pairs (pairs)
(if (null pairs)
`((0 . ,(1- char-code-limit)))
(let ((not-pairs nil))
(when (plusp (caar pairs))
(push (cons 0 (1- (caar pairs))) not-pairs))
(loop for tail on pairs
for high1 = (cdar tail)
for low2 = (caadr tail)
until (null (rest tail))
do (push (cons (1+ high1) (1- low2)) not-pairs)
finally (when (< (cdar tail) (1- char-code-limit))
(push (cons (1+ (cdar tail)) (1- char-code-limit))
not-pairs))
(return (nreverse not-pairs))))))
(defmethod negate ((ct charset))
(let ((pairs (charset-pairs ct)))
(if (equal pairs `((0 . ,(1- char-code-limit))))
(call-next-method)
(let ((not-character
(negation
(charset `((0 . ,(1- char-code-limit)))))))
(disjunction
not-character
(charset (negate-charset-pairs pairs)))))))
(defun conjoin-charset-pairs (pairs1 pairs2)
(if (and pairs1 pairs2)
(let ((res nil)
(pair1 (pop pairs1))
(pair2 (pop pairs2)))
(loop
;; Put the higher pair on the right.
(when (> (car pair1) (car pair2))
(rotatef pair1 pair2)
(rotatef pairs1 pairs2))
(let ((pair1-high (cdr pair1)))
(cond
((> (car pair2) pair1-high)
;; no overlap -- discard pair1 and move on
(if (null pairs1)
(return)
(setf pair1 (pop pairs1))))
((<= (cdr pair2) pair1-high)
;; pair2 is a subrange of pair1
(push (cons (car pair2) (cdr pair2)) res)
(cond
((= (cdr pair2) pair1-high)
;; both pairs are now in the result, so advance both
(if (null pairs1)
(return)
(setf pair1 (pop pairs1)))
(if (null pairs2)
(return)
(setf pair2 (pop pairs2))))
(t
;; (< (cdr pair2) pair1-high)
;; so pair2 is a strict subrange of pair1 - advance 2 only,
;; and "modify" pair1 accordingly
(if (null pairs2)
(return)
(setf pair2 (pop pairs2)))
(setf pair1 (cons (1+ (cdr pair2)) pair1-high)))))
(t
;; (> (cdr pair2) (cdr pair1))
;; so the ranges overlap, but only partially.
;; push the overlap and advance 2 and modify 1.
(push (cons (car pair2) pair1-high) res)
(if (null pairs1)
(return)
(setf pair1 (pop pairs1)))
(setf pair2 (cons (1+ pair1-high) (cdr pair2)))))))
;; done
(nreverse res))
;; One of the charsets is degenerate (empty)
;; which ought to have been normalized away, but as long as we're here
nil))
(defmethod conjoin/2 ((ct1 charset) (ct2 charset))
(charset (conjoin-charset-pairs (charset-pairs ct1) (charset-pairs ct2))))
(defun disjoin-charset-pairs (pairs1 pairs2)
(cond
((not pairs1) pairs2)
((not pairs2) pairs1)
(t
(let ((res nil))
(loop (let* ((current (if (> (caar pairs2) (caar pairs1))
(pop pairs1)
(pop pairs2)))
(low (car current)) (high (cdr current)))
;; Keep grabbing overlapping pairs until we run out.
(loop (cond ((and pairs1
(<= (caar pairs1) (1+ high)))
(setf high (max high (cdr (pop pairs1)))))
((and pairs2
(<= (caar pairs2) (1+ high)))
(setf high (max high (cdr (pop pairs2)))))
(t (return)))) ; ran out
(push (cons low high) res)
;; Check to see if we're really done.
(unless (or pairs1 pairs2)
(return (nreverse res)))))))))
(defmethod disjoin/2 ((ct1 charset) (ct2 charset))
(charset (disjoin-charset-pairs (charset-pairs ct1) (charset-pairs ct2))))
(defmethod subtract ((ct1 charset) (ct2 charset))
;; lazy
(charset (conjoin-charset-pairs (charset-pairs ct1)
(negate-charset-pairs (charset-pairs ct2)))))
(defmethod unparse ((ct charset))
(let ((pairs (charset-pairs ct)))
(cond ((equal pairs +standard-charset+)
'standard-char)
((equal pairs +base-charset+)
'base-char)
((equal pairs `((0 . ,(1- char-code-limit))))
'character)
((equal pairs (negate-charset-pairs +standard-charset+))
'(and character (not standard-char)))
((equal pairs (negate-charset-pairs +base-charset+))
'extended-char)
(t ; something weird. do a member type.
`(member
,@(loop for (low . high) in pairs
nconc (loop for i from low upto high
collect (code-char i))))))))