gen1.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. #! /usr/bin/env python2.7
  2. #
  3. # A. V. Shiryaev, 2012.10
  4. #
  5. # generate encoder/decoder UCS-2 <-> SBCS/DBCS-encoding for Component Pascal
  6. import sys
  7. def be2le (x):
  8. y = 0
  9. while x > 0:
  10. y = y * 256 + x % 256
  11. x = x / 256
  12. return y
  13. def nBytes (x):
  14. if x == 0:
  15. return 1
  16. else:
  17. y = 0
  18. while x > 0:
  19. y = y + 1
  20. x = x / 256
  21. return y
  22. def EC (x, p="X"):
  23. if x < 0:
  24. x = -x
  25. s = '-'
  26. else:
  27. s = ''
  28. if x < 0xA0:
  29. return "%s%02X%s" % (s, x, p)
  30. elif x < 256:
  31. return "%s0%02X%s" % (s, x, p)
  32. elif x < 0xA000:
  33. return "%s%04X%s" % (s, x, p)
  34. else:
  35. return "%s0%04X%s" % (s, x, p)
  36. def gen (modName, r, head, head0=None):
  37. e = head
  38. if len(e) > 0:
  39. if head0:
  40. head0S = '%s\n\n\t\t' % (head0,)
  41. else:
  42. head0S = ''
  43. errS = """
  44. (*
  45. %sErrors:
  46. %s
  47. *)
  48. """ % (head0S, '\n'.join(e),)
  49. else:
  50. if head0:
  51. errS = "\n\t(* %s *)\n" % (head0,)
  52. else:
  53. errS = ''
  54. ### encoder
  55. r.sort(key=lambda x: x[1])
  56. encS = []
  57. rs = 0
  58. checkDBCS = False
  59. for i, x in r:
  60. # i: encoding char in big-endian
  61. # x: ucs-2
  62. nb = nBytes(i)
  63. if nb > 1:
  64. checkDBCS = True
  65. if rs == 0:
  66. rs = 1
  67. rsi = i
  68. rsnb = nb
  69. rsx = x
  70. rsn = 0
  71. elif rs == 1:
  72. if (nb == rsnb) and (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  73. rsn = rsn + 1
  74. else:
  75. if rsnb == 1:
  76. if rsn == 0:
  77. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  78. elif rsi == rsx:
  79. encS.append("\t\t\t| %s..%s: y := x" % (EC(rsx, "H"), EC(rsx + rsn, "H")))
  80. elif rsi > rsx:
  81. encS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  82. else: # rsi < rsx
  83. encS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsx - rsi, "H")))
  84. elif rsnb == 2:
  85. if rsn == 0:
  86. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  87. else:
  88. encS.append("\t\t\t| %s..%s: y := %s + x" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  89. elif rsnb == 3:
  90. assert False # not implemented
  91. else:
  92. assert False # not implemented
  93. rsi = i
  94. rsnb = nb
  95. rsx = x
  96. rsn = 0
  97. else:
  98. assert False
  99. if rs == 1:
  100. if rsnb == 1:
  101. if rsn == 0:
  102. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  103. elif rsi == rsx:
  104. encS.append("\t\t\t| %s..%s: y := x" % (EC(rsx, "H"), EC(rsx + rsn, "H")))
  105. elif rsi > rsx:
  106. encS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  107. else: # rsi < rsx
  108. encS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsx - rsi, "H")))
  109. elif rsnb == 2:
  110. if rsn == 0:
  111. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  112. else:
  113. encS.append("\t\t\t| %s..%s: y := %s + x" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  114. elif rsnb == 3:
  115. assert False # not implemented
  116. else:
  117. assert False # not implemented
  118. if checkDBCS:
  119. s1 = """IF y < 256 THEN
  120. t[tW] := SHORT(CHR(y)); INC(tW)
  121. ELSE
  122. t[tW] := SHORT(CHR(y DIV 100H)); t[tW+1] := SHORT(CHR(y MOD 100H)); INC(tW, 2)
  123. END;"""
  124. else:
  125. s1 = """t[tW] := SHORT(CHR(y)); INC(tW);"""
  126. ### decoder
  127. r.sort(key=lambda x: x[0])
  128. decS = []
  129. rs = 0
  130. if checkDBCS:
  131. decS.append('\t\t\tCASE d.st OF 0:')
  132. decS.append('\t\t\t\tCASE x OF')
  133. for i, x in r:
  134. # i: encoding char in big-endian
  135. # x: ucs-2
  136. nb = nBytes(i)
  137. if rs == 0:
  138. rs = 1
  139. rsi = i
  140. rsnb = nb
  141. rsx = x
  142. rsn = 0
  143. elif rs == 1:
  144. if (nb == rsnb) and (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  145. rsn = rsn + 1
  146. else:
  147. if rsnb == 1:
  148. if rsn == 0:
  149. decS.append("\t\t\t\t| %s: t[tW] := %s; INC(tW)" % (EC(rsi, "H"), EC(rsx, "X")))
  150. elif rsi == rsx:
  151. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  152. elif rsx > rsi:
  153. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x + %s); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  154. else: # rsx < rsi
  155. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x - %s); INC(tW)" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  156. elif rsnb == 2:
  157. #if rsn == 0:
  158. # decS.append("\t\t\t\t| %s: d.b := x; INC(d.st)" % (EC(rsi / 256, "H"),))
  159. #else:
  160. # decS.append("\t\t\t\t| %s..%s: d.b := x; INC(d.st)" % (EC(rsi / 256, "H"), EC(rsi + rsn, "H")))
  161. pass # TODO
  162. elif rsnb == 3:
  163. assert False # not implemented
  164. else:
  165. assert False # not implemented
  166. rsi = i
  167. rsnb = nb
  168. rsx = x
  169. rsn = 0
  170. else:
  171. assert False
  172. # TODO
  173. decS.append('\t\t\t\tELSE d.st := -1; RETURN END')
  174. decS.append('\t\t\t| 1:')
  175. decS.append('\t\t\t\tCASE x OF')
  176. # TODO
  177. decS.append('\t\t\t\tELSE d.st := -1; RETURN END')
  178. decS.append('\t\t\tEND;')
  179. else:
  180. decS.append('\t\t\tCASE x OF')
  181. for i, x in r:
  182. if rs == 0:
  183. rs = 1
  184. rsi = i
  185. rsx = x
  186. rsn = 0
  187. elif rs == 1:
  188. if (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  189. rsn = rsn + 1
  190. else:
  191. if rsn == 0:
  192. decS.append("\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  193. elif rsi == rsx:
  194. decS.append("\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  195. elif rsx > rsi:
  196. decS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  197. else: # rsx < rsi
  198. decS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  199. rsi = i
  200. rsx = x
  201. rsn = 0
  202. else:
  203. assert False
  204. if rs == 1:
  205. if rsn == 0:
  206. decS.append("\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  207. elif rsi == rsx:
  208. decS.append("\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  209. elif rsx > rsi:
  210. decS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  211. else: # rsx < rsi
  212. decS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  213. decS.append('\t\t\tELSE\n\t\t\t\td.st := -1; RETURN\n\t\t\tEND;')
  214. decS.append('\n\t\t\tt[tW] := CHR(y); INC(tW);')
  215. return """MODULE %s;
  216. (* This file was generated automatically *)
  217. %s
  218. IMPORT Codecs := EncCodecs;
  219. TYPE
  220. Encoder = POINTER TO RECORD (Codecs.Encoder) END;
  221. Decoder = POINTER TO RECORD (Codecs.Decoder)
  222. b: INTEGER;
  223. st: INTEGER; (* 0 - no state, > 0 - number of chars expected, -1 - error *)
  224. END;
  225. (* Encoder *)
  226. PROCEDURE (e: Encoder) Encode (IN f: ARRAY OF CHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF SHORTCHAR; VAR tW: INTEGER);
  227. VAR x, y: INTEGER;
  228. BEGIN
  229. WHILE fLen > 0 DO
  230. x := ORD(f[fR]);
  231. CASE x OF
  232. %s
  233. ELSE
  234. RETURN
  235. END;
  236. %s
  237. INC(fR);
  238. DEC(fLen)
  239. END
  240. END Encode;
  241. PROCEDURE NewEncoder* (): Codecs.Encoder;
  242. VAR e: Encoder;
  243. BEGIN
  244. NEW(e); RETURN e
  245. END NewEncoder;
  246. (* Decoder *)
  247. PROCEDURE (d: Decoder) Decode (IN f: ARRAY OF SHORTCHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF CHAR; VAR tW: INTEGER; OUT state: BOOLEAN);
  248. VAR x, y: INTEGER;
  249. BEGIN
  250. WHILE fLen > 0 DO
  251. x := ORD(f[fR]);
  252. %s
  253. INC(fR);
  254. DEC(fLen)
  255. END;
  256. IF d.st = 0 THEN state := FALSE
  257. ELSIF d.st > 0 THEN state := TRUE
  258. ELSE HALT(100)
  259. END
  260. END Decode;
  261. PROCEDURE (d: Decoder) Reset;
  262. BEGIN
  263. d.st := 0
  264. END Reset;
  265. PROCEDURE NewDecoder* (): Codecs.Decoder;
  266. VAR d: Decoder;
  267. BEGIN
  268. NEW(d); d.Reset; RETURN d
  269. END NewDecoder;
  270. END %s.""" % (modName, errS, '\n'.join(encS), s1, '\n'.join(decS), modName)