gen1.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. #! /usr/bin/env python2.7
  2. #
  3. # A. V. Shiryaev, 2012.10
  4. #
  5. # generate encoder/decoder UCS-2 <-> SBCS/DBCS-encoding for Component Pascal
  6. import sys
  7. def be2le (x):
  8. y = 0
  9. while x > 0:
  10. y = y * 256 + x % 256
  11. x = x / 256
  12. return y
  13. def nBytes (x):
  14. if x == 0:
  15. return 1
  16. else:
  17. y = 0
  18. while x > 0:
  19. y = y + 1
  20. x = x / 256
  21. return y
  22. def EC (x, p="X"):
  23. if x < 0:
  24. x = -x
  25. s = '-'
  26. else:
  27. s = ''
  28. if x < 0xA0:
  29. return "%s%02X%s" % (s, x, p)
  30. elif x < 256:
  31. return "%s0%02X%s" % (s, x, p)
  32. elif x < 0xA000:
  33. return "%s%04X%s" % (s, x, p)
  34. else:
  35. return "%s0%04X%s" % (s, x, p)
  36. def gen (modName, r, head, head0=None):
  37. e = head
  38. if len(e) > 0:
  39. if head0:
  40. head0S = '%s\n\n\t\t' % (head0,)
  41. else:
  42. head0S = ''
  43. errS = """
  44. (*
  45. %sErrors:
  46. %s
  47. *)
  48. """ % (head0S, '\n'.join(e),)
  49. else:
  50. if head0:
  51. errS = "\n\t(* %s *)\n" % (head0,)
  52. else:
  53. errS = ''
  54. ### encoder
  55. r.sort(key=lambda x: x[1])
  56. encS = []
  57. rs = 0
  58. checkDBCS = False
  59. for i, x in r:
  60. # i: encoding char in big-endian
  61. # x: ucs-2
  62. nb = nBytes(i)
  63. if nb > 1:
  64. checkDBCS = True
  65. if rs == 0:
  66. rs = 1
  67. rsi = i
  68. rsnb = nb
  69. rsx = x
  70. rsn = 0
  71. elif rs == 1:
  72. # nb == rsnb condition is redundant
  73. if (nb == rsnb) and (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  74. rsn = rsn + 1
  75. else:
  76. if rsn == 0:
  77. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  78. elif rsi == rsx:
  79. encS.append("\t\t\t| %s..%s: y := x" % (EC(rsx, "H"), EC(rsx + rsn, "H")))
  80. elif rsi > rsx:
  81. encS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  82. else: # rsi < rsx
  83. encS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsx - rsi, "H")))
  84. rsi = i
  85. rsnb = nb
  86. rsx = x
  87. rsn = 0
  88. else:
  89. assert False
  90. if rs == 1:
  91. if rsn == 0:
  92. encS.append("\t\t\t| %s: y := %s" % (EC(rsx, "H"), EC(rsi, "H")))
  93. elif rsi == rsx:
  94. encS.append("\t\t\t| %s..%s: y := x" % (EC(rsx, "H"), EC(rsx + rsn, "H")))
  95. elif rsi > rsx:
  96. encS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsi - rsx, "H")))
  97. else: # rsi < rsx
  98. encS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsx, "H"), EC(rsx + rsn, "H"), EC(rsx - rsi, "H")))
  99. if checkDBCS:
  100. s1 = """IF y < 256 THEN
  101. t[tW] := SHORT(CHR(y)); INC(tW)
  102. ELSE
  103. t[tW] := SHORT(CHR(y DIV 100H)); t[tW+1] := SHORT(CHR(y MOD 100H)); INC(tW, 2)
  104. END;"""
  105. else:
  106. s1 = """t[tW] := SHORT(CHR(y)); INC(tW);"""
  107. ### decoder
  108. r.sort(key=lambda x: x[0])
  109. decS = []
  110. rs = 0
  111. if checkDBCS:
  112. decS.append('\t\t\tCASE d.st OF 0:')
  113. decS.append('\t\t\t\tCASE x OF')
  114. for i, x in r:
  115. # i: encoding char in big-endian
  116. # x: ucs-2
  117. if nBytes(i) == 1:
  118. if rs == 0:
  119. rs = 1
  120. rsi = i
  121. rsx = x
  122. rsn = 0
  123. elif rs == 1:
  124. if (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  125. rsn = rsn + 1
  126. else:
  127. if rsn == 0:
  128. decS.append("\t\t\t\t| %s: t[tW] := %s; INC(tW)" % (EC(rsi, "H"), EC(rsx, "X")))
  129. elif rsi == rsx:
  130. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  131. elif rsx > rsi:
  132. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x + %s); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  133. else: # rsx < rsi
  134. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x - %s); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  135. rsi = i
  136. rsx = x
  137. rsn = 0
  138. else:
  139. assert False
  140. else:
  141. break
  142. if rs == 1:
  143. if rsn == 0:
  144. decS.append("\t\t\t\t| %s: t[tW] := %s; INC(tW)" % (EC(rsi, "H"), EC(rsx, "X")))
  145. elif rsi == rsx:
  146. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  147. elif rsx > rsi:
  148. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x + %s); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  149. else: # rsx < rsi
  150. decS.append("\t\t\t\t| %s..%s: t[tW] := CHR(x - %s); INC(tW)" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  151. i0 = set()
  152. for i, x in r:
  153. if nBytes(i) == 2:
  154. i0.add(i / 256)
  155. i0 = list(i0)
  156. i0.sort()
  157. decS.append("\t\t\t\t| %s: d.b := 256 * x; INC(d.st)" % (','.join([ EC(i, "H") for i in i0 ])))
  158. decS.append('\t\t\t\tELSE d.st := -1; RETURN END')
  159. decS.append('\t\t\t| 1:')
  160. decS.append('\t\t\t\tCASE x + d.b OF')
  161. rs = 0
  162. for i, x in r:
  163. if nBytes(i) == 2:
  164. if rs == 0:
  165. rs = 1
  166. rsi = i
  167. rsx = x
  168. rsn = 0
  169. elif rs == 1:
  170. if (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  171. rsn = rsn + 1
  172. else:
  173. if rsn == 0:
  174. decS.append("\t\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  175. elif rsi == rsx:
  176. decS.append("\t\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  177. elif rsx > rsi:
  178. decS.append("\t\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  179. else: # rsx < rsi
  180. decS.append("\t\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  181. rsi = i
  182. rsx = x
  183. rsn = 0
  184. else:
  185. assert False
  186. if rs == 1:
  187. if rsn == 0:
  188. decS.append("\t\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  189. elif rsi == rsx:
  190. decS.append("\t\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  191. elif rsx > rsi:
  192. decS.append("\t\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  193. else: # rsx < rsi
  194. decS.append("\t\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  195. decS.append('\t\t\t\tELSE d.st := -1; RETURN END;')
  196. decS.append('\t\t\t\tt[tW] := CHR(y); INC(tW);')
  197. decS.append('\t\t\t\tDEC(d.st)')
  198. decS.append('\t\t\tEND;')
  199. s0 = """
  200. b: INTEGER;
  201. st: INTEGER; (* 0 - no state, > 0 - number of chars expected, -1 - error *)
  202. """
  203. s2 = """\t\tIF d.st = 0 THEN state := FALSE
  204. ELSIF d.st > 0 THEN state := TRUE
  205. ELSE HALT(100)
  206. END"""
  207. s3 = """;
  208. BEGIN d.st := 0
  209. END Reset"""
  210. s4 = " d.Reset;"
  211. else: # SBCS
  212. decS.append('\t\t\tCASE x OF')
  213. for i, x in r:
  214. if rs == 0:
  215. rs = 1
  216. rsi = i
  217. rsx = x
  218. rsn = 0
  219. elif rs == 1:
  220. if (i == rsi + rsn + 1) and (x == rsx + rsn + 1):
  221. rsn = rsn + 1
  222. else:
  223. if rsn == 0:
  224. decS.append("\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  225. elif rsi == rsx:
  226. decS.append("\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  227. elif rsx > rsi:
  228. decS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  229. else: # rsx < rsi
  230. decS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  231. rsi = i
  232. rsx = x
  233. rsn = 0
  234. else:
  235. assert False
  236. if rs == 1:
  237. if rsn == 0:
  238. decS.append("\t\t\t| %s: y := %s" % (EC(rsi, "H"), EC(rsx, "H")))
  239. elif rsi == rsx:
  240. decS.append("\t\t\t| %s..%s: y := x" % (EC(rsi, "H"), EC(rsi + rsn, "H")))
  241. elif rsx > rsi:
  242. decS.append("\t\t\t| %s..%s: y := x + %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsx - rsi, "H")))
  243. else: # rsx < rsi
  244. decS.append("\t\t\t| %s..%s: y := x - %s" % (EC(rsi, "H"), EC(rsi + rsn, "H"), EC(rsi - rsx, "H")))
  245. decS.append('\t\t\tELSE\n\t\t\t\tRETURN\n\t\t\tEND;')
  246. decS.append('\t\t\tt[tW] := CHR(y); INC(tW);')
  247. s0 = " "
  248. s2 = "\t\tstate := FALSE"
  249. s3 = ", EMPTY"
  250. s4 = ""
  251. return """MODULE %s;
  252. (* This file was generated automatically *)
  253. %s
  254. IMPORT Codecs := EncCodecs;
  255. TYPE
  256. Encoder = POINTER TO RECORD (Codecs.Encoder) END;
  257. Decoder = POINTER TO RECORD (Codecs.Decoder)%sEND;
  258. (* Encoder *)
  259. PROCEDURE (e: Encoder) Encode (IN f: ARRAY OF CHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF SHORTCHAR; VAR tW: INTEGER);
  260. VAR x, y: INTEGER;
  261. BEGIN
  262. WHILE fLen > 0 DO
  263. x := ORD(f[fR]);
  264. CASE x OF
  265. %s
  266. ELSE
  267. RETURN
  268. END;
  269. %s
  270. INC(fR); DEC(fLen)
  271. END
  272. END Encode;
  273. PROCEDURE NewEncoder* (): Codecs.Encoder;
  274. VAR e: Encoder;
  275. BEGIN
  276. NEW(e); RETURN e
  277. END NewEncoder;
  278. (* Decoder *)
  279. PROCEDURE (d: Decoder) Decode (IN f: ARRAY OF SHORTCHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF CHAR; VAR tW: INTEGER; OUT state: BOOLEAN);
  280. VAR x, y: INTEGER;
  281. BEGIN
  282. WHILE fLen > 0 DO
  283. x := ORD(f[fR]);
  284. %s
  285. INC(fR); DEC(fLen)
  286. END;
  287. %s
  288. END Decode;
  289. PROCEDURE (d: Decoder) Reset%s;
  290. PROCEDURE NewDecoder* (): Codecs.Decoder;
  291. VAR d: Decoder;
  292. BEGIN
  293. NEW(d);%s RETURN d
  294. END NewDecoder;
  295. END %s.""" % (modName, errS, s0, '\n'.join(encS), s1, '\n'.join(decS), s2, s3, s4, modName)