gen1.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. #! /usr/bin/env python2.7
  2. #
  3. # A. V. Shiryaev, 2012.10
  4. #
  5. # generate encoder/decoder UCS-2 <-> SBCS/DBCS-encoding for Component Pascal
  6. import sys
  7. def nBytes (x):
  8. if x == 0:
  9. return 1
  10. else:
  11. y = 0
  12. while x > 0:
  13. y = y + 1
  14. x = x / 256
  15. return y
  16. def EC (x, p="X"):
  17. if x < 0:
  18. x = -x
  19. s = '-'
  20. else:
  21. s = ''
  22. if x < 0xA0:
  23. return "%s%02X%s" % (s, x, p)
  24. elif x < 256:
  25. return "%s0%02X%s" % (s, x, p)
  26. elif x < 0xA000:
  27. return "%s%04X%s" % (s, x, p)
  28. else:
  29. return "%s0%04X%s" % (s, x, p)
  30. def RC (x, p="X"):
  31. x = list(x) # copy
  32. x.sort()
  33. s = []
  34. st = 0
  35. for i in x:
  36. if st == 0:
  37. sti = i
  38. stn = 0
  39. st = 1
  40. elif st == 1:
  41. if i == sti + stn + 1:
  42. stn = stn + 1
  43. else:
  44. if stn == 0:
  45. s.append(EC(sti, p))
  46. elif stn == 1:
  47. s.append(EC(sti, p))
  48. s.append(EC(sti + 1, p))
  49. else:
  50. s.append("%s..%s" % (EC(sti, p), EC(sti + stn, p)))
  51. sti = i
  52. stn = 0
  53. if st == 1:
  54. if stn == 0:
  55. s.append(EC(sti, p))
  56. elif stn == 1:
  57. s.append(EC(sti, p))
  58. s.append(EC(sti + 1, p))
  59. else:
  60. s.append("%s..%s" % (EC(sti, p), EC(sti + stn, p)))
  61. return ','.join(s)
  62. # for encoder
  63. def opt0 (r):
  64. o = {}
  65. for i, x in r:
  66. ofs = i - x
  67. o.setdefault(ofs, []).append(x)
  68. return o
  69. # for decoder
  70. def opt1 (r, nb):
  71. o = {}
  72. for i, x in r:
  73. if nBytes(i) == nb:
  74. ofs = x - i
  75. o.setdefault(ofs, []).append(i)
  76. return o
  77. def gen (modName, r, head, head0=None):
  78. e = head
  79. if len(e) > 0:
  80. if head0:
  81. head0S = '%s\n\n\t\t' % (head0,)
  82. else:
  83. head0S = ''
  84. errS = """
  85. (*
  86. %sErrors:
  87. %s
  88. *)
  89. """ % (head0S, '\n'.join(e),)
  90. else:
  91. if head0:
  92. errS = "\n\t(* %s *)\n" % (head0,)
  93. else:
  94. errS = ''
  95. ### calc max num of SHORTCHARs per CHAR
  96. maxN = 1
  97. for i, x in r:
  98. # i: encoding char in big-endian
  99. # x: ucs-2
  100. nb = nBytes(i)
  101. if nb > maxN:
  102. maxN = nb
  103. ### encoder
  104. r.sort(key=lambda x: x[1])
  105. encS = []
  106. o = opt0(r)
  107. for k, v in o.iteritems():
  108. if len(v) == 1:
  109. encS.append("\t\t\t| %s: y := %s" % (EC(v[0], "H"), EC(k + v[0], "H")))
  110. else:
  111. encS.append("\t\t\t| %s: y := %s + x" % (RC(v, "H"), EC(k, "H")))
  112. if maxN == 2: # DBCS
  113. s1 = """IF y < 256 THEN
  114. t[tW] := SHORT(CHR(y)); INC(tW)
  115. ELSE
  116. t[tW] := SHORT(CHR(y DIV 100H)); t[tW+1] := SHORT(CHR(y MOD 100H)); INC(tW, 2)
  117. END;"""
  118. elif maxN == 1: # SBCS
  119. s1 = """t[tW] := SHORT(CHR(y)); INC(tW);"""
  120. else:
  121. assert False # not implemented
  122. ### decoder
  123. r.sort(key=lambda x: x[0])
  124. decS = []
  125. rs = 0
  126. if maxN == 2: # DBCS
  127. decS.append('\t\t\tCASE d.st OF 0:')
  128. decS.append('\t\t\t\tCASE x OF')
  129. o = opt1(r, 1)
  130. for k, v in o.iteritems():
  131. if len(v) == 1:
  132. decS.append("\t\t\t\t| %s: t[tW] := %s; INC(tW)" % (EC(v[0], "H"), EC(k + v[0], "X")))
  133. else:
  134. decS.append("\t\t\t\t| %s: t[tW] := CHR(%s + x); INC(tW)" % (RC(v, "H"), EC(k, "H")))
  135. i0 = set()
  136. for i, x in r:
  137. if nBytes(i) == 2:
  138. i0.add(i / 256)
  139. i0 = list(i0)
  140. i0.sort()
  141. decS.append("\t\t\t\t| %s: d.b := 256 * x; INC(d.st)" % (RC(i0, "H"),))
  142. decS.append('\t\t\t\tELSE d.st := -1; RETURN END')
  143. decS.append('\t\t\t| 1:')
  144. decS.append('\t\t\t\tCASE x + d.b OF')
  145. o = opt1(r, 2)
  146. for k, v in o.iteritems():
  147. if len(v) == 1:
  148. decS.append("\t\t\t\t| %s: y := %s" % (EC(v[0], "H"), EC(k + v[0], "H")))
  149. else:
  150. decS.append("\t\t\t\t| %s: y := %s + x" % (RC(v, "H"), EC(k, "H")))
  151. decS.append('\t\t\t\tELSE d.st := -1; RETURN END;')
  152. decS.append('\t\t\t\tt[tW] := CHR(y); INC(tW);')
  153. decS.append('\t\t\t\tDEC(d.st)')
  154. decS.append('\t\t\tEND;')
  155. s0 = """
  156. b: INTEGER;
  157. st: INTEGER; (* 0 - no state, > 0 - number of chars expected, -1 - error *)
  158. """
  159. s2 = """\t\tIF d.st = 0 THEN state := FALSE
  160. ELSIF d.st > 0 THEN state := TRUE
  161. ELSE HALT(100)
  162. END"""
  163. s3 = """;
  164. BEGIN d.st := 0
  165. END Reset"""
  166. s4 = " d.Reset;"
  167. elif maxN == 1: # SBCS
  168. decS.append('\t\t\tCASE x OF')
  169. o = opt1(r, 1)
  170. for k, v in o.iteritems():
  171. if len(v) == 1:
  172. decS.append("\t\t\t| %s: y := %s" % (EC(v[0], "H"), EC(k + v[0], "H")))
  173. else:
  174. decS.append("\t\t\t| %s: y := %s + x" % (RC(v, "H"), EC(k, "H")))
  175. decS.append('\t\t\tELSE\n\t\t\t\tRETURN\n\t\t\tEND;')
  176. decS.append('\t\t\tt[tW] := CHR(y); INC(tW);')
  177. s0 = " "
  178. s2 = "\t\tstate := FALSE"
  179. s3 = ", EMPTY"
  180. s4 = ""
  181. else:
  182. assert False # not implemented
  183. return """MODULE %s;
  184. (* This file was generated automatically *)
  185. %s
  186. IMPORT Codecs := EncCodecs;
  187. TYPE
  188. Encoder = POINTER TO RECORD (Codecs.Encoder) END;
  189. Decoder = POINTER TO RECORD (Codecs.Decoder)%sEND;
  190. (* Encoder *)
  191. PROCEDURE (e: Encoder) Encode (IN f: ARRAY OF CHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF SHORTCHAR; VAR tW: INTEGER);
  192. VAR x, y: INTEGER;
  193. BEGIN
  194. WHILE fLen > 0 DO
  195. x := ORD(f[fR]);
  196. CASE x OF
  197. %s
  198. ELSE
  199. RETURN
  200. END;
  201. %s
  202. INC(fR); DEC(fLen)
  203. END
  204. END Encode;
  205. PROCEDURE NewEncoder* (): Codecs.Encoder;
  206. VAR e: Encoder;
  207. BEGIN
  208. NEW(e); RETURN e
  209. END NewEncoder;
  210. (* Decoder *)
  211. PROCEDURE (d: Decoder) Decode (IN f: ARRAY OF SHORTCHAR; VAR fR, fLen: INTEGER; VAR t: ARRAY OF CHAR; VAR tW: INTEGER; OUT state: BOOLEAN);
  212. VAR x, y: INTEGER;
  213. BEGIN
  214. WHILE fLen > 0 DO
  215. x := ORD(f[fR]);
  216. %s
  217. INC(fR); DEC(fLen)
  218. END;
  219. %s
  220. END Decode;
  221. PROCEDURE (d: Decoder) Reset%s;
  222. PROCEDURE NewDecoder* (): Codecs.Decoder;
  223. VAR d: Decoder;
  224. BEGIN
  225. NEW(d);%s RETURN d
  226. END NewDecoder;
  227. END %s.""" % (modName, errS, s0, '\n'.join(encS), s1, '\n'.join(decS), s2, s3, s4, modName)