GenArch: gen_arch.py

File gen_arch.py, 20.1 kB (added by peter@tortall.net, 1 year ago)

First cut of architecture generation (Python)

Line 
1 #! /usr/bin/env python2.4
2 # Yasm Architecture Generator
3 #
4 #  Copyright (C) 2006  Peter Johnson
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions
8 # are met:
9 # 1. Redistributions of source code must retain the above copyright
10 #    notice, this list of conditions and the following disclaimer.
11 # 2. Redistributions in binary form must reproduce the above copyright
12 #    notice, this list of conditions and the following disclaimer in the
13 #    documentation and/or other materials provided with the distribution.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
16 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
19 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 # POSSIBILITY OF SUCH DAMAGE.
26 """
27 Generates architecture files based on an architectural description.
28
29 The architectural description is a full-fledged Python file that
30 is directly exec'ed by this script.
31
32 Note: this means the input file must be trusted!
33
34 The architecture file must set the following global variables:
35
36     arch - A string containing the architecture name.  This name is used
37         as the function prefix, as the arch keyword, and as the filename
38         prefix, so it should be a short lowercase name with no punctuation
39         or spaces.
40     lsb_bit - The bit number of the least significant bit in the
41         description of Fields.  The generator can handle both little and
42         big bit endians.
43     insn_bit_width - The number of bits in an instruction.
44     machines - A list of strings (machine keywords).
45     parsers - A list of strings (supported parser keywords).
46
47 After defining these globals, the architecture file should call the
48 functions in this module (particularly add_register, add_field, add_insn,
49 and add_derived_insn) to describe the ISA.
50 """
51 import copy
52
# Architecture-wide settings.  They start out unset/empty and are filled in
# by config_arch().
#   _lsb_bit        - bit number of the least significant bit (read by Field
#                     and merge_fields)
#   _insn_bit_width - number of bits in an instruction (read by Field, gen_id
#                     and gen_gap)
#   _machines       - list of machine keywords
#   _parsers        - list of parser keywords (written out by gen_gap)
_lsb_bit = None
_insn_bit_width = None
_machines = []
_parsers = []

def config_arch(lsb_bit, insn_bit_width, machines, parsers):
    """
    Record the architecture-wide settings in the module-level variables.

    The arguments are stored as given (no copies are made), replacing any
    previously configured values.
    """
    global _lsb_bit, _insn_bit_width, _machines, _parsers
    _lsb_bit, _insn_bit_width = lsb_bit, insn_bit_width
    _machines, _parsers = machines, parsers
64
class _Struct(object):
    """
    Simple record type: every keyword argument becomes an attribute.

    repr() lists the attributes named in _pos_names first (positionally, in
    that order), followed by name=value for every other instance attribute
    whose name does not start with an underscore.
    """
    _pos_names = []

    def __init__(self, **kwargs):
        for attr, value in kwargs.items():
            setattr(self, attr, value)

    def __repr__(self):
        positional = self._pos_names
        parts = []
        for attr in positional:
            parts.append(repr(getattr(self, attr)))
        for attr in self.__dict__:
            # Private attributes and the ones already shown are skipped
            if attr.startswith("_") or attr in positional:
                continue
            parts.append("%s=%r" % (attr, getattr(self, attr)))
        return "%s(%s)" % (self.__class__.__name__, ', '.join(parts))
79
class Register(_Struct):
    """
    Description of one register: Register(name, reg_type, reg_index, **kwargs)

    name, reg_type and reg_index are stored under those attribute names;
    any extra keyword arguments are stored as additional attributes.
    """
    _pos_names = ['name', 'reg_type', 'reg_index']

    def __init__(self, name, reg_type, reg_index, **kwargs):
        self.name, self.reg_type, self.reg_index = name, reg_type, reg_index
        # The base class turns each remaining keyword into an attribute
        _Struct.__init__(self, **kwargs)
90
# Global registry of Register objects, keyed by register name.
registers = {}
def add_register(name, reg_type, reg_index, **kwargs):
    """
    Create a Register and store it in the global registry under its name.

    Raises ValueError if a register with that name was already added.
    """
    new_reg = Register(name, reg_type, reg_index, **kwargs)
    if name not in registers:
        registers[name] = new_reg
        return
    raise ValueError("duplicate register name")
98
class Field(_Struct):
    """
    An instruction field.  This can range in complexity:
        lsb/msb/width: At least two of the three must be given; the class
            will figure out the missing one based on the other givens.
            Both msb and lsb must lie inside the instruction.
        ooo: Out-of-order or separated groups of bits making up a single
            field.  The constructor needs to be given a list of (msb, lsb)
            tuples; internally this is converted into a list of
            (msb, lsb, width) tuples.
        to_fields: Stored unchanged as the to_fields attribute (only used
            when ooo is not given).
        rshift: The field contents should be shifted right before being
            stored into the field bits.  The rshift value is the number
            of bits to shift right.  Defaults to 0.
        pc_rel: True if field contents are relative to the current PC
            rather than to the segment start (absolute).  Defaults to
            False.
        signed: True if field contents should be treated as signed number.
            Defaults to pc_rel.

    Any other keyword argument is stored as an attribute of the same name.

    Raises ValueError if fewer than two of width/msb/lsb are given, if all
    three are given but disagree, if the instruction bit width has not been
    configured, or if the field does not fit inside the instruction.
    """
    def __init__(self, name, **kwargs):
        self.name = name
        self.rshift = 0
        self.pc_rel = False
        self.signed = False

        ooo = kwargs.pop("ooo", None)
        to_fields = kwargs.pop("to_fields", None)
        if ooo:
            self.ooo = [(msb, lsb, abs(msb-lsb)+1) for (msb, lsb) in ooo]
        elif to_fields:
            self.to_fields = to_fields
        else:
            width = kwargs.pop("width", None)
            msb = kwargs.pop("msb", None)
            lsb = kwargs.pop("lsb", None)

            # One missing value can be derived; two or more cannot.  Report
            # that clearly instead of failing on arithmetic with None.
            if [width, msb, lsb].count(None) > 1:
                raise ValueError("at least two of width, msb, and lsb "
                                 "must be given")

            # Make width/msb/lsb all jive
            if width is None:
                width = abs(msb-lsb)+1
            elif msb is None:
                # The msb lies above the lsb when bit 0 is the lsb,
                # below it otherwise.
                if _lsb_bit == 0: msb = lsb+(width-1)
                else: msb = lsb-(width-1)
            elif lsb is None:
                if _lsb_bit == 0: lsb = msb-(width-1)
                else: lsb = msb+(width-1)
            elif width != abs(msb-lsb)+1:
                raise ValueError("width, msb, and lsb do not match")
            self.width = width
            self.msb = msb
            self.lsb = lsb
            if not _insn_bit_width:
                raise ValueError("instruction bit width not set")
            # Both ends of the field have to be inside the instruction
            for bit in (msb, lsb):
                if bit < 0 or bit >= _insn_bit_width:
                    raise ValueError("field extends past end of instruction")

        for k, v in kwargs.items():
            setattr(self, k, v)
        # A PC-relative field is signed unless the caller said otherwise
        if self.pc_rel and "signed" not in kwargs:
            self.signed = True
157
# Global registry of Field objects, keyed by field name.
fields = {}
def add_field(name, **kwargs):
    """
    Create a Field from the keyword arguments and store it in the global
    registry under its name.

    Raises ValueError if a field with that name was already added (and
    whatever Field itself raises for an invalid description).
    """
    new_field = Field(name, **kwargs)
    if name not in fields:
        fields[name] = new_field
        return
    raise ValueError("duplicate field name")
165
class Instruction(_Struct):
    """
    An instruction format.  Consists of:
        - The instruction name (mnemonic)
        - A list of operands (names)
        - A matching list of operand types
        - A mapping of "fixed" field from field names to their fixed
          numeric values
        - A mapping from field names to operand names
        - Miscellaneous other data that can be stored via keyword args

    The description string is stored as the instance's __doc__.  After
    construction (and after every update()) the instance also provides
    num_operands and operand_fields (the field name for each operand, in
    operand order), and operands/operand_types are tuples.
    """
    _pos_names = ['name', 'operands', 'operand_types', 'fields_fixed',
                  'fields_operands']

    def __init__(self, name, descr, operands, operand_types, fields_fixed,
                 fields_operands=None, **kwargs):
        self.name = name
        self.__doc__ = descr
        self.operands = operands
        self.operand_types = operand_types
        # update() modifies both mappings in place, so keep private copies
        # rather than references to the caller's dictionaries.
        self.fields_fixed = dict(fields_fixed)
        # Be nice and build a reasonable default for fields_operands
        if fields_operands is None:
            fields_operands = dict((x, x) for x in operands)
        self.fields_operands = dict(fields_operands)
        self.update(**kwargs)

    def update(self, **kwargs):
        """
        Updates instruction format contents based on keyword arguments.
        Keywords may either be member names (e.g. keywords passed to
        __init__) or one of the following special keywords:
            update_fields_fixed: A mapping that updates the
                fields_fixed mapping (e.g. retaining unupdated members).
            fix_operands: A mapping from operand name to fixed value;
                the corresponding operand is removed from the operands
                list and the fields_operands mapping and the operand name
                and fixed value are added to the fields_fixed mapping.
                Limitation: the operand name is used as the field name in
                fields_fixed, so the two must match.
            del_operands: A list of operand names to delete.  Each one is
                removed from operands/operand_types, together with every
                fields_operands entry that refers to it.  The list passed
                in is not modified.

        If operands is changed and fields_operands is not, a reasonable
        default is generated for fields_operands (field name = operand
        name for all operands).

        Raises ValueError if the result is inconsistent: operands and
        operand_types differ in length, an unknown field name is used, a
        field refers to a nonexistent operand, or a field is both fixed and
        mapped to an operand.
        """
        # Handle update_fields_fixed
        update_fields_fixed = kwargs.pop("update_fields_fixed", None)
        if update_fields_fixed is not None:
            self.fields_fixed.update(update_fields_fixed)

        # Handle fix_operands
        fix_operands = kwargs.pop("fix_operands", None)
        # Collect the names to delete in a fresh set so that the caller's
        # list is never extended behind its back.
        del_operands = set(kwargs.pop("del_operands", None) or ())
        if fix_operands is not None:
            # Merge fixed operand values into fields_fixed
            self.fields_fixed.update(fix_operands)
            # Fixed operands are no longer real operands
            del_operands.update(fix_operands)

        # Handle del_operands and the deletion portion of fix_operands
        if del_operands:
            # Delete from operands and operand_types
            kept = [(o, t) for (o, t) in zip(self.operands,
                                             self.operand_types)
                    if o not in del_operands]
            self.operands = [o for (o, t) in kept]
            self.operand_types = [t for (o, t) in kept]

            # Delete from fields_operands every field that refers to a
            # deleted operand (collected first: the dict can't change
            # while it is being iterated).
            stale = [f for (f, o) in self.fields_operands.items()
                     if o in del_operands]
            for f in stale:
                del self.fields_operands[f]

        # Update remaining keywords directly
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.num_operands = len(self.operands)
        self.operands = tuple(self.operands)
        self.operand_types = tuple(self.operand_types)

        # Be nice and update with a reasonable default for fields_operands
        if "operands" in kwargs and "fields_operands" not in kwargs:
            self.fields_operands = dict((x, x) for x in self.operands)

        # Do some sanity checks
        if len(self.operands) != len(self.operand_types):
            raise ValueError("mismatch between operands and types length")

        for k in self.fields_fixed:
            if k not in fields:
                raise ValueError("unrecognized field name '%s'" % k)
        for (k, v) in self.fields_operands.items():
            if k not in fields:
                raise ValueError("unrecognized field name '%s'" % k)
            if v not in self.operands:
                raise ValueError("field '%s' references nonexistent operand '%s'" % (k,v))

        # Check for duplicates in fields_fixed and fields_operands
        fset = set(self.fields_fixed)
        fset &= set(self.fields_operands)
        if fset:
            raise ValueError("Duplicate fixed and operands fields %s" %
                             str(fset))

        # Build an operand_fields mapping
        o2f = dict((o, f) for (f, o) in self.fields_operands.items())
        self.operand_fields = tuple(o2f[x] for x in self.operands)
280
# Global registry: instruction name -> list of instruction formats.
instructions = {}
# Every operand type used by any registered instruction format.
used_types = set()
# Largest operand count of any registered instruction format.
max_operands = 0
def add_insn_base(name, parent=None, **kwargs):
    """
    Add an instruction format to the global registry.

    With a parent format, the new format is a deep copy of it, renamed to
    name and then modified through its update() method with the keyword
    arguments.  Without one, the keyword arguments are passed to the
    Instruction constructor.

    The format is appended to the list registered under name; used_types
    and max_operands are updated to include it.
    """
    global used_types, max_operands

    if parent:
        insn = copy.deepcopy(parent)
        # The copy still carries the parent's mnemonic; give it its own.
        insn.name = name
        insn.update(**kwargs)
    else:
        insn = Instruction(name, **kwargs)

    # Register only once the format was built successfully, so that a
    # failure does not leave an empty entry behind.
    instructions.setdefault(name, []).append(insn)

    used_types |= set(insn.operand_types)
    if insn.num_operands > max_operands:
        max_operands = insn.num_operands
299
def add_insn(name, descr, operands, operand_types, fields_fixed,
             fields_operands=None, **kwargs):
    """
    Convenience front end to add_insn_base() for a brand new instruction
    format: the positional arguments (name, descr, operands, operand_types,
    fields_fixed, fields_operands) are forwarded as keywords, along with
    any additional keyword arguments.
    """
    add_insn_base(name,
                  descr=descr,
                  operands=operands,
                  operand_types=operand_types,
                  fields_fixed=fields_fixed,
                  fields_operands=fields_operands,
                  **kwargs)
313
def add_derived_insn(name, parent, **kwargs):
    """
    Add a derived instruction to the global registry.  This copies all
    instruction formats from the specified parent instruction (looked up
    by name) to a new name, possibly modifying via keyword arguments.

    Raises KeyError if no instruction named parent has been added.
    """
    # Iterate over a snapshot: when name == parent the new formats are
    # appended to the very list being walked, which would never terminate.
    for fmt in list(instructions[parent]):
        add_insn_base(name, parent=fmt, **kwargs)
322
def add_derived_one_insn(name, parent, **kwargs):
    """
    Add a derived instruction based only on the first format registered
    for the parent instruction (looked up by name), possibly modifying it
    via keyword arguments.
    """
    first_format = instructions[parent][0]
    add_insn_base(name, parent=first_format, **kwargs)
325
def add_macro_insn(name, descr, operands, operand_types, parent,
                   parent_operands):
    """
    Placeholder for macro instructions; not implemented yet.

    Always raises NotImplementedError.
    """
    # TODO: find a match on the parent with matching operands/operand_types.
    # NotImplementedError is the exception; NotImplemented is a constant
    # meant for rich comparisons and cannot be raised.
    raise NotImplementedError("add_macro_insn is not implemented")
332
def merge_fields(**fieldvals):
    """
    Combine field values into a single integer.

    Each keyword is the name of a registered field and its argument is the
    value to store there.  A field made of several bit groups (ooo)
    consumes the value group by group, starting with the low bits of the
    value for the first group.

    Raises ValueError if two of the given fields occupy the same bit.
    """
    result = 0
    occupied = 0
    for fname, value in fieldvals.items():
        fld = fields[fname]
        # A plain field is handled as a single (msb, lsb) group
        if hasattr(fld, "ooo"):
            pieces = fld.ooo
        else:
            pieces = [(fld.msb, fld.lsb)]
        for piece in pieces:
            hi, lo = piece[0], piece[1]
            nbits = abs(hi - lo) + 1
            # Turn the described lsb into a shift from bit 0 of the result
            if _lsb_bit == 0:
                shift = lo
            else:
                shift = _lsb_bit - lo
            mask = (1 << nbits) - 1
            placed = mask << shift
            if occupied & placed:
                raise ValueError("field overlap (filled %x, want '%s' (%x))" %
                                 (occupied, fname, placed))
            occupied |= placed
            result |= ((value >> fld.rshift) & mask) << shift
            # The next group takes the following bits of the value
            value >>= nbits
    return result
362
def get_insn_fixed(name, index):
    """
    Get the value of combining just the fixed fields for the
    corresponding instruction name and format index.

    Raises KeyError for an unknown instruction name and IndexError for a
    format index that is out of range.
    """
    # Formats are Instruction objects: fields_fixed is an attribute, the
    # objects cannot be subscripted.
    return merge_fields(**instructions[name][index].fields_fixed)
367
def get_groups():
    """
    Determine sets of instructions ("groups") that share the same operand
    lists and mapping from operand to fields.

    Returns a dictionary mapping a signature to the list of instruction
    names having it.  The signature is a pair (sig1, sig2) with one entry
    per format of the instruction: sig1 holds the operand type tuples in
    sorted order, and sig2 holds, for the same formats in the same order,
    the sorted (operand index, field name) pairs.
    """
    groups = {}
    for iname in instructions:
        per_format = []
        for insn in instructions[iname]:
            # Lookup from operand name to operand index
            # (this is needed because each instruction can have its own
            # naming for operands)
            oi = dict((op, idx) for (idx, op) in enumerate(insn.operands))
            mapping = tuple(sorted((oi[op], f) for (f, op)
                                   in insn.fields_operands.items()))
            per_format.append((tuple(insn.operand_types), mapping))

        # Every format contributes to the signature, not just the last
        # one, so instructions are only grouped when all formats agree.
        # Sorting makes the signature independent of format order.
        per_format.sort()
        sig1 = tuple(types for (types, mapping) in per_format)
        sig2 = tuple(mapping for (types, mapping) in per_format)

        # Save instruction into field mapping
        groups.setdefault((sig1, sig2), []).append(iname)
    return groups
392
def get_msb(v):
    """
    Return how many times v has to be shifted right to reach zero, i.e.
    the 1-based position of its most significant set bit (0 for v == 0).

    Raises ValueError for negative numbers.
    """
    if v < 0:
        raise ValueError("does not support negative numbers")
    nbits = 0
    remaining = v
    while remaining:
        remaining >>= 1
        nbits += 1
    return nbits
401
def get_mask(v):
    """
    Return a mask of all ones that is as wide as v (as measured by
    get_msb), and at least one bit wide.
    """
    nbits = get_msb(v)
    if nbits == 0:
        # Even zero gets a one-bit mask
        nbits = 1
    return (1 << nbits) - 1
406
def gen_arch_h(out, arch):
    """Generate [arch]arch.h output file.

    Not implemented yet: nothing is written to out.
    """
410
def gen_arch_c(out, arch):
    """Generate [arch]arch.c output file.

    Not implemented yet: nothing is written to out.
    """
414
def gen_id(out, arch):
    """
    Generate the instruction identification C source (the file gen_code
    opens as [arch]id.c) and write it to out.

    The output consists of OPT_ defines (one per used operand type), OPA_
    defines (one per field, shifted above the OPT_ bits) and one
    "[arch]_insn_info grpN" table per instruction group.

    Returns (iname_grp, iname_needfields): the first maps each instruction
    name to its group name ("grpN"), the second maps it to the set of
    fixed fields whose values are not common to the whole group.
    """
    from textwrap import wrap

    # Operand type numbers, followed by a mask covering all of them
    defines = []
    for idx, type_name in enumerate(used_types):
        defines.append("#define OPT_%s\t%#x" % (type_name, idx))
    defines.append("#define OPT_MASK\t%#x" % get_mask(idx))
    # Field numbers are placed just above the bits of the type numbers
    shift = get_msb(idx)

    defines.append("")

    for idx, field_name in enumerate(fields):
        defines.append("#define OPA_%s\t(%#x<<%d)" % (field_name, idx, shift))
    defines.append("#define OPA_MASK\t(%#x<<%d)" % (get_mask(idx), shift))

    print >> out, '\n'.join(defines)
    print >> out

    groups = get_groups()

    iname_grp = {}
    iname_needfields = {}
    group_lines = []
    for num, match in enumerate(sorted(groups)):
        members = groups[match]

        # For every operand type list, split the fixed fields into those
        # with one value across the group and those that differ.
        fields_common = {}
        fields_diff = {}
        for iname in members:
            for insn in instructions[iname]:
                types = insn.operand_types
                # The first format seen provides the initial common values
                common = fields_common.setdefault(types,
                                                  dict(insn.fields_fixed))
                diff = fields_diff.setdefault(types, set())
                for field, value in insn.fields_fixed.items():
                    if field not in common:
                        diff.add(field)
                    elif value != common[field]:
                        del common[field]
                        diff.add(field)

        for iname in members:
            iname_grp[iname] = "grp%d" % num
            needed = set()
            iname_needfields[iname] = needed
            for fieldset in fields_diff.values():
                needed.update(fieldset)
                # A field that differs anywhere is not common anywhere
                for field in fieldset:
                    for insn in instructions[iname]:
                        fields_common.get(insn.operand_types,
                                          {}).pop(field, None)

        group_lines.extend(wrap("/* %s */" % ', '.join(members),
                                subsequent_indent=" * "))
        group_lines.append("static const %s_insn_info grp%d = {" % (arch, num))
        # The formats of the group's first instruction describe the group
        rows = []
        for insn in instructions[members[0]]:
            opcode = "0x%0*x" % (_insn_bit_width // 4,
                                 merge_fields(**fields_common[insn.operand_types]))
            operands = ["OPT_%s|OPA_%s" % pair for pair
                        in zip(insn.operand_types, insn.operand_fields)]
            # Pad the operand list to the size of the longest one
            operands.extend(["0"] * (max_operands - insn.num_operands))
            cells = [opcode, str(insn.num_operands),
                     "{" + ', '.join(operands) + "}"]
            rows.append("    { " + ', '.join(cells) + " }")
        group_lines.extend(',\n'.join(rows).split('\n'))
        group_lines.append("};")
        group_lines.append("")
    print >> out, '\n'.join(group_lines)

    return iname_grp, iname_needfields
499
def gen_gap(out, arch, iname_grp, iname_needfields):
    """
    Generate the parser description (the file gen_code opens as
    [arch]parse.gap) and write it to out.

    Writes an ARCH line, a PARSERS line, one INSN line per instruction name
    in iname_grp (sorted) and one REG line per registered register
    (sorted).  The value on an INSN line merges those fixed fields of the
    instruction's first format that are listed in iname_needfields.
    """
    print >> out, "ARCH\t%s" % arch
    print >> out, "PARSERS\t%s" % ' '.join(_parsers)

    hex_digits = _insn_bit_width // 4
    insn_lines = []
    for iname in sorted(iname_grp):
        first_fixed = instructions[iname][0].fields_fixed
        wanted = {}
        for fname in iname_needfields[iname]:
            if fname in first_fixed:
                wanted[fname] = first_fixed[fname]
        insn_lines.append("INSN\t-\t%s\t%s\t0x%0*x" %
                          (iname, iname_grp[iname], hex_digits,
                           merge_fields(**wanted)))
    print >> out, '\n'.join(insn_lines)
    print >> out

    reg_lines = ["REG\t%s\t%s\t%d" % (regname,
                                      registers[regname].reg_type,
                                      registers[regname].reg_index)
                 for regname in sorted(registers)]
    print >> out, '\n'.join(reg_lines)+"\n"
523
def gen_code(arch):
    """
    Generate all output files in the current directory:
    [arch]id.c, [arch]parse.gap, [arch]arch.h and [arch]arch.c.

    Each file is closed as soon as its generator returns, even if the
    generator raises.
    """
    def run(suffix, generator, *extra):
        # Open one output file, run its generator, always close the file
        out = open("%s%s" % (arch, suffix), "w")
        try:
            return generator(out, arch, *extra)
        finally:
            out.close()

    iname_grp, iname_needfields = run("id.c", gen_id)
    run("parse.gap", gen_gap, iname_grp, iname_needfields)
    run("arch.h", gen_arch_h)
    run("arch.c", gen_arch_c)
531