s502 assembler
A very simple assembler for the 6502 line of processors written in C
Public Member Functions | Data Fields | Private Member Functions
Token Class Reference

Token type to store token information. More...

#include <token_t.h>

Public Member Functions

void token_print (Token *token)
 Pretty-print one token, with its source and length. More...
 
int token_recognize (Token *t)
 Parse token - test if it's an opcode, a label or a directive. More...
 
int token_analyze_instruction (State *s, Token *t)
 analyze instruction token (instruction, addressmode & operand) More...
 
int token_compile (State *s, Token *t, char **dataptr)
 compile token into binary data More...
 
int token_get_operand (State *s, Token *t)
 parse the operand of the instruction as a number More...
 

Data Fields

int binSize
 number of bytes this token will generate More...
 
enum tokenType type
 type of this token More...
 
struct {
enum AddressMode addressmode
 Address mode of the instruction. More...
 
int number
 operand of the instruction More...
 
Instruction * inst
 pointer to instruction data More...
 
int address
 address of instruction in resulting bytecode More...
 
} instr
 instruction data. Not used if token type is not TT_INSTR More...
 
char stripped [TOKEN_BUFFER_SIZE]
 stripped text from source file More...
 
int len
 length of stripped text More...
 
struct {
char fname [TOKEN_SOURCE_FILE_SIZE]
 file name of source file More...
 
int lineno
 line in source file More...
 
} source
 source of this token More...
 

Private Member Functions

int token_link_instruction (State *s, Token *token)
 find and link the instruction entry for a token More...
 
int token_get_addressmode (Token *t)
 Determine the address mode of a token. More...
 

Detailed Description

Token type to store token information.

Definition at line 37 of file token_t.h.

Member Function Documentation

◆ token_analyze_instruction()

int token_analyze_instruction ( State * s,
Token * t
)

analyze instruction token (instruction, addressmode & operand)

Returns
0 on success, -1 on error

Definition at line 224 of file tokenFunc.c.

224  {
225  if (token_link_instruction(s, t) < 0) {
226  ERROR("Unknown instruction!\n");
227  goto ERR;
228  }
229  if (token_get_addressmode(t) < 0) {
230  ERROR("Can not determine instruction address mode!\n");
231  goto ERR;
232  }
233  if (t->instr.inst->opcs[t->instr.addressmode] == OPC_INVALID) {
234  ERROR("Invalid instruction-addressmode combination!\n");
235  ERROR("A-mode: %s\n", ADRM_NAMES[t->instr.addressmode]);
236  goto ERR;
237  }
238 
239  t->binSize = 1 + ADRM_SIZES[t->instr.addressmode];
240 
241  return 0;
242 
243 
244 ERR:
245  token_print(t);
246  FAIL("Error while analyzing token: \n");
247  return -1;
248 }

Referenced by pass_one().

◆ token_compile()

int token_compile ( State * s,
Token * t,
char **  dataptr 
)

compile token into binary data

Parameters
dataptr: char** to return data to
s: assembler state
t: token to compile
Returns
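the number of bytes in dataptr (note: for instruction tokens the implementation listed below returns 0 on success and -1 on error; for directive tokens it returns the result of directive_compile())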

Definition at line 318 of file tokenFunc.c.

318  {
319  if (t->type != TT_INSTR) {
320  if (t->type == TT_DIRECTIVE) {
321  return directive_compile(s, t, dataptr);
322  }
323  // should NEVER reach this:
324  ERROR("Something went terribly wrong!\n");
325  ERROR("A LABEL in last pass!\n");
326  return -1;
327  }
328  int size = t->binSize;
329  char* data = malloc(size);
330  *dataptr = data;
331  if (t->instr.addressmode == ADRM_REL) {
332  int n = t->instr.number - t->instr.address - 2;
333 
334  if (-128 > n || 127 < n) {
335  ERROR("Relative addressing jump too far!\n");
336  printf("Target: $%x, from: $%x (diff: $%x)\n", t->instr.number, t->instr.address, n);
337  token_print(t);
338  free(data);
339  *dataptr = NULL;
340  return -1;
341  }
342  t->instr.number = n;
343  }
344  data[0] = t->instr.inst->opcs[t->instr.addressmode];
345  if (size > 1) {
346  data[1] = t->instr.number & 0xff;
347  }
348  if (size > 2) {
349  data[2] = (t->instr.number >> 8) & 0xff;
350  }
351  return 0;
352 }

Referenced by concat_bin().

◆ token_get_addressmode()

int token_get_addressmode ( Token * t)
private

Determine the address mode of a token.

Returns
0 on success and -1 on error

(modifies the token in-place)

Definition at line 109 of file tokenFunc.c.

109  {
110 
111  // step 1 - implied
112  if (t->len == 3) {
113  t->instr.addressmode = ADRM_IMP;
114  return 0;
115  }
116 
117 
118  // step 2 - acc
119  if (t->len == 5 && util_match_char(t->stripped[4], 'a')) {
120  t->instr.addressmode = ADRM_ACC;
121  return 0;
122  }
123 
124 
125  // step 3 - imm
126  if (t->stripped[4] == '#') {
127  t->instr.addressmode = ADRM_IMM;
128  return 0;
129  }
130 
131 
132  // step 4 - relative
133  if (t->instr.inst->opcs[ADRM_REL] != OPC_INVALID) {
134  // this can ONLY be a relative
135  t->instr.addressmode = ADRM_REL;
136  return 0;
137  }
138 
139 
140 
141  // step 5,6,7 have a few checks in common
142  // so we can avoid code duplication by fatoring that out
143 
144  int s_x = 0, s_y = 0, s_close = 0, s_sep = 0; // seen x, y, close or coma
145 
146  for (int i = 0; i < t->len; i++) {
147  if (util_match_char(t->stripped[i], 'x') && s_sep)
148  s_x = 1;
149  if (util_match_char(t->stripped[i], 'y') && s_sep)
150  s_y = 1;
151  if (t->stripped[i] == ',')
152  s_sep = 1;
153  if (t->stripped[i] == ')')
154  s_close = 1;
155  }
156 
157  // must have a separator if it has an index
158  if ((s_x || s_y) && !s_sep)
159  return -1;
160 
161  // can not have both indexes
162  if (s_x && s_y)
163  return -1;
164 
165  // end of common chekcs
166 
167 
168  // step 5 - zeropage
169  if (t->stripped[4] == '*') {
170  // must NOT have a close
171  if (s_close)
172  return -1;
173 
174  // no index - normal zpg
175  if (!(s_x || s_y)) {
176  t->instr.addressmode = ADRM_ZPG;
177  return 0;
178  }
179 
180  // Set according to index
181  t->instr.addressmode = s_x ? ADRM_ZPG_X : ADRM_ZPG_Y;
182  return 0;
183  }
184 
185  // step 6 - indirect
186  if (t->stripped[4] == '(') {
187  // must have a close )
188  if (!s_close)
189  return -1;
190 
191  // no index - normal inderect
192  if (!(s_x || s_y)) {
193  t->instr.addressmode = ADRM_IND;
194  return 0;
195  }
196 
197  // Set according to index
198  t->instr.addressmode = s_x ? ADRM_IND_X : ADRM_IND_Y;
199  return 0;
200  }
201 
202 
203  // step 7 - absolute
204  {
205  // must not have a close )
206  if (s_close)
207  return -1;
208 
209  // no index - normal absolute
210  if (!(s_x || s_y)) {
211  t->instr.addressmode = ADRM_ABS;
212  return 0;
213  }
214 
215  // Set according to index
216  t->instr.addressmode = s_x ? ADRM_ABS_X : ADRM_ABS_Y;
217  return 0;
218  }
219 
220  return -1;
221 }

References ADRM_ABS, ADRM_ABS_X, ADRM_ABS_Y, ADRM_ACC, ADRM_IMM, ADRM_IMP, ADRM_IND, ADRM_IND_X, ADRM_IND_Y, ADRM_REL, ADRM_ZPG, ADRM_ZPG_X, ADRM_ZPG_Y, instr, len, OPC_INVALID, stripped, and util_match_char().

Referenced by token_analyze_instruction().

◆ token_get_operand()

int token_get_operand ( State * s,
Token * t
)

parse the operand of the instruction as a number

Modifies the token in-place. Does NOT fail if the operand is an undefined label!

Definition at line 287 of file tokenFunc.c.

287  {
288  if (t->type != TT_INSTR) return 0;
289  if (t->binSize == 1) {
290  t->instr.number = 0;
291  return 0;
292  }
293 
294  char* begin = &t->stripped[4];
295  char* end;
296  for (; *begin != 0 && (*begin == ' ' || *begin == '*' || *begin == '(' || *begin == '#'); begin++);
297  for (end = begin; *end != 0 && *end != ')' && *end != ',' && *end != ' '; end++);
298 
299  char* buff = malloc(end - begin + 1);
300  strncpy(buff, begin, end - begin + 1);
301  buff[end - begin] = 0;
302 
303  int n = number_get_number(s, buff);
304  free(buff);
305 
306  if (n == NUMBER_ERROR) {
307  token_print(t);
308  FAIL("Opcode operand parsing failed!\n");
309  return -1;
310  }
311  if (n == NUMBER_LABEL_NODEF) {
312  return 0;
313  }
314  t->instr.number = n;
315  return 0;
316 }

Referenced by pass_one(), and pass_two().

◆ token_link_instruction()

int token_link_instruction ( State * s,
Token * token
)
private

find and link the instruction entry for a token

Returns
0 on success, -1 on error

Definition at line 30 of file tokenFunc.c.

30  {
31  token->instr.inst = instruction_find(s->instr, token->stripped);
32  if (token->instr.inst == NULL) return -1;
33  return 0;
34 }

References State::instr, instr, Instruction::instruction_find(), and stripped.

Referenced by token_analyze_instruction().

◆ token_print()

void token_print ( Token * token)

Pretty-print one token, with its source and length.

Definition at line 20 of file tokenFunc.c.

20  {
21  printf("\t%s:%d:%d\t\t'%.*s'\n", token->source.fname, token->source.lineno, token->len, token->len, token->stripped);
22 }

Referenced by compile_data(), compile_pad(), pass_one(), pass_two(), read_file(), read_token(), token_analyze_instruction(), token_compile(), token_get_operand(), token_recognize(), and tokenslist_debug_print().

◆ token_recognize()

int token_recognize ( Token * t)

Parse token - test if it's an opcode, a label or a directive.

Parameters
t: token to recognize - will be modified in-place
Returns
0 on success, -1 on error

Definition at line 251 of file tokenFunc.c.

251  {
252  // how many token types does it fit
253  int found = 0;
254 
255  // directive - starts with a dot
256  if (t->stripped[0] == '.') {
257  t->type = TT_DIRECTIVE;
258  found++;
259  }
260 
261  // label - ends with a ':'
262  if (t->stripped[t->len - 1] == ':') {
263  t->type = TT_LABEL;
264  found++;
265  }
266 
267  // instruction - 3rd char is a space or len is 3
268  if (t->stripped[3] == ' ' || t->stripped[3] == '\0') {
269  t->type = TT_INSTR;
270  t->instr.number = -1;
271  found++;
272  }
273 
274  // 0 or more than one match is a problem
275  if (found != 1) {
276  ERROR("Can not recognize token:\n");
277  token_print(t);
278  return -1;
279  }
280 
281  LOG(4, "Recognized token as %d:\n", t->type);
282  LOGDO(4, token_print(t));
283 
284  return 0;
285 }

Referenced by tokenslist_recognize().

Field Documentation

◆ binSize

int Token::binSize

number of bytes this token will generate

Definition at line 39 of file token_t.h.

Referenced by compile_data(), compile_pad(), pass_one(), read_token(), token_analyze_instruction(), token_compile(), and token_get_operand().

◆ instr

struct { ... } Token::instr

◆ len

int Token::len

length of stripped text

Definition at line 57 of file token_t.h.

Referenced by pass_one(), read_file(), read_token(), token_get_addressmode(), token_print(), and token_recognize().

◆ source

struct { ... } Token::source

source of this token

Referenced by read_file(), and token_print().

◆ stripped

char Token::stripped[TOKEN_BUFFER_SIZE]

◆ type

enum tokenType Token::type

type of this token

Definition at line 41 of file token_t.h.

Referenced by pass_one(), token_compile(), token_get_operand(), and token_recognize().


The documentation for this class was generated from the following files:
ADRM_ZPG_X
@ ADRM_ZPG_X
Zeropage, X indexed.
Definition: addressmode.h:45
TT_INSTR
@ TT_INSTR
instruction token
Definition: token_t.h:26
Token::instr
struct Token::@1 instr
instruction data. Not used if token type is not TT_INSTR
ADRM_NAMES
const char * ADRM_NAMES[]
Human-readable names of address modes.
Definition: addressmode.c:16
Token::stripped
char stripped[TOKEN_BUFFER_SIZE]
stripped text from source file
Definition: token_t.h:55
ADRM_REL
@ ADRM_REL
Relative (8bit, signed 2's complement)
Definition: addressmode.h:39
Token::token_link_instruction
int token_link_instruction(State *s, Token *token)
find and link the instruction entry for a token
Definition: tokenFunc.c:30
Token::token_get_addressmode
int token_get_addressmode(Token *t)
Determine the address mode of a token.
Definition: tokenFunc.c:109
ADRM_ZPG
@ ADRM_ZPG
Zeropage.
Definition: addressmode.h:42
ADRM_IND_X
@ ADRM_IND_X
Indirect (8 bit), X indexed.
Definition: addressmode.h:33
ADRM_IND
@ ADRM_IND
Indirect (16 bit)
Definition: addressmode.h:30
State::instr
Instruction * instr
instruction data
Definition: state.h:40
LOG
#define LOG(LVL,...)
logging macro - works like printf
Definition: logging.h:28
LOGDO
#define LOGDO(LVL, x)
Conditional macro. Wraps contents into a conditional based on log level.
Definition: logging.h:35
ADRM_SIZES
int ADRM_SIZES[ADRM_COUNT+1]
operand sizes of addressmodes
Definition: addressmode.c:37
ADRM_ZPG_Y
@ ADRM_ZPG_Y
Zeropage, Y indexed.
Definition: addressmode.h:48
util_match_char
int util_match_char(char a, char b)
Case-insensitive character compare.
Definition: util.c:21
ADRM_ACC
@ ADRM_ACC
Accumulator.
Definition: addressmode.h:12
Token::binSize
int binSize
number of bytes this token will generate
Definition: token_t.h:39
TT_DIRECTIVE
@ TT_DIRECTIVE
directive token
Definition: token_t.h:28
Token::type
enum tokenType type
type of this token
Definition: token_t.h:41
Token::token_print
void token_print(Token *token)
Pretty-print one token, with its source and length.
Definition: tokenFunc.c:20
directive_compile
int directive_compile(State *s, Token *t, char **dataptr)
Compile a directive into binary data.
Definition: directive.c:498
ADRM_IMM
@ ADRM_IMM
Immediate.
Definition: addressmode.h:24
TT_LABEL
@ TT_LABEL
label token
Definition: token_t.h:30
ADRM_IND_Y
@ ADRM_IND_Y
Indirect (8 bit), Y indexed.
Definition: addressmode.h:36
Token::source
struct Token::@2 source
source of this token
Token::len
int len
length of stripped text
Definition: token_t.h:57
ADRM_ABS_Y
@ ADRM_ABS_Y
Absolute, Y indexed.
Definition: addressmode.h:21
ADRM_ABS_X
@ ADRM_ABS_X
Absolute, X indexed.
Definition: addressmode.h:18
NUMBER_ERROR
@ NUMBER_ERROR
Could not parse a number or constant is undefined.
Definition: number.h:15
ADRM_IMP
@ ADRM_IMP
Implied (no operand)
Definition: addressmode.h:27
instruction_find
Instruction * instruction_find(Instruction *list, char *mnem)
Definition: instructions.c:111
number_get_number
int number_get_number(State *s, char *str)
interpret a string as a constant, label or number
Definition: number.c:75
ADRM_ABS
@ ADRM_ABS
Absolute addressing.
Definition: addressmode.h:15
OPC_INVALID
@ OPC_INVALID
An invalid opcode to signal invalid / non-existent variations.
Definition: instructions.h:15
FAIL
#define FAIL(...)
Fancy-print a fail (failed step). Works like printf.
Definition: logging.h:45
ERROR
#define ERROR(...)
Fancy-print an error (cause of failure). Works like printf.
Definition: logging.h:40
NUMBER_LABEL_NODEF
@ NUMBER_LABEL_NODEF
Undefined label.
Definition: number.h:17