Merge pull request #41 from jiangyy/litenes-ppu-opt

Litenes ppu opt
This commit is contained in:
Yanyan Jiang 2019-04-29 08:27:18 +00:00 committed by GitHub
commit 490756cbb8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 386 additions and 629 deletions

View File

@ -15,13 +15,6 @@ typedef int bool;
// #define log(fmt, ...) printk("%s:%d: " fmt, __func__, __LINE__, ## __VA_ARGS__)
#define log(fmt, ...)
// Pack three 8-bit channels into one 0x00RRGGBB word.
static inline uint32_t pixel(uint8_t r, uint8_t g, uint8_t b) {
  uint32_t packed = r;          // red ends up in bits 16..23
  packed = (packed << 8) | g;   // green in bits 8..15
  packed = (packed << 8) | b;   // blue in bits 0..7
  return packed;
}
// Channel accessors for a 0x00RRGGBB pixel word; bits above the selected
// byte are discarded.
static inline uint8_t R(uint32_t p) { return (uint8_t)((p >> 16) & 0xffu); }
static inline uint8_t G(uint32_t p) { return (uint8_t)((p >> 8) & 0xffu); }
static inline uint8_t B(uint32_t p) { return (uint8_t)(p & 0xffu); }
// Byte Bit Operations
void common_set_bitb(byte *variable, byte position);
void common_unset_bitb(byte *variable, byte position);
@ -44,14 +37,14 @@ static inline bool common_bit_set(unsigned long value, byte position) { return v
static inline byte byte_pack(int expand[8]) {
byte v = 0;
v |= (expand[0] << 0);
v |= (expand[1] << 1);
v |= (expand[2] << 2);
v |= (expand[3] << 3);
v |= (expand[4] << 4);
v |= (expand[5] << 5);
v |= (expand[6] << 6);
v |= (expand[7] << 7);
v |= (!!expand[0] << 0);
v |= (!!expand[1] << 1);
v |= (!!expand[2] << 2);
v |= (!!expand[3] << 3);
v |= (!!expand[4] << 4);
v |= (!!expand[5] << 5);
v |= (!!expand[6] << 6);
v |= (!!expand[7] << 7);
return v;
}

View File

@ -44,8 +44,4 @@ typedef struct {
int P[8]; // Expanded Flag Register
} CPU_STATE;
extern CPU_STATE cpu;
extern byte CPU_RAM[0x8000];
#endif

View File

@ -15,8 +15,6 @@ void fce_init();
void fce_run();
void fce_update_screen();
extern int frame_cnt;
extern char rom_mario_nes[];
static inline void draw(int col, int row, int idx) {
@ -28,10 +26,18 @@ static inline void draw(int col, int row, int idx) {
}
#else
extern const uint32_t palette[64];
extern uint32_t screen[H][W];
if (col >= 0 && col < W && row < H) {
screen[row][col] = palette[idx];
}
extern uint32_t screen[H][W + 8 + 256];
screen[row][col] = palette[idx];
#endif
}
// Store an already-resolved colour value at (col, row) of the frame buffer.
// No bounds checking is performed on col or row.
static inline void draw_color(int col, int row, uint32_t c) {
#ifndef STRETCH
  extern uint32_t screen[H][W + 8 + 256];
  uint32_t *line = screen[row];
  line[col] = c;
#else
  // Stretch mode is not supported yet.
  assert(0);
#endif
}

View File

@ -20,7 +20,6 @@ static inline uint32_t memory_readb(uint32_t address) {
static inline uint32_t instr_fetch(uint32_t address) {
extern byte memory[0x10000]; // mmc
//extern byte CPU_RAM[0x8000]; // CPU Memory
// for super mario, all fetches are from mmc
return memory[address];
@ -31,13 +30,7 @@ static inline void memory_writeb(uint32_t address, uint32_t byte_data) {
if (idx == 0) { cpu_ram_write(address, byte_data); }
else if (idx == 1) { ppu_io_write(address, byte_data); }
else if (idx == 2) {
if (address == 0x4014) {
// DMA transfer
int i;
for (i = 0; i < 256; i++) {
ppu_sprram_write(cpu_ram_read((0x100 * (byte_data & 0xff)) + i));
}
}
if (address == 0x4014) W4014(byte_data);
else psg_io_write(address, byte_data);
}
else if (idx == 3) { cpu_ram_write(address, byte_data); }

View File

@ -1,6 +1,10 @@
#include "common.h"
byte mmc_read(word address);
// Read one byte from the flat 64 KiB `memory` image at `address`,
// widened to uint32_t.
static inline uint32_t mmc_read(word address) {
  extern byte memory[0x10000];
  const byte *cell = &memory[address];
  return *cell;
}
void mmc_write(word address, byte data);
void mmc_copy(word address, byte *source, int length);
void mmc_append_chr_rom_page(byte *source);

View File

@ -3,32 +3,32 @@
#ifndef PPU_H
#define PPU_H
extern byte PPU_SPRRAM[0x100];
extern byte PPU_RAM[0x4000];
void ppu_init();
void ppu_finish();
int ppu_read_idx(void);
byte ppu_io_read(word address);
void ppu_io_write(word address, byte data);
bool ppu_generates_nmi();
void ppu_set_generates_nmi(bool yesno);
// Handle a write to $4014: copy one 256-byte page of CPU_RAM into PPU_SPRRAM.
//
// `data` selects the source page (source offset = data * 0x100). CPU_RAM is
// only 0x8000 bytes long, so page numbers 0x80..0xFF would index past the end
// of the array; the offset is therefore wrapped into the array. Because the
// array size is a multiple of the page size, the 256-byte copy always stays
// in bounds.
// NOTE(review): for pages >= 0x80 this now reads the wrapped page instead of
// out-of-bounds memory -- confirm which source those pages should map to.
//
// The copy always starts at PPU_SPRRAM[0]; the current OAM address is not
// consulted (see the disabled assertion below).
static inline void W4014(byte data) {
  // DMA transfer
  // assert(ppu.OAMADDR == 0);
  extern byte PPU_SPRRAM[0x100];
  extern byte CPU_RAM[0x8000];
  const uint32_t page_offset = ((uint32_t)(data & 0xff) << 8) % 0x8000u;
  void *src = &CPU_RAM[page_offset];
  void *dest = &PPU_SPRRAM[0];
  memcpy(dest, src, 256);
}
bool ppu_generates_nmi();
void ppu_set_mirroring(byte mirroring);
void ppu_run(int cycles);
//void ppu_cycle();
int ppu_scanline();
void ppu_set_scanline(int s);
void ppu_cycle();
void ppu_copy(word address, byte *source, int length);
void ppu_sprram_write(byte data);
// PPU Memory and State
typedef struct {
// byte PPUCTRL; // $2000 write only
byte PPUCTRL; // $2000 write only
int PPUMASK[8]; // $2001 write only, expanded ppu mask
byte PPUSTATUS; // $2002 read only
byte OAMADDR; // $2003 write only
@ -47,23 +47,4 @@ typedef struct {
int x, scanline;
} PPU_STATE;
extern PPU_STATE ppu;
extern byte ppu_latch;
extern bool ppu_sprite_hit_occured;
// Screen State and Rendering
static inline byte ppu_l_h_addition(int h, int l, int x) {
return (((h >> (7 - x)) & 1) << 1) | ((l >> (7 - x)) & 1);
}
static inline byte ppu_l_h_addition_flip(int l, int h, int x) {
return (((h >> x) & 1) << 1) | ((l >> x) & 1);
}
// Draws current screen pixels in ppu_background_pixels & ppu_sprite_pixels and clears them
void ppu_render_screen();
void ppu_set_background_color(byte color);
#endif

View File

@ -6,9 +6,41 @@
#ifndef PSG_H
#define PSG_H
extern unsigned char psg_joy1[8];
extern int key_p;
byte psg_io_read(word address);
void psg_io_write(word address, byte data);
// Serve a read from the PSG I/O range.
//
// Only $4016 (joystick 1) is handled: every read advances the shared cursor
// `key_p`, and while the cursor was below 9 before the read, the state of the
// key mapped to the *advanced* cursor is returned. Any other address, or a
// read past the ninth, yields 0.
static inline byte psg_io_read(word address) {
  // Cursor position -> host key code; unlisted entries are 0.
  static int const MAP[256] = {
    0, // On/Off
    _KEY_G, // A
    _KEY_H, // B
    _KEY_T, // SELECT
    _KEY_Y, // START
    _KEY_W,
    _KEY_S,
    _KEY_A,
    _KEY_D,
    255,
  };
  extern int key_state[];

  if (address != 0x4016) {
    return 0;
  }

  // Joystick 1: the cursor advances on every read, even past the last slot.
  int before = key_p;
  key_p = before + 1;
  if (before >= 9) {
    return 0;
  }
  return key_state[MAP[before + 1]];
}
// Serve a write to the PSG I/O range.
//
// Bit 0 of every write (to any address) is remembered. When a write to $4016
// has bit 0 clear and the previous write had it set, the joystick read cursor
// `key_p` is reset to 0 (strobe).
static inline void psg_io_write(word address, byte data) {
  static byte prev_write;
  const byte low_bit = data & 1;

  if (address == 0x4016 && low_bit == 0 && prev_write == 1) {
    // strobe
    key_p = 0;
  }
  prev_write = low_bit;
}
#endif

View File

@ -5,7 +5,7 @@
//#define STATISTIC
CPU_STATE cpu;
static CPU_STATE cpu;
//int op_cycles; // Additional instruction cycles used (e.g. when paging occurs)
@ -30,16 +30,6 @@ static int cycle_table[256] = {
/*0xF0*/ 2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,
};
//static void (*cpu_op_address_mode[256])(); // Array of address modes
//static void (*cpu_op_handler[256])(); // Array of instruction function pointers
//bool cpu_op_in_base_instruction_set[256]; // true if instruction is in base 6502 instruction set
//char *cpu_op_name[256]; // Instruction names
//static int cpu_op_cycles[256]; // CPU cycles used by instructions
// If OP_TRACE, print current instruction with all registers into the console
void cpu_trace_instruction();
// CPU Addressing Modes
#define cpu_address_implied(exec) { exec(true); }
@ -81,10 +71,6 @@ void cpu_trace_instruction();
uint32_t op_address = (instr_fetchw(PCreg) + Xreg) & 0xffff; \
uint32_t op_value = memory_readb(op_address); \
PCreg += 2; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
@ -92,21 +78,10 @@ void cpu_trace_instruction();
uint32_t op_address = (instr_fetchw(PCreg) + Yreg) & 0xFFFF; \
uint32_t op_value = memory_readb(op_address); \
PCreg += 2; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
#define cpu_address_relative(exec) { \
uint32_t op_address = instr_fetch(PCreg); \
PCreg++; \
op_address = PCreg + (int8_t)op_address; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
@ -140,10 +115,6 @@ void cpu_trace_instruction();
uint32_t op_address = (temp + (Yreg & 0xff)) & 0xFFFF; \
uint32_t op_value = memory_readb(op_address); \
PCreg++; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
@ -185,20 +156,12 @@ void cpu_trace_instruction();
#define cpu_address_absolute_x_notload(exec) { \
uint32_t op_address = (instr_fetchw(PCreg) + Xreg) & 0xffff; \
PCreg += 2; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
#define cpu_address_absolute_y_notload(exec) { \
uint32_t op_address = (instr_fetchw(PCreg) + Yreg) & 0xFFFF; \
PCreg += 2; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
@ -215,10 +178,6 @@ void cpu_trace_instruction();
uint32_t temp = cpu_ram_readw(arg_addr); \
uint32_t op_address = (temp + (Yreg & 0xff)) & 0xFFFF; \
PCreg++; \
\
if ((op_address ^ PCreg) >> 8) { \
op_cycles++; \
} \
exec(false); \
}
@ -272,33 +231,31 @@ static int g_lz_zn_result = FLAG_STATE_UPTODATE; // for zero and negative flags
int lz_ov_result, lz_ov_src1, lz_ov_src1, lz_ov_state; // for overflow flag
int lz_c_result, lz_c_src1, lz_c_src1, lz_c_state; // for carry flag
#define lz_is_z_lazy(lz_zn_result) (lz_zn_result & (1 << 31))
#define lz_is_n_lazy(lz_zn_result) (lz_zn_result & (1 << 25))
#define cpu_flag_set(flag) cpu.P[flag]
#define cpu_modify_flag(flag, value) cpu.P[flag] = !!(value)
#define cpu_flag_setbit(flag) !!cpu.P[flag]
#define cpu_modify_flag(flag, value) cpu.P[flag] = (value)
#define cpu_set_flag(flag) cpu.P[flag] = 1
#define cpu_unset_flag(flag) cpu.P[flag] = 0
#define cpu_update_zn_flags(value) { \
lz_zn_result = (value) | (1 << 25) | (1 << 31); \
lz_zn_result = (value) | (1 << 25); \
/* cpu_modify_flag(zero_bp, (value & 0xff) == 0); */ \
/* cpu_modify_flag(negative_bp, (value >> 7) & 1); */ \
}
#define lz_set_z_uptodate(lz_zn_result) { lz_zn_result &= ~(1 << 31); }
#define lz_set_n_uptodate(lz_zn_result) { lz_zn_result &= ~(1 << 25); }
#define lz_query_z(lz_zn_result) ((lz_zn_result & 0xff) == 0)
#define lz_compute_z(lz_zn_result) { \
if (lz_is_z_lazy(lz_zn_result)) { \
cpu.P[zero_bp] = (lz_zn_result & 0xff) == 0; \
lz_set_z_uptodate(lz_zn_result); \
} \
cpu_modify_flag(zero_bp, lz_query_z(lz_zn_result)); \
}
#define lz_compute_n(lz_zn_result) { \
if (lz_is_n_lazy(lz_zn_result)) { \
cpu.P[negative_bp] = (lz_zn_result >> 7) & 1; \
cpu_modify_flag(negative_bp, lz_zn_result & 0x80); \
lz_set_n_uptodate(lz_zn_result); \
} \
}
@ -314,9 +271,9 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
// Addition
#define cpu_op_adc(in_zero_page) { \
int result = (Areg & 0xff) + op_value + cpu_flag_set(carry_bp); \
cpu_modify_flag(carry_bp, (result & 0x100)); \
cpu_modify_flag(overflow_bp, (~(Areg ^ op_value) & (Areg ^ result) & 0x80)); \
int result = (Areg & 0xff) + op_value + cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, result & 0x100); \
cpu_modify_flag(overflow_bp, ~(Areg ^ op_value) & (Areg ^ result) & 0x80); \
cpu_update_zn_flags(result); \
Areg = result; \
}
@ -324,9 +281,9 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
// Subtraction
#define cpu_op_sbc(in_zero_page) { \
uint32_t result = (Areg & 0xff) - op_value - !cpu_flag_set(carry_bp); \
uint32_t result = (Areg & 0xff) - op_value - !cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, !(result & 0x100)); \
cpu_modify_flag(overflow_bp, ((Areg ^ op_value) & (Areg ^ result) & 0x80)); \
cpu_modify_flag(overflow_bp, (Areg ^ op_value) & (Areg ^ result) & 0x80); \
cpu_update_zn_flags(result & 0xff); \
Areg = result; \
}
@ -335,11 +292,10 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
#define cpu_op_and(in_zero_page) { cpu_update_zn_flags(Areg &= op_value); }
#define cpu_op_bit(in_zero_page) { \
cpu_modify_flag(overflow_bp, (op_value >> 6) & 0x1); \
cpu_modify_flag(zero_bp, !(Areg & op_value & 0xff)); \
cpu_modify_flag(negative_bp, (op_value >> 7) & 0x1); \
/*cpu_update_zn_flags(op_value); */\
lz_set_z_uptodate(lz_zn_result); \
cpu_modify_flag(overflow_bp, op_value & 0x40); \
/*cpu_modify_flag(zero_bp, (Areg & op_value & 0xff) == 0); */\
cpu_update_zn_flags(Areg & op_value); \
cpu_modify_flag(negative_bp, op_value & 0x80); \
lz_set_n_uptodate(lz_zn_result); \
}
#define cpu_op_eor(in_zero_page) { cpu_update_zn_flags(Areg ^= op_value); }
@ -374,28 +330,28 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
#define cpu_op_rola(in_zero_page) { \
uint32_t value = (Areg & 0xff) << 1; \
value |= cpu_flag_set(carry_bp); \
cpu_modify_flag(carry_bp, value > 0xFF); \
value |= cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, value & 0x100); \
cpu_update_zn_flags(value); \
Areg = value; \
}
#define cpu_op_rol(in_zero_page) { \
op_value <<= 1; \
op_value |= cpu_flag_set(carry_bp); \
cpu_modify_flag(carry_bp, op_value > 0xFF); \
op_value |= cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, op_value & 0x100); \
/* op_value &= 0xFF; */\
cpu_update_zn_flags(op_value); \
if (in_zero_page) { cpu_ram_write(op_address, op_value); } \
else { memory_writeb(op_address, op_value); } \
}
#define cpu_op_rora(in_zero_page) { \
unsigned char carry = cpu_flag_set(carry_bp); \
unsigned char carry = cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, Areg & 0x01); \
Areg = ((Areg & 0xff) >> 1) | (carry << 7); \
cpu_update_zn_flags(Areg); \
}
#define cpu_op_ror(in_zero_page) { \
unsigned char carry = cpu_flag_set(carry_bp); \
unsigned char carry = cpu_flag_setbit(carry_bp); \
cpu_modify_flag(carry_bp, op_value & 0x01); \
op_value = (op_value >> 1) | (carry << 7); \
cpu_update_zn_flags(op_value); \
@ -436,21 +392,23 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
#define cpu_op_txs(in_zero_page) { cpu.SP = Xreg & 0xff; }
#define cpu_branch(flag) do { \
if (flag) { \
PCreg = op_address; \
} \
if (flag) { \
int8_t offset = instr_fetch(PCreg); \
PCreg += offset + 1; \
} \
else PCreg ++; \
} while(0)
// Branching Positive
#define cpu_op_bcs(in_zero_page) { cpu_branch(cpu_flag_set(carry_bp)); }
#define cpu_op_beq(in_zero_page) { lz_compute_z(lz_zn_result); cpu_branch(cpu_flag_set(zero_bp)); }
#define cpu_op_beq(in_zero_page) { cpu_branch(lz_query_z(lz_zn_result)); }
#define cpu_op_bmi(in_zero_page) { lz_compute_n(lz_zn_result); cpu_branch(cpu_flag_set(negative_bp)); }
#define cpu_op_bvs(in_zero_page) { cpu_branch(cpu_flag_set(overflow_bp)); }
// Branching Negative
#define cpu_op_bne(in_zero_page) { lz_compute_z(lz_zn_result); cpu_branch(!cpu_flag_set(zero_bp)); }
#define cpu_op_bne(in_zero_page) { cpu_branch(!lz_query_z(lz_zn_result)); }
#define cpu_op_bcc(in_zero_page) { cpu_branch(!cpu_flag_set(carry_bp)); }
#define cpu_op_bpl(in_zero_page) { lz_compute_n(lz_zn_result); cpu_branch(!cpu_flag_set(negative_bp)); }
#define cpu_op_bvc(in_zero_page) { cpu_branch(!cpu_flag_set(overflow_bp)); }
@ -477,7 +435,7 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
}
#define cpu_op_rti(in_zero_page) { \
byte_unpack(cpu.P, cpu_stack_popb()); \
lz_set_z_uptodate(lz_zn_result); \
cpu_update_zn_flags(!cpu.P[zero_bp]); \
lz_set_n_uptodate(lz_zn_result); \
cpu.P[unused_bp] = 1; \
PCreg = cpu_stack_popw(); \
@ -535,7 +493,7 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
byte_unpack(cpu.P, cpu_stack_popb()); \
cpu.P[break_bp] = 0; \
cpu.P[unused_bp] = 1; \
lz_set_z_uptodate(lz_zn_result); \
cpu_update_zn_flags(!cpu.P[zero_bp]); \
lz_set_n_uptodate(lz_zn_result); \
}
@ -615,240 +573,6 @@ static inline void ____FE____() { /* Instruction for future Extension */ }
void cpu_init()
{
/*
CPU_OP_BIS(00, 7, brk, "BRK", implied)
CPU_OP_BIS(01, 6, ora, "ORA", indirect_x)
CPU_OP_BIS(05, 3, ora, "ORA", zero_page)
CPU_OP_BIS(06, 5, asl, "ASL", zero_page)
CPU_OP_BIS(08, 3, php, "PHP", implied)
CPU_OP_BIS(09, 2, ora, "ORA", immediate)
CPU_OP_BIS(0A, 2, asla,"ASL", implied)
CPU_OP_BIS(0D, 4, ora, "ORA", absolute)
CPU_OP_BIS(0E, 6, asl, "ASL", absolute)
CPU_OP_BIS(10, 2, bpl, "BPL", relative)
CPU_OP_BIS(11, 5, ora, "ORA", indirect_y)
CPU_OP_BIS(15, 4, ora, "ORA", zero_page_x)
CPU_OP_BIS(16, 6, asl, "ASL", zero_page_x)
CPU_OP_BIS(18, 2, clc, "CLC", implied)
CPU_OP_BIS(19, 4, ora, "ORA", absolute_y)
CPU_OP_BIS(1D, 4, ora, "ORA", absolute_x)
CPU_OP_BIS(1E, 7, asl, "ASL", absolute_x)
CPU_OP_BIS(20, 6, jsr, "JSR", absolute)
CPU_OP_BIS(21, 6, and, "AND", indirect_x)
CPU_OP_BIS(24, 3, bit, "BIT", zero_page)
CPU_OP_BIS(25, 3, and, "AND", zero_page)
CPU_OP_BIS(26, 5, rol, "ROL", zero_page)
CPU_OP_BIS(28, 4, plp, "PLP", implied)
CPU_OP_BIS(29, 2, and, "AND", immediate)
CPU_OP_BIS(2A, 2, rola,"ROL", implied)
CPU_OP_BIS(2C, 4, bit, "BIT", absolute)
CPU_OP_BIS(2D, 2, and, "AND", absolute)
CPU_OP_BIS(2E, 6, rol, "ROL", absolute)
CPU_OP_BIS(30, 2, bmi, "BMI", relative)
CPU_OP_BIS(31, 5, and, "AND", indirect_y)
CPU_OP_BIS(35, 4, and, "AND", zero_page_x)
CPU_OP_BIS(36, 6, rol, "ROL", zero_page_x)
CPU_OP_BIS(38, 2, sec, "SEC", implied)
CPU_OP_BIS(39, 4, and, "AND", absolute_y)
CPU_OP_BIS(3D, 4, and, "AND", absolute_x)
CPU_OP_BIS(3E, 7, rol, "ROL", absolute_x)
CPU_OP_BIS(40, 6, rti, "RTI", implied)
CPU_OP_BIS(41, 6, eor, "EOR", indirect_x)
CPU_OP_BIS(45, 3, eor, "EOR", zero_page)
CPU_OP_BIS(46, 5, lsr, "LSR", zero_page)
CPU_OP_BIS(48, 3, pha, "PHA", implied)
CPU_OP_BIS(49, 2, eor, "EOR", immediate)
CPU_OP_BIS(4A, 2, lsra,"LSR", implied)
CPU_OP_BIS(4C, 3, jmp, "JMP", absolute)
CPU_OP_BIS(4D, 4, eor, "EOR", absolute)
CPU_OP_BIS(4E, 6, lsr, "LSR", absolute)
CPU_OP_BIS(50, 2, bvc, "BVC", relative)
CPU_OP_BIS(51, 5, eor, "EOR", indirect_y)
CPU_OP_BIS(55, 4, eor, "EOR", zero_page_x)
CPU_OP_BIS(56, 6, lsr, "LSR", zero_page_x)
CPU_OP_BIS(58, 2, cli, "CLI", implied)
CPU_OP_BIS(59, 4, eor, "EOR", absolute_y)
CPU_OP_BIS(5D, 4, eor, "EOR", absolute_x)
CPU_OP_BIS(5E, 7, lsr, "LSR", absolute_x)
CPU_OP_BIS(60, 6, rts, "RTS", implied)
CPU_OP_BIS(61, 6, adc, "ADC", indirect_x)
CPU_OP_BIS(65, 3, adc, "ADC", zero_page)
CPU_OP_BIS(66, 5, ror, "ROR", zero_page)
CPU_OP_BIS(68, 4, pla, "PLA", implied)
CPU_OP_BIS(69, 2, adc, "ADC", immediate)
CPU_OP_BIS(6A, 2, rora,"ROR", implied)
CPU_OP_BIS(6C, 5, jmp, "JMP", indirect)
CPU_OP_BIS(6D, 4, adc, "ADC", absolute)
CPU_OP_BIS(6E, 6, ror, "ROR", absolute)
CPU_OP_BIS(70, 2, bvs, "BVS", relative)
CPU_OP_BIS(71, 5, adc, "ADC", indirect_y)
CPU_OP_BIS(75, 4, adc, "ADC", zero_page_x)
CPU_OP_BIS(76, 6, ror, "ROR", zero_page_x)
CPU_OP_BIS(78, 2, sei, "SEI", implied)
CPU_OP_BIS(79, 4, adc, "ADC", absolute_y)
CPU_OP_BIS(7D, 4, adc, "ADC", absolute_x)
CPU_OP_BIS(7E, 7, ror, "ROR", absolute_x)
CPU_OP_BIS(81, 6, sta, "STA", indirect_x)
CPU_OP_BIS(84, 3, sty, "STY", zero_page)
CPU_OP_BIS(85, 3, sta, "STA", zero_page)
CPU_OP_BIS(86, 3, stx, "STX", zero_page)
CPU_OP_BIS(88, 2, dey, "DEY", implied)
CPU_OP_BIS(8A, 2, txa, "TXA", implied)
CPU_OP_BIS(8C, 4, sty, "STY", absolute)
CPU_OP_BIS(8D, 4, sta, "STA", absolute)
CPU_OP_BIS(8E, 4, stx, "STX", absolute)
CPU_OP_BIS(90, 2, bcc, "BCC", relative)
CPU_OP_BIS(91, 6, sta, "STA", indirect_y)
CPU_OP_BIS(94, 4, sty, "STY", zero_page_x)
CPU_OP_BIS(95, 4, sta, "STA", zero_page_x)
CPU_OP_BIS(96, 4, stx, "STX", zero_page_y)
CPU_OP_BIS(98, 2, tya, "TYA", implied)
CPU_OP_BIS(99, 5, sta, "STA", absolute_y)
CPU_OP_BIS(9A, 2, txs, "TXS", implied)
CPU_OP_BIS(9D, 5, sta, "STA", absolute_x)
CPU_OP_BIS(A0, 2, ldy, "LDY", immediate)
CPU_OP_BIS(A1, 6, lda, "LDA", indirect_x)
CPU_OP_BIS(A2, 2, ldx, "LDX", immediate)
CPU_OP_BIS(A4, 3, ldy, "LDY", zero_page)
CPU_OP_BIS(A5, 3, lda, "LDA", zero_page)
CPU_OP_BIS(A6, 3, ldx, "LDX", zero_page)
CPU_OP_BIS(A8, 2, tay, "TAY", implied)
CPU_OP_BIS(A9, 2, lda, "LDA", immediate)
CPU_OP_BIS(AA, 2, tax, "TAX", implied)
CPU_OP_BIS(AC, 4, ldy, "LDY", absolute)
CPU_OP_BIS(AD, 4, lda, "LDA", absolute)
CPU_OP_BIS(AE, 4, ldx, "LDX", absolute)
CPU_OP_BIS(B0, 2, bcs, "BCS", relative)
CPU_OP_BIS(B1, 5, lda, "LDA", indirect_y)
CPU_OP_BIS(B4, 4, ldy, "LDY", zero_page_x)
CPU_OP_BIS(B5, 4, lda, "LDA", zero_page_x)
CPU_OP_BIS(B6, 4, ldx, "LDX", zero_page_y)
CPU_OP_BIS(B8, 2, clv, "CLV", implied)
CPU_OP_BIS(B9, 4, lda, "LDA", absolute_y)
CPU_OP_BIS(BA, 2, tsx, "TSX", implied)
CPU_OP_BIS(BC, 4, ldy, "LDY", absolute_x)
CPU_OP_BIS(BD, 4, lda, "LDA", absolute_x)
CPU_OP_BIS(BE, 4, ldx, "LDX", absolute_y)
CPU_OP_BIS(C0, 2, cpy, "CPY", immediate)
CPU_OP_BIS(C1, 6, cmp, "CMP", indirect_x)
CPU_OP_BIS(C4, 3, cpy, "CPY", zero_page)
CPU_OP_BIS(C5, 3, cmp, "CMP", zero_page)
CPU_OP_BIS(C6, 5, dec, "DEC", zero_page)
CPU_OP_BIS(C8, 2, iny, "INY", implied)
CPU_OP_BIS(C9, 2, cmp, "CMP", immediate)
CPU_OP_BIS(CA, 2, dex, "DEX", implied)
CPU_OP_BIS(CC, 4, cpy, "CPY", absolute)
CPU_OP_BIS(CD, 4, cmp, "CMP", absolute)
CPU_OP_BIS(CE, 6, dec, "DEC", absolute)
CPU_OP_BIS(D0, 2, bne, "BNE", relative)
CPU_OP_BIS(D1, 5, cmp, "CMP", indirect_y)
CPU_OP_BIS(D5, 4, cmp, "CMP", zero_page_x)
CPU_OP_BIS(D6, 6, dec, "DEC", zero_page_x)
CPU_OP_BIS(D8, 2, cld, "CLD", implied)
CPU_OP_BIS(D9, 4, cmp, "CMP", absolute_y)
CPU_OP_BIS(DD, 4, cmp, "CMP", absolute_x)
CPU_OP_BIS(DE, 7, dec, "DEC", absolute_x)
CPU_OP_BIS(E0, 2, cpx, "CPX", immediate)
CPU_OP_BIS(E1, 6, sbc, "SBC", indirect_x)
CPU_OP_BIS(E4, 3, cpx, "CPX", zero_page)
CPU_OP_BIS(E5, 3, sbc, "SBC", zero_page)
CPU_OP_BIS(E6, 5, inc, "INC", zero_page)
CPU_OP_BIS(E8, 2, inx, "INX", implied)
CPU_OP_BIS(E9, 2, sbc, "SBC", immediate)
CPU_OP_BIS(EA, 2, nop, "NOP", implied)
CPU_OP_BIS(EC, 4, cpx, "CPX", absolute)
CPU_OP_BIS(ED, 4, sbc, "SBC", absolute)
CPU_OP_BIS(EE, 6, inc, "INC", absolute)
CPU_OP_BIS(F0, 2, beq, "BEQ", relative)
CPU_OP_BIS(F1, 5, sbc, "SBC", indirect_y)
CPU_OP_BIS(F5, 4, sbc, "SBC", zero_page_x)
CPU_OP_BIS(F6, 6, inc, "INC", zero_page_x)
CPU_OP_BIS(F8, 2, sed, "SED", implied)
CPU_OP_BIS(F9, 4, sbc, "SBC", absolute_y)
CPU_OP_BIS(FD, 4, sbc, "SBC", absolute_x)
CPU_OP_BIS(FE, 7, inc, "INC", absolute_x)
CPU_OP_EIS(03, 8, aso, "SLO", indirect_x)
CPU_OP_EIS(07, 5, aso, "SLO", zero_page)
CPU_OP_EIS(0F, 6, aso, "SLO", absolute)
CPU_OP_EIS(13, 8, aso, "SLO", indirect_y)
CPU_OP_EIS(17, 6, aso, "SLO", zero_page_x)
CPU_OP_EIS(1B, 7, aso, "SLO", absolute_y)
CPU_OP_EIS(1F, 7, aso, "SLO", absolute_x)
CPU_OP_EIS(23, 8, rla, "RLA", indirect_x)
CPU_OP_EIS(27, 5, rla, "RLA", zero_page)
CPU_OP_EIS(2F, 6, rla, "RLA", absolute)
CPU_OP_EIS(33, 8, rla, "RLA", indirect_y)
CPU_OP_EIS(37, 6, rla, "RLA", zero_page_x)
CPU_OP_EIS(3B, 7, rla, "RLA", absolute_y)
CPU_OP_EIS(3F, 7, rla, "RLA", absolute_x)
CPU_OP_EIS(43, 8, lse, "SRE", indirect_x)
CPU_OP_EIS(47, 5, lse, "SRE", zero_page)
CPU_OP_EIS(4F, 6, lse, "SRE", absolute)
CPU_OP_EIS(53, 8, lse, "SRE", indirect_y)
CPU_OP_EIS(57, 6, lse, "SRE", zero_page_x)
CPU_OP_EIS(5B, 7, lse, "SRE", absolute_y)
CPU_OP_EIS(5F, 7, lse, "SRE", absolute_x)
CPU_OP_EIS(63, 8, rra, "RRA", indirect_x)
CPU_OP_EIS(67, 5, rra, "RRA", zero_page)
CPU_OP_EIS(6F, 6, rra, "RRA", absolute)
CPU_OP_EIS(73, 8, rra, "RRA", indirect_y)
CPU_OP_EIS(77, 6, rra, "RRA", zero_page_x)
CPU_OP_EIS(7B, 7, rra, "RRA", absolute_y)
CPU_OP_EIS(7F, 7, rra, "RRA", absolute_x)
CPU_OP_EIS(83, 6, axs, "SAX", indirect_x)
CPU_OP_EIS(87, 3, axs, "SAX", zero_page)
CPU_OP_EIS(8F, 4, axs, "SAX", absolute)
CPU_OP_EIS(93, 6, axa, "SAX", indirect_y)
CPU_OP_EIS(97, 4, axs, "SAX", zero_page_y)
CPU_OP_EIS(9F, 5, axa, "SAX", absolute_y)
CPU_OP_EIS(A3, 6, lax, "LAX", indirect_x)
CPU_OP_EIS(A7, 3, lax, "LAX", zero_page)
CPU_OP_EIS(AF, 4, lax, "LAX", absolute)
CPU_OP_EIS(B3, 5, lax, "LAX", indirect_y)
CPU_OP_EIS(B7, 4, lax, "LAX", zero_page_y)
CPU_OP_EIS(BF, 4, lax, "LAX", absolute_y)
CPU_OP_EIS(C3, 8, dcm, "DCP", indirect_x)
CPU_OP_EIS(C7, 5, dcm, "DCP", zero_page)
CPU_OP_EIS(CF, 6, dcm, "DCP", absolute)
CPU_OP_EIS(D3, 8, dcm, "DCP", indirect_y)
CPU_OP_EIS(D7, 6, dcm, "DCP", zero_page_x)
CPU_OP_EIS(DB, 7, dcm, "DCP", absolute_y)
CPU_OP_EIS(DF, 7, dcm, "DCP", absolute_x)
CPU_OP_EIS(E3, 8, ins, "ISB", indirect_x)
CPU_OP_EIS(E7, 5, ins, "ISB", zero_page)
CPU_OP_EIS(EB, 2, sbc, "SBC", immediate)
CPU_OP_EIS(EF, 6, ins, "ISB", absolute)
CPU_OP_EIS(F3, 8, ins, "ISB", indirect_y)
CPU_OP_EIS(F7, 6, ins, "ISB", zero_page_x)
CPU_OP_EIS(FB, 7, ins, "ISB", absolute_y)
CPU_OP_EIS(FF, 7, ins, "ISB", absolute_x)
CPU_OP_NII(04, zero_page)
CPU_OP_NII(0C, absolute)
CPU_OP_NII(14, zero_page_x)
CPU_OP_NII(1A, implied)
CPU_OP_NII(1C, absolute_x)
CPU_OP_NII(34, zero_page_x)
CPU_OP_NII(3A, implied)
CPU_OP_NII(3C, absolute_x)
CPU_OP_NII(44, zero_page)
CPU_OP_NII(54, zero_page_x)
CPU_OP_NII(5A, implied)
CPU_OP_NII(5C, absolute_x)
CPU_OP_NII(64, zero_page)
CPU_OP_NII(74, zero_page_x)
CPU_OP_NII(7A, implied)
CPU_OP_NII(7C, absolute_x)
CPU_OP_NII(80, immediate)
CPU_OP_NII(D4, zero_page_x)
CPU_OP_NII(DA, implied)
CPU_OP_NII(DC, absolute_x)
CPU_OP_NII(F4, zero_page_x)
CPU_OP_NII(FA, implied)
CPU_OP_NII(FC, absolute_x)
*/
//cpu.P = 0x24;
cpu.P[interrupt_bp] = 1;
cpu.P[unused_bp] = 1;
@ -915,6 +639,7 @@ void cpu_run(long cycles)
{
int lz_zn_result = g_lz_zn_result; // for zero and negative flags
uint32_t PCreg = cpu.PC, Areg = cpu.A, Xreg = cpu.X, Yreg = cpu.Y;
cycles /= 3;
long c = cycles;
while (cycles > 0) {
#ifdef STATISTIC

View File

@ -6,10 +6,10 @@
#include <amdev.h>
//#define NOGUI
//#define PROFILE
int key_state[256];
int frame_cnt;
static int frame_cnt;
bool do_update = false;
static byte *buf;
typedef struct {
@ -110,25 +110,16 @@ void fce_run()
wait_for_frame();
int scanlines = 262;
#ifdef PROFILE
uint32_t ppu_time = 0;
uint32_t cpu_time = 0;
while (scanlines-- > 0) {
uint32_t t0 = uptime();
ppu_run(1);
uint32_t t1 = uptime();
cpu_run(1364 / 12); // 1 scanline
uint32_t t2 = uptime();
ppu_time += t1 - t0;
cpu_time += t2 - t1;
ppu_cycle();
int key = read_key();
for (; key != _KEY_NONE; key = read_key()) {
int down = (key & 0x8000) != 0;
int code = key & ~0x8000;
key_state[code] = down;
}
}
printf("ppu time = %d, cpu time = %d\n", ppu_time, cpu_time);
#else
while (scanlines-- > 0) {
ppu_run(1);
cpu_run(1364 / 12); // 1 scanline
}
#endif
nr_draw ++;
if (uptime() - last > 1000) {
@ -136,14 +127,6 @@ void fce_run()
printf("FPS = %d\n", nr_draw);
nr_draw = 0;
}
int key = read_key();
log("readkey:%d\n", key);
if (key != _KEY_NONE) {
int down = (key & 0x8000) != 0;
int code = key & ~0x8000;
key_state[code] = down;
}
}
}
@ -169,20 +152,25 @@ byte canvas[257][520];
static int xmap[1024];
static uint32_t row[1024];
#else
uint32_t screen[H][W];
// add align attribute here to enable fast memcpy
uint32_t screen[H][W + 8 + 256] __attribute((aligned(8)));
#endif
void fce_update_screen() {
do_update = (frame_cnt == 0);
frame_cnt ++;
#ifdef NOGUI
if (frame_cnt % 1000 == 0) printf("Frame %d (%d FPS)\n", frame_cnt, frame_cnt * 1000 / uptime());
// if (frame_cnt % 1000 == 0) printf("Frame %d (%d FPS)\n", frame_cnt, frame_cnt * 1000 / uptime());
return;
#endif
if (frame_cnt % 3 != 0) return;
if (frame_cnt != 2) return;
frame_cnt = -1;
int w = screen_width();
int h = screen_height();
int idx = ppu_read_idx();
#ifdef STRETCH
int pad = (w - h) / 2;
for (int y = 0; y < h; y ++) {
@ -192,30 +180,35 @@ void fce_update_screen() {
}
draw_rect(row + pad, pad, y, w - 2 * pad, 1);
}
assert(sizeof(byte) == 1);
memset(canvas, idx, sizeof(canvas));
#else
int xpad = (w - W) / 2;
int ypad = (h - H) / 2;
assert(xpad >= 0 && ypad >= 0);
draw_rect(&screen[0][0], xpad, ypad, W, H);
for (int y = 0; y < H; y ++) {
draw_rect(&screen[y][256], xpad, ypad + y, W, 1);
}
// draw_rect(&screen[0][0], xpad, ypad, W, H);
int nr64 = sizeof(screen[0][0]) * W / sizeof(uint64_t);
int i;
uint64_t v = ((uint64_t)palette[idx] << 32) | palette[idx];
for (int y = 0; y < H; y ++) {
uint64_t *p = (void *)&screen[y][256];
for (i = 0; i < nr64; i += 8) {
#define macro(x) p[i + x] = v
macro(0); macro(1); macro(2); macro(3);
macro(4); macro(5); macro(6); macro(7);
}
}
#endif
draw_sync();
int idx = ppu_read_idx();
#ifdef STRETCH
assert(sizeof(byte) == 1);
memset(canvas, idx, sizeof(canvas));
#else
int nr64 = sizeof(screen) / sizeof(uint64_t);
int i;
uint64_t v = ((uint64_t)palette[idx] << 32) | palette[idx];
uint64_t *p = (void *)screen;
for (i = 0; i < nr64; i += 8) {
#define macro(x) p[i + x] = v
macro(0); macro(1); macro(2); macro(3);
macro(4); macro(5); macro(6); macro(7);
}
#endif
}
void xmap_init() {

View File

@ -6,16 +6,11 @@
byte mmc_id;
byte mmc_chr_pages[MMC_MAX_PAGE_COUNT][0x2000];
int mmc_chr_pages_number;
static byte mmc_chr_pages[MMC_MAX_PAGE_COUNT][0x2000];
static int mmc_chr_pages_number;
byte memory[0x10000];
// Return the byte stored at `address` in the flat `memory` image.
inline byte mmc_read(word address)
{
return memory[address];
}
inline void mmc_write(word address, byte data)
{
memory[address] = data;

View File

@ -5,23 +5,30 @@
#include <klib.h>
#include <stdint.h>
//#define PROFILE
//#define HAS_US_TIMER
static PPU_STATE ppu;
static byte PPU_RAM[0x4000];
static bool ppu_2007_first_read;
static byte ppu_addr_latch;
static bool ppu_sprite_hit_occured = false;
static uint16_t ppu_screen_background[264][264 / 8];
static const word ppu_base_nametable_addresses[4] = { 0x2000, 0x2400, 0x2800, 0x2C00 };
// sprite
byte PPU_SPRRAM[0x100];
byte PPU_RAM[0x4000];
bool ppu_2007_first_read;
byte ppu_addr_latch;
PPU_STATE ppu;
byte ppu_latch;
bool ppu_sprite_hit_occured = false;
byte ppu_screen_background[264][264];
typedef struct {
uint8_t y, tile, atr, x;
} SPR;
static const SPR *spr_array = (void *)PPU_SPRRAM;
// preprocess tables
static byte XHL[256][256][8]; // each value is 0~3
static uint64_t XHL64[256][256];
static uint64_t XHLmask[256][256];
static byte XHL[256 * 256][8]; // each value is 0~3
static uint32_t ppu_ram_map[0x4000];
static uint16_t XHL16[256 * 256];
static uint16_t XHLmask16[256 * 256];
// PPUCTRL Functions
@ -39,6 +46,8 @@ static inline void ppu_update_PPUCTRL_internal(byte PPUCTRL) {
background_pattern_table_address = common_bit_set(PPUCTRL, 4) ? 0x1000 : 0x0000;
sprite_height = common_bit_set(PPUCTRL, 5) ? 16 : 8;
generates_nmi = common_bit_set(PPUCTRL, 7);
ppu.PPUCTRL = PPUCTRL;
}
inline bool ppu_generates_nmi() { return generates_nmi; }
@ -117,6 +126,7 @@ static inline uint32_t ppu_ram_read_fast(uint32_t address)
static inline void ppu_ram_write(word address, byte data)
{
//assert(!(ppu_shows_background() && ppu_shows_sprites()));
PPU_RAM[ppu_ram_map[address]] = data;
}
@ -156,30 +166,16 @@ int ppu_read_idx(void) {
// 3F1F = 11 (00010001)
// 3F20 = 2B (00101011)
// Compute (uint64_t)n << s using only 32-bit variable shifts, so that no
// shld instruction is generated on x86.
//
// n: value to shift; it is widened to 64 bits the same way (uint64_t)n would
//    be, i.e. a negative n contributes an all-ones upper word.
// s: shift count, must satisfy 0 <= s < 64.
//
// Both halves are kept unsigned: a signed low word with bit 31 set would
// sign-extend when merged into the 64-bit result and corrupt the upper word,
// and left-shifting a signed value into the sign bit is undefined.
static inline uint64_t int_shl(int n, int s) {
  const uint32_t bits = (uint32_t)n;
  const uint32_t sign = (n < 0) ? 0xFFFFFFFFu : 0u; // upper word of (uint64_t)n
  uint32_t hi, lo;
  if (s == 0) {
    // Handled separately: a 32-bit shift by (32 - 0) would be undefined.
    lo = bits;
    hi = sign;
  }
  else if (s < 32) {
    lo = bits << s;
    hi = (bits >> (32 - s)) | (sign << s);
  }
  else {
    lo = 0;
    hi = bits << (s - 32);
  }
  return ((uint64_t)hi << 32) | lo;
}
// Rendering
static void table_init() {
for (int h = 0; h < 256; h ++)
for (int l = 0; l < 256; l ++) {
for (int x = 0; x < 8; x ++) {
int col = (((h >> (7 - x)) & 1) << 1) | ((l >> (7 - x)) & 1);
XHL[h][l][x] = col;
XHL64[h][l] |= int_shl(col, x * 8);
XHL[h * 256 + l][x] = col;
XHL16[h * 256 + l] |= col << (x * 2);
if (col == 0) {
XHLmask[h][l] |= int_shl(0xff,x * 8);
XHLmask16[h * 256] |= 0x3 << (x * 2);
}
}
}
@ -190,172 +186,256 @@ static void table_init() {
log("ppu_ram_map[0x3F00]=0x%x\n", ppu_ram_map[0x3F00]);
}
static int palette_cache[4][4];
static uint32_t color_cache[4][4];
static uint32_t sprite_color_cache[4][4];
void palette_cache_read() {
static void make_color_cache(void) {
extern const uint32_t palette[64];
int i;
for (i = 0; i < 4; i ++) {
word palette_address = 0x3F00 + (i << 2);
uint32_t palette_address = 0x3F00 + (i << 2);
// still in the range of identity mapping, can bypass ppu_ram_map[]
palette_cache[i][1] = ppu_ram_read_fast(palette_address + 1);
palette_cache[i][2] = ppu_ram_read_fast(palette_address + 2);
palette_cache[i][3] = ppu_ram_read_fast(palette_address + 3);
color_cache[i][1] = palette[ppu_ram_read_fast(palette_address + 1)];
color_cache[i][2] = palette[ppu_ram_read_fast(palette_address + 2)];
color_cache[i][3] = palette[ppu_ram_read_fast(palette_address + 3)];
palette_address = 0x3F10 + (i << 2);
// still in the range of identity mapping, can bypass ppu_ram_map[]
sprite_color_cache[i][1] = palette[ppu_ram_read_fast(palette_address + 1)];
sprite_color_cache[i][2] = palette[ppu_ram_read_fast(palette_address + 2)];
sprite_color_cache[i][3] = palette[ppu_ram_read_fast(palette_address + 3)];
}
}
static uint32_t palette_attr_cache[4][256 >> 4][W >> 5];
// Rebuild palette_attr_cache from the attribute bytes of all four nametables.
//
// For each nametable, bytes are read sequentially starting at base + 0x3C0.
// Each byte fills two cache rows for one column: bits 0..1 go to the even row
// and bits 4..5 to the odd row below it. For columns 4 and above the byte is
// shifted right by two first, so bits 2..3 and 6..7 are used instead.
static void make_attr_cache(void) {
  for (int table = 0; table < 4; table++) {
    uint32_t addr = ppu_base_nametable_addresses[table] + 0x3C0;
    for (int row = 0; row < (256 >> 4); row += 2) {
      for (int col = 0; col < W >> 5; col++, addr++) {
        uint32_t attr = ppu_ram_read_fast(addr);
        if (col >= 4) {
          attr >>= 2;
        }
        palette_attr_cache[table][row][col] = attr & 3;            // top
        palette_attr_cache[table][row + 1][col] = (attr >> 4) & 3; // !top
      }
    }
  }
}
static uint32_t sprite_list[256][64];
static uint8_t sprite_list_cnt[256];
// Rebuild the per-scanline sprite lists.
//
// All counters are cleared, then each of the 64 sprites whose y is below 0xef
// is appended (by index, in ascending order) to the list of every row it
// covers: rows y..y+7, or y..y+15 when sprite_height is 16.
static void make_sprite_list(void) {
  memset(sprite_list_cnt, 0, sizeof(sprite_list_cnt));

  for (int spr = 0; spr < 64; spr++) {
    int top = spr_array[spr].y;
    if (top >= 0xef) {
      continue;
    }
    int rows = (sprite_height == 16) ? 16 : 8;
    for (int line = top; line < top + rows; line++) {
      sprite_list[line][sprite_list_cnt[line]++] = spr;
    }
  }
}
// Rebuild all render-time caches in one go: the colour caches, the attribute
// cache and the per-line sprite lists, in that order.
static void ppu_preprocess(void) {
make_color_cache();
make_attr_cache();
make_sprite_list();
}
extern bool do_update;
void ppu_draw_background_scanline(bool mirror) {
int tile_x, tile_y = ppu.scanline >> 3;
int taddr = base_nametable_address + (tile_y << 5) + (mirror ? 0x400 : 0);
int y_in_tile = ppu.scanline & 0x7;
int scroll_base = - ppu.PPUSCROLL_X + (mirror ? 256 : 0);
int taddr = base_nametable_address | (tile_y << 5);
int pattern_table_base = background_pattern_table_address | (ppu.scanline & 0x7);
int do_update = frame_cnt % 3 == 0;
bool top = (ppu.scanline & 31) < 16;
uint32_t attribute_address = (base_nametable_address + (mirror ? 0x400 : 0) + 0x3C0 + -1 + ((ppu.scanline >> 5) << 3));
uint32_t palette_attribute = ppu_ram_read_fast(attribute_address);
if (!top) {
palette_attribute >>= 4;
int scroll_base = 256 - ppu.PPUSCROLL_X;
int tile_x_max = 32;
if (mirror) {
scroll_base += 256;
taddr += 0x400;
// Skipping off-screen pixels
tile_x_max = (ppu.PPUSCROLL_X >> 3) + 1;
}
palette_attribute &= 3;
int *palette_cache_line = palette_cache[palette_attribute];
int off_screen_idx = 256 + ppu.PPUSCROLL_X - (mirror ? 256 : 0);
for (tile_x = ppu_shows_background_in_leftmost_8px() ? 0 : 1; tile_x < 32; tile_x++) {
// Skipping off-screen pixels
if ((tile_x << 3) > off_screen_idx)
continue;
uint32_t *p_palette_attribute = &palette_attr_cache[(ppu.PPUCTRL & 0x3) + mirror]
[ppu.scanline >> 4][0];
for (tile_x = ppu_shows_background_in_leftmost_8px() ? 0 : 1; tile_x < tile_x_max; tile_x ++) {
int tile_index = ppu_ram_read_fast(taddr);
uint32_t tile_address = background_pattern_table_address + (tile_index << 4);
uint32_t tile_address = pattern_table_base | (tile_index << 4);
uint32_t l = ppu_ram_read_fast(tile_address);
uint32_t XHLidx = (ppu_ram_read_fast(tile_address + 8) << 8) | l;
uint32_t l = ppu_ram_read_fast(tile_address + y_in_tile);
uint32_t h = ppu_ram_read_fast(tile_address + y_in_tile + 8);
// most of the tiles of bg are transparent, which are unnecessary to process
if (XHLidx != 0) {
uint32_t color16 = XHL16[XHLidx];
uint16_t *ptr = &ppu_screen_background[ppu.scanline][tile_x];
*ptr = color16 | (XHLmask16[XHLidx] & (*ptr)) ;
if (do_update) {
if ((tile_x & 3) == 0) {
attribute_address ++;
palette_attribute = ppu_ram_read_fast(attribute_address);
bool left = (tile_x < 16);
if (!top) {
palette_attribute >>= 4;
}
if (!left) {
palette_attribute >>= 2;
}
palette_attribute &= 3;
palette_cache_line = palette_cache[palette_attribute];
}
union {
uint64_t u64;
byte color[8];
} buf;
buf.u64 = XHL64[h][l];
if (do_update) {
uint32_t *color_cache_line = color_cache[p_palette_attribute[tile_x >> 2]];
byte *pXHL = &XHL[XHLidx][0];
#define macro(x) \
if (buf.color[x] != 0) { \
draw(scroll_base + x, ppu.scanline + 1, palette_cache_line[buf.color[x]]); \
}
if (pXHL[x] != 0) { \
draw_color(scroll_base + x, ppu.scanline, color_cache_line[pXHL[x]]); \
} \
// loop unrolling
macro(0); macro(1); macro(2); macro(3);
macro(4); macro(5); macro(6); macro(7);
#undef macro
}
}
uint64_t *ptr = (uint64_t*)&ppu_screen_background[ppu.scanline][(tile_x << 3)];
*ptr = (XHL64[h][l]) | (XHLmask[h][l] & (*ptr)) ;
taddr ++;
scroll_base += 8;
}
}
void ppu_draw_sprite_scanline() {
int do_update = frame_cnt % 3 == 0;
int scanline_sprite_count = 0;
int i, n;
/*
 * Compare one 8-pixel row of sprite 0 against the packed background buffer
 * and raise the sprite-0-hit flag when a pixel matches.
 *
 *   XHLidx  index of the decoded pattern row in XHL[]
 *   y       row of ppu_screen_background[] to compare against
 *   hflip   non-zero to read the pattern row right-to-left
 *
 * Nothing happens if a hit has already been recorded or the background is
 * not being shown.  Background pixels are stored 2 bits each, 8 per element.
 * A hit is flagged when a non-zero sprite pixel equals the background pixel
 * value at the same screen column.
 */
static inline void check_sprite0_hit(int XHLidx, int y, int hflip) {
  int px;

  if (ppu_sprite_hit_occured || !ppu_shows_background()) {
    return;
  }

  for (px = 0; px < 8; px ++) {
    int color = XHL[XHLidx][hflip ? 7 - px : px];

    // Colour 0 is transparent and can never hit.
    if (color == 0) {
      continue;
    }

    uint32_t bg16 = ppu_screen_background[y][(spr_array[0].x + px) >> 3];
    uint32_t bg = (bg16 >> (((spr_array[0].x + px) & 0x7) * 2)) & 0x3;

    if (bg == color) {
      ppu_set_sprite_0_hit(true);
      ppu_sprite_hit_occured = true;
    }
  }
}
int sprite_palette_cache[4][4];
for (i = 0; i < 4; i ++) {
uint32_t palette_address = 0x3F10 + (i << 2);
// still within the identity-mapped range, so ppu_ram_map[] can be bypassed
sprite_palette_cache[i][1] = ppu_ram_read_fast(palette_address + 1);
sprite_palette_cache[i][2] = ppu_ram_read_fast(palette_address + 2);
sprite_palette_cache[i][3] = ppu_ram_read_fast(palette_address + 3);
void ppu_draw_sprite_scanline() {
int n, i;
int nr_sprite = sprite_list_cnt[ppu.scanline];
// PPU can't render > 8 sprites
if (nr_sprite > 8) {
ppu_set_sprite_overflow(true);
nr_sprite = 8;
}
for (n = 0; n < 0x100; n += 4) {
uint32_t sprite_x = PPU_SPRRAM[n + 3];
uint32_t sprite_y = PPU_SPRRAM[n];
for (i = 0; i < nr_sprite; i ++) {
n = sprite_list[ppu.scanline][i];
// Skip if sprite not on scanline
if (sprite_y > ppu.scanline || sprite_y + sprite_height < ppu.scanline)
continue;
bool vflip = spr_array[n].atr & 0x80;
bool hflip = spr_array[n].atr & 0x40;
scanline_sprite_count++;
// PPU can't render > 8 sprites
if (scanline_sprite_count > 8) {
ppu_set_sprite_overflow(true);
return;
// break;
}
bool vflip = PPU_SPRRAM[n + 2] & 0x80;
bool hflip = PPU_SPRRAM[n + 2] & 0x40;
uint32_t tile_address = sprite_pattern_table_address + 16 * PPU_SPRRAM[n + 1];
int y_in_tile = ppu.scanline & 0x7;
uint32_t l = ppu_ram_read_fast(tile_address + (vflip ? (7 - y_in_tile) : y_in_tile));
uint32_t h = ppu_ram_read_fast(tile_address + (vflip ? (7 - y_in_tile) : y_in_tile) + 8);
uint32_t y = spr_array[n].y + y_in_tile;
uint32_t tile_address = sprite_pattern_table_address + 16 * spr_array[n].tile + (vflip ? (7 - y_in_tile) : y_in_tile);
uint32_t l = ppu_ram_read_fast(tile_address);
uint32_t XHLidx = (ppu_ram_read_fast(tile_address + 8) << 8) | l;
uint32_t palette_attribute = PPU_SPRRAM[n + 2] & 0x3;
int *palette_cache_line = sprite_palette_cache[palette_attribute];
int x;
for (x = 0; x < 8; x++) {
int color = XHL[h][l][ (hflip ? 7 - x : x) ];
if (n == 0) check_sprite0_hit(XHLidx, y, hflip);
// Color 0 is transparent
if (color != 0) {
int screen_x = sprite_x + x;
if (do_update) {
uint32_t palette_attribute = spr_array[n].atr & 0x3;
uint32_t *color_cache_line = sprite_color_cache[palette_attribute];
uint32_t sprite_x = spr_array[n].x + 256;
if (do_update) {
draw(screen_x, sprite_y + y_in_tile + 1, palette_cache_line[color]);
}
// Checking sprite 0 hit
if (n == 0 && !ppu_sprite_hit_occured && ppu_shows_background() && ppu_screen_background[sprite_y + y_in_tile][screen_x] == color) {
ppu_set_sprite_0_hit(true);
ppu_sprite_hit_occured = true;
}
byte *pXHL = &XHL[XHLidx][0];
if (hflip) {
#define macro(x) \
if (pXHL[x] != 0) { \
draw_color(sprite_x + 7 - x, y, color_cache_line[pXHL[x]]); \
}
macro(0); macro(1); macro(2); macro(3);
macro(4); macro(5); macro(6); macro(7);
#undef macro
}
else {
#define macro(x) \
if (pXHL[x] != 0) { \
draw_color(sprite_x + x, y, color_cache_line[pXHL[x]]); \
}
macro(0); macro(1); macro(2); macro(3);
macro(4); macro(5); macro(6); macro(7);
#undef macro
}
}
}
}
// Running totals of time spent in background rendering, sprite rendering and
// CPU emulation; ppu_cycle() adds to them and clears them after reporting.
static uint32_t background_time, sprite_time, cpu_time;

/*
 * Profiling helpers.
 *
 *   time_read(x)       store the current time in x
 *   time_diff(t1, t0)  elapsed time between two readings
 *   TIME_TYPE          type of a time reading (only defined under PROFILE)
 *
 * Without PROFILE, time_read() expands to nothing and time_diff() to 0, so
 * the call sites compile away.  Macro arguments are parenthesized so that
 * any expression may be passed safely.
 */
#ifdef PROFILE
#ifdef HAS_US_TIMER
# define time_read(x) read_us(&(x))
# define time_diff(t1, t0) us_timediff(&(t1), &(t0))
# define TIME_TYPE amtime
#else
# define time_read(x) ((x) = uptime())
# define time_diff(t1, t0) ((t1) - (t0))
# define TIME_TYPE uint32_t
#endif
#else
# define time_read(x)
# define time_diff(t1, t0) 0
#endif
// PPU Lifecycle
static inline void ppu_cycle() {
void ppu_cycle() {
#ifdef PROFILE
TIME_TYPE t0, t1, t2, t3, t4, t5;
#endif
if (!ppu.ready && cpu_clock() > 29658)
ppu.ready = true;
time_read(t0);
cpu_run(256);
time_read(t1);
ppu.scanline++;
if (ppu_shows_background()) {
// preprocessing
palette_cache_read();
if (ppu.scanline < H && ppu_shows_background()) {
ppu_draw_background_scanline(false);
ppu_draw_background_scanline(true);
}
if (ppu_shows_sprites()) {
time_read(t2);
cpu_run(85 - 16);
time_read(t3);
if (ppu.scanline < H && ppu_shows_sprites()) {
ppu_draw_sprite_scanline();
}
time_read(t4);
cpu_run(16);
time_read(t5);
cpu_time += time_diff(t1, t0) + time_diff(t3, t2) + time_diff(t5, t4);
background_time += time_diff(t2, t1);
sprite_time += time_diff(t4, t3);
if (ppu.scanline == 241) {
ppu_set_in_vblank(true);
ppu_set_sprite_0_hit(false);
@ -365,7 +445,19 @@ static inline void ppu_cycle() {
ppu.scanline = -1;
ppu_sprite_hit_occured = false;
ppu_set_in_vblank(false);
time_read(t0);
fce_update_screen();
time_read(t1);
#ifdef PROFILE
uint32_t total = cpu_time + background_time + sprite_time + time_diff(t1, t0);
printf("Time(us): cpu + bg + spr + scr = (%d + %d + %d + %d)\t= %d\n",
cpu_time, background_time, sprite_time, time_diff(t1, t0), total);
#endif
cpu_time = 0;
background_time = 0;
sprite_time = 0;
}
}
@ -392,22 +484,20 @@ inline byte ppu_io_read(word address)
ppu.scroll_received_x = 0;
ppu.PPUSCROLL = 0;
ppu.addr_received_high_byte = 0;
ppu_latch = value;
ppu_addr_latch = 0;
ppu_2007_first_read = true;
return value;
}
case 4: return ppu_latch = PPU_SPRRAM[ppu.OAMADDR];
case 4: return PPU_SPRRAM[ppu.OAMADDR];
case 7:
{
byte data;
if (ppu.PPUADDR < 0x3F00) {
data = ppu_latch = ppu_ram_read(ppu.PPUADDR);
data = ppu_ram_read_fast(ppu.PPUADDR);
}
else {
data = ppu_ram_read(ppu.PPUADDR);
ppu_latch = 0;
}
if (ppu_2007_first_read) {
@ -426,11 +516,13 @@ inline byte ppu_io_read(word address)
inline void ppu_io_write(word address, byte data)
{
address &= 7;
ppu_latch = data;
switch(address) {
switch (address & 7) {
case 0: if (ppu.ready) ppu_update_PPUCTRL_internal(data); break;
case 1: if (ppu.ready) byte_unpack(ppu.PPUMASK, data); break;
case 1: if (ppu.ready) byte_unpack(ppu.PPUMASK, data);
if (ppu_shows_background() && ppu_shows_sprites()) {
ppu_preprocess();
}
break;
case 3: ppu.OAMADDR = data; break;
case 4: PPU_SPRRAM[ppu.OAMADDR++] = data; break;
case 5:
@ -461,22 +553,15 @@ inline void ppu_io_write(word address, byte data)
}
case 7:
{
if (ppu.PPUADDR > 0x1FFF || ppu.PPUADDR < 0x4000) {
log("ppu_write.1(%x,%d)\n", ppu.PPUADDR ^ ppu.mirroring_xor, data);
if (ppu.PPUADDR > 0x1FFF && ppu.PPUADDR < 0x4000) {
ppu_ram_write(ppu.PPUADDR ^ ppu.mirroring_xor, data);
log("ppu_write.2(%x,%d)\n", ppu.PPUADDR, data);
ppu_ram_write(ppu.PPUADDR, data);
}
else {
log("ppu_write.3(%x,%d)\n", ppu.PPUADDR, data);
ppu_ram_write(ppu.PPUADDR, data);
}
ppu_ram_write(ppu.PPUADDR, data);
ppu.PPUADDR += vram_address_increment;
ppu.PPUADDR &= 0x3FFF;
}
}
ppu_latch = data;
}
void ppu_init()
@ -490,11 +575,6 @@ void ppu_init()
table_init();
}
// Store one byte into sprite RAM at the current OAM address, then advance
// the OAM address by one.
void ppu_sprram_write(byte data)
{
    PPU_SPRRAM[ppu.OAMADDR] = data;
    ppu.OAMADDR++;
}
// No-op: the body is empty and the color argument is ignored.
void ppu_set_background_color(byte color)
{
}

View File

@ -1,42 +1 @@
#include <psg.h>
#include <amdev.h>
// Bit 0 of the most recent byte passed to psg_io_write().
static byte prev_write;
// Position in the controller read sequence.  psg_io_write() resets it to 0 on
// a strobe; psg_io_read() advances it before indexing MAP, so MAP[1]..MAP[9]
// are the entries actually returned.  It starts at 10, so reads return 0
// until the first strobe.
static int p = 10;
// Read position -> key code used to index key_state[].  Entries not listed
// are zero-initialized.
// NOTE(review): the four uncommented entries (_KEY_W/_KEY_S/_KEY_A/_KEY_D) are
// presumably Up/Down/Left/Right -- confirm against the input handling code.
static int MAP[256] = {
0, // On/Off
_KEY_G, // A
_KEY_H, // B
_KEY_T, // SELECT
_KEY_Y, // START
_KEY_W,
_KEY_S,
_KEY_A,
_KEY_D,
255,
};
// Key state table defined elsewhere; indexed by the key codes stored in MAP.
extern int key_state[];
/*
 * Read from a PSG/controller register.
 *
 * Only 0x4016 (joystick 1) is handled.  After a strobe, each of the next nine
 * reads advances the read position and returns key_state[] for the key mapped
 * at that position.  Every other read, and every read of another address,
 * returns 0.
 *
 * The read position is advanced only while buttons remain to be reported, so
 * it stops at 9 instead of growing without bound on repeated reads (which
 * could eventually overflow the signed counter).
 */
inline byte psg_io_read(word address)
{
    // Joystick 1
    if (address == 0x4016 && p < 9) {
        p++;
        return key_state[MAP[p]];
    }
    return 0;
}
/*
 * Write to a PSG/controller register.
 *
 * Only 0x4016 is handled: a write whose bit 0 is 0, following a 0x4016 write
 * whose bit 0 was 1, is treated as a strobe and restarts the controller read
 * sequence.
 *
 * The previous-write bit is tracked for 0x4016 only.  Writes to any other
 * address are ignored, so they can neither hide nor fake the 1 -> 0 edge.
 */
inline void psg_io_write(word address, byte data)
{
    if (address != 0x4016) {
        return;
    }
    if ((data & 1) == 0 && prev_write == 1) {
        // strobe
        p = 0;
    }
    prev_write = data & 1;
}
int key_p = 10;