// reset halt // flash write_image erase /Users/cjh/StellarisWare/boards/ek-lm3s1968/conway/gcc/conway.bin #include "conway.h" #include "ritoled.h" #include #include #include #define CNWY_W (128) #define CNWY_H (96) #define CNWY_SIZE (CNWY_W*CNWY_H) uint32_t world[(CNWY_W+2)*3]; // Simple xorshift PRNG for generating the initial state static uint32_t y32 = 1; uint32_t xorshift32(void) { y32 ^= (y32 << 13); y32 ^= (y32 >> 17); return y32 ^= (y32 << 5); } void InitConway(void) { uint32_t * row0 = world; uint32_t * row1 = world + CNWY_W + 2; uint32_t * row2 = world + (CNWY_W + 2)*2; for(int j = 1; j < (CNWY_W + 1); ++j) { row0[j] = 0;//xorshift32(); row1[j] = xorshift32(); row2[j] = 0;//xorshift32(); } } //***************************************************************************** // Life rules: // a live cell with 0 or 1 neighbors dies // a live cell with 4, 5, 6, 7, 8 neighbors dies // a live cell with 2 or 3 neighbors lives // a dead cell with 3 neighbors becomes a live cell // // Cells are individual bits of words, 30 or 31 cells to each 32-bit word. The // first and last bits are copied from their conterparts on the neighboring // words. A 32 bit word is considered a "col" in the following, and it is // assumed the end bits have already been copied from the neighbors. // // bits set if one neighbor along col: // col1n = (col[n] << 1) ^ (col[n] >> 1) // bits set if two neighbors along col: // col2n = (col[n] << 1) & (col[n] >> 1) // // bits set if one neighbor along column: // col1n = col[n-1] ^ col[n+1] // bits set if two neighbors along column: // col2n = col[n-1] & col[n+1] // // Diagonal cells on the left and right hand sides // ldiag1n = col1n >> 1 // rdiag1n = col1n << 1 // ldiag2n = col2n >> 1 // rdiag2n = col2n << 1 // // n1 = (col1n ^ col1n) & (~col2n) & (~col2n) // n2 = (col1n & col1n) // n3 = (col1n & col2n) | (col2n & col1n) // n4 = (col2n & col2n) // // Life: // A cell can have up to 8 neighbors. Computing a cell state involves essentially // doing 32 additions of 3 bit numbers in parallel, the nth bit of the integers // being grouped together in 32-bit words. It does not matter if a count is greater // than 4, so the third bit is never cleared...after counting 4, the count will // always be at least 4. // // uint32_t A = (col[n+1] << 1) // uint32_t B = (col[n+1]) // uint32_t C = (col[n+1] >> 1) // uint32_t D = (col[n] << 1) // uint32_t E = (col[n] >> 1) // uint32_t F = (col[n-1] << 1) // uint32_t G = (col[n-1]) // uint32_t H = (col[n-1] >> 1) // // uint32_t bits0 = A ^ B // uint32_t bits1 = A & B // uint32_t bits2 = 0; // // uint32_t carry = bits0 & C // bits0 ^= C // bits2 |= bits1 & carry // bit-or carry straight into bits2 // bits1 ^= carry // // carry = bits0 & D // bits0 ^= D // bits2 |= bits1 & carry // bits1 ^= carry // // carry = bits0 & E // bits0 ^= E // bits2 |= bits1 & carry // bits1 ^= carry // // carry = bits0 & F // bits0 ^= F // bits2 |= bits1 & carry // bits1 ^= carry // // carry = bits0 & G // bits0 ^= G // bits2 |= bits1 & carry // bits1 ^= carry // // carry = bits0 & H // bits0 ^= H // bits2 |= bits1 & carry // bits1 ^= carry // // // Set cell if already set and exactly 2 neighbors, or if exactly 3 neighbors, // otherwise clear cell. // col[n] = col[n] & bits1 & ~bits0;// keep if equal to 2 (bits1 set, bits0 clear) // col[n] |= bits1 & bits0;// set if equal to 3 // col[n] &= ~bits2;// clear if more than 3 // //***************************************************************************** void Conway(void) { // Screen is split into three rows, 31, 30, and 31 pixels tall. // Each word of world[] is a column of one of these cols. // LSB is uppermost. Row 0 is uppermost, row 2 is bottommost. // The bits on the boundaries between cols must be copied from // their counterparts on the adjacent cols. uint32_t * row0 = world; uint32_t * row1 = world + CNWY_W + 2; uint32_t * row2 = world + (CNWY_W + 2)*2; // Zero out left and right edges // Top and bottom edges are already zeroed by shift operations. row0[0] = row1[0] = row2[0] = 0; row0[CNWY_W+1] = row1[CNWY_W+1] = row2[CNWY_W+1] = 0; for(int j = 1; j < (CNWY_W+1); ++j) { // top row, bottom edge...copy bit 1 of row1 to bit 31 of row0 row0[j] = (row0[j] & ~(1 << 31)) | ((row1[j] & (1 << 1)) << 30); // middle row, top edge...copy bit 30 of row0 to bit 0 of row1 row1[j] = (row1[j] & ~1) | ((row0[j] >> 30) & 1); // middle row, bottom edge...copy bit 1 of row2 to bit 31 of row1 row1[j] = (row1[j] & ~(1 << 31)) | ((row2[j] << 30) & (1 << 31)); // bottom row, top edge...copy bit 30 of row1 to bit 0 of row2 row2[j] = (row2[j] & ~1) | ((row1[j] >> 30) & 1); } // All computations are identical regardless of row, so all rows can be done // at once. uint32_t prevState = 0;// backup of initial state of previous column for(int j = 1; j < ((CNWY_W+2)*3 - 1); ++j) { // Cells A and B, disregard the variable names right here... uint32_t carry = world[j+1] << 1; uint32_t tmp = world[j+1]; uint32_t bits0 = carry ^ tmp; uint32_t bits1 = carry & tmp; uint32_t bits2 = 0; // Sum up number of live neighboring cells for each cell #define ACCUM(x) tmp = (x); \ carry = bits0 & tmp; bits0 ^= tmp; \ bits2 |= bits1 & carry; bits1 ^= carry; ACCUM(world[j+1] >> 1) // Cell C ACCUM(world[j] << 1) // Cell D ACCUM(world[j] >> 1) // Cell E ACCUM(prevState << 1) // Cell F ACCUM(prevState) // Cell G ACCUM(prevState >> 1) // Cell H // Conway's Life rules prevState = world[j]; world[j] = prevState & bits1 & ~bits0;// keep if equal to 2 (bits1 set, bits0 clear) world[j] |= bits1 & bits0;// set if equal to 3 world[j] &= ~bits2;// clear if more than 3 } } // Double the low 16 bits of x. That is, 0101 becomes 00110011. inline uint32_t DoubleBits(uint32_t x) { // Spread the bits x = (x | (x << 8)) & 0x00FF00FF;// shift high byte over x = (x | (x << 4)) & 0x0F0F0F0F;// shift high nybbles over x = (x | (x << 2)) & 0x33333333;// shift high bit pairs over x = (x | (x << 1)) & 0x55555555;// shift high bits over // and bit-or with an offset copy to fill the gaps return x | (x << 1); } // Quadruple the low 8 bits of x. That is, 0101 becomes 0000111100001111. inline uint32_t QuadBits(uint32_t x) {return DoubleBits(DoubleBits(x));} // Each 32 bit word holds 4 4-bit pixels belonging to a single column of the // display, but each byte of data sent to the display specifies two pixels // from adjacent columns: two columns of data are sent at the same time, one // row per byte. Need to interleave nybbles from two columns. // This could be avoided by switching the orientation from three 30-31 pixel // tall rows to four 30-31 pixel wide columns, at some cost in efficiency in // computation. inline void WriteColBytes(uint32_t a, uint32_t b) { // a and b are an adjacent columns of 8 pixels spread out across 32 bits // need to join nybbles of the same row into each byte uint8_t a0 = a & 0xFF, b0 = b & 0xFF; uint8_t a1 = (a>>8) & 0xFF, b1 = (b>>8) & 0xFF; uint8_t a2 = (a>>16) & 0xFF, b2 = (b>>16) & 0xFF; uint8_t a3 = (a>>24) & 0xFF, b3 = (b>>24) & 0xFF; RIT_WriteByte((a0 << 4) | (b0 & 0x0F)); RIT_WriteByte((a0 & 0xF0) | (b0 >> 4)); RIT_WriteByte((a1 << 4) | (b1 & 0x0F)); RIT_WriteByte((a1 & 0xF0) | (b1 >> 4)); RIT_WriteByte((a2 << 4) | (b2 & 0x0F)); RIT_WriteByte((a2 & 0xF0) | (b2 >> 4)); RIT_WriteByte((a3 << 4) | (b3 & 0x0F)); RIT_WriteByte((a3 & 0xF0) | (b3 >> 4)); } inline void WriteCols(uint32_t a, uint32_t b) { WriteColBytes(QuadBits(a & 0xFF), QuadBits(b & 0xFF)); WriteColBytes(QuadBits((a >> 8) & 0xFF), QuadBits((b >> 8) & 0xFF)); WriteColBytes(QuadBits((a >> 16) & 0xFF), QuadBits((b >> 16) & 0xFF)); WriteColBytes(QuadBits((a >> 24) & 0xFF), QuadBits((b >> 24) & 0xFF)); } void UpdateDisplay(void) { uint32_t * row0 = world; uint32_t * row1 = world + CNWY_W + 2; uint32_t * row2 = world + (CNWY_W + 2)*2; // for(int x = 1; x < 129; ++x) for(int xx = 0; xx < 63; ++xx) { RIT_SendCmd2(RIT_SET_COL_ADDR, xx, xx); RIT_SendCmd2(RIT_SET_ROW_ADDR, 0, 127); RIT_SendCmd(RIT_SET_REMAP, RIT_REMAP_V); RIT_SetDataMode(); // set data mode int x = 2*xx + 1; // Packed column words: // fill LSB (temp bit) of row2 with MSB (bit 30) of row1 uint32_t col2 = (row2[x] & ~1) | ((row1[x] >> 30) & 1); // shift by two (temp bit and copied bit), fill 3 LSB of row1 with 3 MSB (28-30) of row0 uint32_t col1 = ((row1[x] << 2) & ~7) | ((row0[x] >> 28) & 7); // just shift by 4 (temp bit and 3 copied bits) uint32_t col0 = row0[x] << 4; // fill LSB (temp bit) of row2 with MSB (bit 30) of row1 uint32_t col2b = (row2[x+1] & ~1) | ((row1[x] >> 30) & 1); // shift by two (temp bit and copied bit), fill 3 LSB of row1 with 3 MSB (28-30) of row0 uint32_t col1b = ((row1[x+1] << 2) & ~7) | ((row0[x] >> 28) & 7); // just shift by 4 (temp bit and 3 copied bits) uint32_t col0b = row0[x+1] << 4; // Rather than flip bit orders, flip rows around WriteCols(col0, col0b); WriteCols(col1, col1b); WriteCols(col2, col2b); } }