Index: amdk8/raminit.c
===================================================================
--- amdk8/raminit.c	(revision 2776)
+++ amdk8/raminit.c	(working copy)
@@ -395,12 +395,12 @@
 	 *	   111 = Oldest entry in DCQ can be bypassed 7 times
 	 * [31:28] Reserved
 	 */
-	PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000, 
+	PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
 	(4 << 25)|(0 << 24)| 
 	(0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)| 
 	(1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)| 
 	(2 << 14)|(0 << 13)|(0 << 12)| 
-	(0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)| 
+	(0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
 	(0 << 3) |(0 << 1) |(0 << 0),
 	/* DRAM Config High Register
 	 * F2:0x94
@@ -1177,6 +1177,7 @@
 	int i;
 	int registered;
 	int unbuffered;
+	int has_dualch = is_opteron(ctrl);
 	uint32_t dcl;
 	unbuffered = 0;
 	registered = 0;
@@ -1201,17 +1202,19 @@
 	if (unbuffered && registered) {
 		die("Mixed buffered and registered dimms not supported");
 	}
-#if 1
-	// yhlu debug: Athlon64 939 can do dual channel, but it uses unbuffered DIMMs
-	if (unbuffered && is_opteron(ctrl)) {
-		die("Unbuffered Dimms not supported on Opteron");
-	}
-#endif
 
 	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
 	dcl &= ~DCL_UnBufDimm;
 	if (unbuffered) {
-		dcl |= DCL_UnBufDimm;
+		if (has_dualch && !is_cpu_pre_d0()) {
+			dcl |= DCL_UnBufDimm | DCL_DualDIMMen | DCL_En2T | DCL_UpperCSMap;
+			/* TODO: En2T and UpperCSMap are always set in this branch. */
+			/* TODO: fix 2T clocks enable. */
+			/* TODO: add UpperCSMap support for the 939 sockets. */
+			/* TODO: what about Burst2Opt for revE? (perhaps not...) */
+		} else {
+			dcl |= DCL_UnBufDimm;
+		}
 	}
 	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
 #if 0
@@ -1333,6 +1336,9 @@
 	uint32_t dch_memclk;
 	uint16_t dch_tref4k, dch_tref8k;
 	uint8_t	 dtl_twr;
+	uint8_t	 dtl_twtr;
+	uint8_t	 dtl_trwt[3][3]; /* [CAS latency 2/2.5/3][dual channel/registered 64bit/unbuffered 64bit] */
+	uint8_t	 rdpreamble[4]; /* 0 is for registered, 1 for 1-2 DIMMS, 2 and 3 for 3 or 4 unreg dimm slots */
 	char name[9];
 };
 
@@ -1349,6 +1355,9 @@
 			.dch_tref4k = DTH_TREF_100MHZ_4K,
 			.dch_tref8k = DTH_TREF_100MHZ_8K,
 			.dtl_twr    = 2,
+			.dtl_twtr   = 1,
+			.dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+			.rdpreamble = { ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0) }
 		},
 		{
 			.name	    = "133Mhz\r\n",
@@ -1360,6 +1369,9 @@
 			.dch_tref4k = DTH_TREF_133MHZ_4K,
 			.dch_tref8k = DTH_TREF_133MHZ_8K,
 			.dtl_twr    = 2,
+			.dtl_twtr   = 1,
+			.dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+			.rdpreamble = { ((8 << 1) + 0), ((7 << 1) + 0), ((7 << 1) + 1), ((7 << 1) + 0) }
 		},
 		{
 			.name	    = "166Mhz\r\n",
@@ -1371,6 +1383,9 @@
 			.dch_tref4k = DTH_TREF_166MHZ_4K,
 			.dch_tref8k = DTH_TREF_166MHZ_8K,
 			.dtl_twr    = 3,
+			.dtl_twtr   = 1,
+			.dtl_trwt   = { { 3, 2, 3 }, { 3, 3, 4 }, { 4, 3, 4 }},
+			.rdpreamble = { ((7 << 1) + 1), ((6 << 1) + 0), ((6 << 1) + 1), ((6 << 1) + 0) }
 		},
 		{
 			.name	    = "200Mhz\r\n",
@@ -1382,6 +1397,9 @@
 			.dch_tref4k = DTH_TREF_200MHZ_4K,
 			.dch_tref8k = DTH_TREF_200MHZ_8K,
 			.dtl_twr    = 3,
+			.dtl_twtr   = 2,
+			.dtl_trwt   = { { 0, 0, 3 }, { 0, 0, 4 }, { 3, 3, 4 }},
+			.rdpreamble = { ((7 << 1) + 0), ((5 << 1) + 0), ((5 << 1) + 1), ((5 << 1) + 1) }
 		},
 		{
 			.cycle_time = 0x00,
@@ -1397,9 +1415,9 @@
 		die("min_cycle_time to low");
 	}
 	print_spew(param->name);
-#ifdef DRAM_MIN_CYCLE_TIME
+//#ifdef DRAM_MIN_CYCLE_TIME
 	print_debug(param->name);
-#endif
+//#endif
 	return param;
 }
 
@@ -1425,6 +1443,10 @@
 
 
 	value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
+	print_debug("NB CAP REG:");
+	print_debug_hex32(value);
+	print_debug("\n");
+
 	min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
 	bios_cycle_time = min_cycle_times[
 		read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)];
@@ -1877,76 +1899,48 @@
 static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
 {
 	uint32_t dth;
-	unsigned clocks;
-	clocks = 1; /* AMD says hard code this */
+	/* Program the Twtr field of DRAM_TIMING_HIGH from the per-frequency table entry. */
 	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
 	dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
-	dth |= ((clocks - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
+	dth |= ((param->dtl_twtr - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
 	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
 }
 
 static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
 {
 	uint32_t dth, dtl;
-	unsigned divisor;
 	unsigned latency;
 	unsigned clocks;
+	int lat, mtype;
 
 	clocks = 0;
 	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
 	latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
-	divisor = param->divisor;
 
-	if (is_opteron(ctrl)) {
-		if (latency == DTL_CL_2) {
-			if (divisor == ((6 << 0) + 0)) {
-				/* 166Mhz */
-				clocks = 3;
-			}
-			else if (divisor > ((6 << 0)+0)) {
-				/* 100Mhz && 133Mhz */
-				clocks = 2;
-			}
-		}
-		else if (latency == DTL_CL_2_5) {
-			clocks = 3;
-		}
-		else if (latency == DTL_CL_3) {
-			if (divisor == ((6 << 0)+0)) {
-				/* 166Mhz */
-				clocks = 4;
-			}
-			else if (divisor > ((6 << 0)+0)) {
-				/* 100Mhz && 133Mhz */
-				clocks = 3;
-			}
-		}
+	if (is_opteron(ctrl)) {
+		mtype = 0; /* dual channel */
+	} else if (is_registered(ctrl)) {
+		mtype = 1; /* registered 64bit interface */
+	} else {
+		mtype = 2; /* unbuffered 64bit interface */
 	}
-	else /* Athlon64 */ {
-		if (is_registered(ctrl)) {
-			if (latency == DTL_CL_2) {
-				clocks = 2;
-			}
-			else if (latency == DTL_CL_2_5) {
-				clocks = 3;
-			}
-			else if (latency == DTL_CL_3) {
-				clocks = 3;
-			}
-		}
-		else /* Unbuffered */{
-			if (latency == DTL_CL_2) {
-				clocks = 3;
-			}
-			else if (latency == DTL_CL_2_5) {
-				clocks = 4;
-			}
-			else if (latency == DTL_CL_3) {
-				clocks = 4;
-			}
-		}
+
+	switch (latency) {
+		case DTL_CL_2:
+			lat = 0;
+			break;
+		case DTL_CL_2_5:
+			lat = 1;
+			break;
+		case DTL_CL_3:
+			lat = 2;
+			break;
+		default:
+			lat = -1;
 	}
-	if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
+	/* Only index the table with a known CAS latency; an unknown one is rejected below. */
+	clocks = (lat < 0) ? 0 : param->dtl_trwt[lat][mtype];
+	if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX) || (lat == -1)) {
 		die("Unknown Trwt\r\n");
 	}
 	
@@ -1977,85 +1971,44 @@
 static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
 {
 	uint32_t dch;
-	unsigned divisor;
 	unsigned rdpreamble;
-	divisor = param->divisor;
-	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
-	dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
-	rdpreamble = 0;
-	if (is_registered(ctrl)) {
-		if (divisor == ((10 << 1)+0)) {
-			/* 100Mhz, 9ns */
-			rdpreamble = ((9 << 1)+ 0);
+	int slots, i;
+
+	slots = 0;
+
+	for(i = 0; i < 4; i++) {
+		if (ctrl->channel0[i]) {
+			slots += 1;
 		}
-		else if (divisor == ((7 << 1)+1)) {
-			/* 133Mhz, 8ns */
-			rdpreamble = ((8 << 1)+0);
-		}
-		else if (divisor == ((6 << 1)+0)) {
-			/* 166Mhz, 7.5ns */
-			rdpreamble = ((7 << 1)+1);
-		}
-		else if (divisor == ((5 << 1)+0)) {
-			/* 200Mhz,  7ns */
-			rdpreamble = ((7 << 1)+0);
-		}
 	}
-	else {
-		int slots;
-		int i;
-		slots = 0;
-		for(i = 0; i < 4; i++) {
-			if (ctrl->channel0[i]) {
-				slots += 1;
-			}
-		}
-		if (divisor == ((10 << 1)+0)) {
-			/* 100Mhz */
-			if (slots <= 2) {
-				/* 9ns */
-				rdpreamble = ((9 << 1)+0);
-			} else {
-				/* 14ns */
-				rdpreamble = ((14 << 1)+0);
-			}
-		}
-		else if (divisor == ((7 << 1)+1)) {
-			/* 133Mhz */
-			if (slots <= 2) {
-				/* 7ns */
-				rdpreamble = ((7 << 1)+0);
-			} else {
-				/* 11 ns */
-				rdpreamble = ((11 << 1)+0);
-			}
-		}
-		else if (divisor == ((6 << 1)+0)) {
-			/* 166Mhz */
-			if (slots <= 2) {
-				/* 6ns */
-				rdpreamble = ((7 << 1)+0);
-			} else {
-				/* 9ns */
-				rdpreamble = ((9 << 1)+0);
-			}
-		}
-		else if (divisor == ((5 << 1)+0)) {
-			/* 200Mhz */
-			if (slots <= 2) {
-				/* 5ns */
-				rdpreamble = ((5 << 1)+0);
-			} else {
-				/* 7ns */
-				rdpreamble = ((7 << 1)+0);
-			}
-		}
+
+	/* map to index to param.rdpreamble array */
+	if (is_registered(ctrl)) {
+		i = 0;
+	} else if (slots < 3) {
+		i = 1;
+	} else if (slots == 3) {
+		i = 2;
+	} else if (slots == 4) {
+		i = 3;
+	} else {
+		die("Unknown rdpreamble for this nr of slots");
 	}
+
+	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
+	dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
+	rdpreamble = param->rdpreamble[i];
+
 	if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
 		die("Unknown rdpreamble");
 	}
+
 	dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
 	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
+
+	print_debug("RDPREAMBLE: ");
+	print_debug_hex32(rdpreamble);
+	print_debug("\r\n");
 }
 
 static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param)
Index: amdk8/setup_resource_map.c
===================================================================
--- amdk8/setup_resource_map.c	(revision 2776)
+++ amdk8/setup_resource_map.c	(working copy)
@@ -1,4 +1,4 @@
-#define RES_DEBUG 0
+#define RES_DEBUG 1
 
 static void setup_resource_map_offset(const unsigned int *register_values, int max, unsigned offset_pci_dev, unsigned offset_io_base)
 {       
@@ -11,7 +11,7 @@
                 device_t dev;
                 unsigned where;
                 unsigned long reg;
-#if 0
+#if 1
 	#if CONFIG_USE_PRINTK_IN_CAR
                 prink_debug("%08x <- %08x\r\n", register_values[i] +  offset_pci_dev, register_values[i+2]);
         #else
Index: amdk8/northbridge.c
===================================================================
--- amdk8/northbridge.c	(revision 2776)
+++ amdk8/northbridge.c	(working copy)
@@ -562,7 +562,7 @@
 	base  |= (resource->base >> 8) & 0xffffff00;
 	base  |= 3;
 	limit &= 0x00000048;
-	limit |= ((resource->base + resource->size) >> 8) & 0xffffff00;
+	limit |= ((resource->base + resource->size - 1) >> 8) & 0xffffff00;
 	limit |= (resource->index & 3) << 4;
 	limit |= (nodeid & 7);
 	f1_write_config32(reg + 0x4, limit);
Index: amdk8/amdk8.h
===================================================================
--- amdk8/amdk8.h	(revision 2776)
+++ amdk8/amdk8.h	(working copy)
@@ -134,7 +134,9 @@
 #define	 DCL_D_DRV	   (1<<1)
 #define	 DCL_QFC_EN	   (1<<2)
 #define	 DCL_DisDqsHys	   (1<<3)
+#define  DCL_Burst2Opt     (1<<5)
 #define	 DCL_DramInit	   (1<<8)
+#define  DCL_DualDIMMen    (1<<9)
 #define	 DCL_DramEnable	   (1<<10)
 #define	 DCL_MemClrStatus  (1<<11)
 #define	 DCL_ESR	   (1<<12)
@@ -147,7 +149,8 @@
 #define	 DCL_DisInRcvrs    (1<<24)
 #define	 DCL_BypMax_SHIFT  25
 #define	 DCL_En2T          (1<<28)
-
+#define  DCL_UpperCSMap    (1<<29)
+	
 #define DRAM_CONFIG_HIGH   0x94
 #define	 DCH_ASYNC_LAT_SHIFT  0
 #define	 DCH_ASYNC_LAT_MASK   0xf

