/*******************************************************************************
 * 
 * The content of this file or document is CONFIDENTIAL and PROPRIETARY
 * to Mobilygen Corporation.  It is subject to the terms of a
 * License Agreement between Licensee and Mobilygen Corporation.
 * restricting among other things, the use, reproduction, distribution
 * and transfer.  Each of the embodiments, including this information and
 * any derivative work shall retain this copyright notice.
 * 
 * Copyright 2008 Mobilygen Corporation
 * All rights reserved.
 * 
 * QuArc is a registered trademark of Mobilygen Corporation.
 * 
 *******************************************************************************/

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/device.h>
#include <scsi/scsi_host.h>
#include <linux/platform_device.h>
#include <linux/libata.h>
#include <linux/scatterlist.h>
#include <linux/proc_fs.h>
#include <linux/completion.h>
#include <linux/timer.h>
#include <linux/clk.h>
#include <linux/amba/bus.h>
#include <linux/libata.h>
#include <scsi/scsi_cmnd.h>
#include <asm/uaccess.h>

#include <linux/aumb4000_ide_device.h>
#include <asm/arch/mobi_qccsisc.h>
#include <asm/arch/mobi_reset.h>
#include <asm/arch/mg3500_devices.h>
#include "pata_aumb4000.h"

#define DRV_NAME    "pata_aumb4000"
#define DRV_VERSION "1.0"

/* Fetch this driver's private data from an ata_port pointer.  The argument
 * and the whole expansion are parenthesized so the macro stays correct when
 * it is handed an expression or used inside a larger one.
 */
#define PRV_FROM_AP2(x)      ((struct mobi_ata_prv_data*)((x)->host)->private_data)

/* I/O base that every REG_READ/REG_WRITE offset is added to */
void __iomem *reg_iobase;
/* divisor used by the NS_TO_CYCLES* macros to convert ns into cycle counts */
uint32_t ns_per_cycle;

/* 32-bit read of the controller register at byte offset 'offset' */
#define REG_READ(offset) \
	readl(reg_iobase+(offset))

/* 32-bit write of 'data' to the controller register at byte offset 'offset' */
#define REG_WRITE(data, offset) \
	writel((data), reg_iobase+(offset))

/* Write the DW and CW mask bits to the CF status register.
 * NOTE: the expansion deliberately keeps its historical trailing ';' so any
 * caller that relies on it keeps compiling; "CLR_STATUS();" just yields an
 * extra empty statement.
 */
#define CLR_STATUS() \
	REG_WRITE(AUMB4000_CF_STATUS_DW_MASK |AUMB4000_CF_STATUS_CW_MASK, \
			AUMB4000_CF_STATUS_OFFSET);

/* copied into dev->max_sectors by aumb4000_dev_config(); read-only in sysfs */
int max_sectors = 8;
module_param(max_sectors, int, 0444);
MODULE_PARM_DESC(max_sectors, "set max_sectors for a dma/sector transfer, default=8");

/* real DMA commands are disabled; bmdma_start issues READ/WRITE MULTIPLE */
#undef UDMA_TESTING_ENABLE 

/* debug verbosity, defined in another translation unit */
extern int loglevel;

#if AUMB4000_DEBUG
module_param(loglevel, int, 0644);
MODULE_PARM_DESC(loglevel, "Set module debug level(0-5)");
static struct proc_dir_entry *ide_proc_dir=NULL;
#endif

/* forward declarations for routines referenced before their definition */
static void aumb4000_dma_handler(mobi_dma_handle dmah, mobi_dma_event dma_event, void* p);
static void aumb4000_irq_clear(struct ata_port* ap);
static void aumb4000_bmdma_start(struct ata_queued_cmd* qc);

/*
#ifdef CONFIG_SYSFS
#include "mobi_ata_sysfs.c"
static void show_stat_err(struct mobi_ata_prv_data* prv, const char* caller);
static int scsi_scsi_write_proc(struct file*, const char*, unsigned long, void*);
extern int scsi_scsi_debug_proc(char*, char**, off_t, int, int*, void*);
extern int controlDump(unsigned long regbase, char* buf, unsigned short i);
extern int cfStatusDump(unsigned long regbase, char* buf, unsigned short i);
extern int statusDump(unsigned long regbase, char* buf, unsigned short i);
extern int cfErrorDump(unsigned long regbase, char* buf, unsigned short i);
#endif
*/

/* 
 * some nice info
 * PIO Modes: 0 [3.3MB/s], 1 [5.2MB/s], 2  [8.3MB/s], 3 [11.1MB/s], 4 [16.7MB/s]
 * Multiword DMA Modes: 0 [4.2MB/s], 1 [13.3MB/s], 2 [16.7MB/s] 3 [20MB/s], 4 [25MB/s]
 * Ultra DMA Modes: 0 [16.7MB/s], 1 [25.0MB/s], 2 [33.3MB/s], 3 [44.4MB/s], 4 [66.7MB/s], 5 [100.0MB/s]
 *
 *
 * udma33  is 0,1,2
 * udma66  is 3,4
 * udma100 is 5
 * udma133 is 6
 *
 */
/* this sucks, I want a data structure of an array of timing defs 
 * where each array has 17 elements of different types of structs 
 * depending on the register.  just having structs won't work
 * since I want to iterate over the 17 elements...  
 */
/* I don't really know yet what all these different modes are, so I'll
 * assume they all could have different timings although, as you 
 * can see, they are mostly empty right now.
 * Also, if we have variations in timings, we may change this table
 * to also have mfg ids
 */
/* XXX WARNING at this time all pio modes will use the timings set in PIO0, 
 * see set_piomode to change this
 */
/*
 * Per-transfer-mode timing table, indexed by mode: seven PIO slots, five
 * multiword-DMA slots, then eight UDMA slots.  Only PIO_0 and UDMA_2 are
 * populated; every other slot is zero-initialized.
 *
 * Values are in nanoseconds; aumb4000_set_mode_timing() divides them by
 * ns_per_cycle and packs them into the timing registers.  Each row below
 * holds the fields of one register: four fields for the read/write
 * registers, two or three for the cmd registers.
 * NOTE(review): the rows are consumed in the order given by
 * timing_regs_offset[], which is defined elsewhere -- confirm the two stay
 * in step.
 */
struct mode_timings_defs drive_mode_timings[] =  {
	{ /* PIO_0 */ 
		/*  fld0   fld1  fld2  fld3 */
		.mode_timing_values = {
			   0,    32,   96,   24, /* read0: taosa toaha tceosa toceha     */
			   0,     0,  312,  952, /* read1: tadela tcedela toedela trca   */
			   0,    32,  424,  232, /* read2: towha toehza trpa trdmarq     */
			 136,    32,  328,    0, /* read3: taosc toahc tceosc tocehc     */
			1088,     0,  328, 1192, /* read4: tadelc tcedelc toedelc trcc   */
			 616,	 56,  328,    0, /* read5: towhc toehzc trpc rsvd0 	     */
			   0,    32,   96,   24, /* write0: tawsa twaha tcewsa twceha    */
			   0,     0,  192,   32, /* write1: taws2a tcewsa2 tdwsa twdha   */
			 952,   424,    0,   72, /* write2: twca twpa twoha twdmarq      */
			 136,    32,  328,    0, /* write3: tawsc twahc tcewsc twcehc    */
			   0,     0,  112,   56, /* write4: taws2c tcews2c tdwsc twdhc   */
			   0,   328,    0,   16, /* write5: twcc twpc twohc tdhz         */
    	 4194304,   800,             /* cmd0: tdiag tcmd                     */
		   49152,   800,  800,       /* cmd1: trstp tdevsel tdma             */
		 4194304,     0,             /* cmd2: tsrst rsvd0                    */
		 4194304,     0,             /* cmd3: thrst rsvd0                    */
		 4194304,     0,             /* cmd4: tpwr  rsvd0                    */
		},

	}, 
	{ /* PIO_1 */ }, { /* PIO_2 */ }, { /* PIO_3 */ }, 
	{ /* PIO_4 */ }, { /* PIO_5 */ }, { /* PIO_6 */ },
	{ /* MW_DMA_0 */ }, { /* MW_DMA_1 */ }, { /* MW_DMA_2 */ }, 
	{ /* MW_DMA_3 */ }, { /* MW_DMA_4 */ },
	{ /* UDMA_0 */ }, { /* UDMA_1 */ }, 
	{ /* UDMA_2 */ 
		/*  fld0   fld1  fld2  fld3 */
		.mode_timing_values = {
			  16,    16,   16,   16, /* read0: taosa toaha tceosa toceha     */
			   8,     8,    8,    8, /* read1: tadela tcedela toedela trca   */
			  16,   100,   50,   25, /* read2: towha toehza trpa trdmarq     */
			  66,    16,  166,    0, /* read3: taosc toahc tceosc tocehc     */
			   0,     0,  166,  591, /* read4: tadelc tcedelc toedelc trcc   */
			 308,	 25,  166,    0, /* read5: towhc toehzc trpc rsvd0 	     */
			   0,    16,   50,    8, /* write0: tawsa twaha tcewsa twceha    */
			   0,     0,   91,   16, /* write1: taws2a tcewsa2 tdwsa twdha   */
			 475,   208,    0,   33, /* write2: twca twpa twoha twdmarq      */
			  66,    16,  166,    0, /* write3: tawsc twahc tcewsc twcehc    */
			   0,     0,   58,   25, /* write4: taws2c tcews2c tdwsc twdhc   */
			   0,   166,    0,    8, /* write5: twcc twpc twohc tdhz         */
    	  125824,   384,             /* cmd0: tdiag tcmd                     */
		   24575,   384,  384,       /* cmd1: trstp tdevsel tdma             */
		  125824,     0,             /* cmd2: tsrst rsvd0                    */
		  125824,     0,             /* cmd3: thrst rsvd0                    */
		  125824,     0,             /* cmd4: tpwr  rsvd0                    */
		},
	}, 
	{ /* UDMA_3 */ }, { /* UDMA_4 */ }, { /* UDMA_5 */ }, 
	{ /* UDMA_6 */ }, { /* UDMA_7 */ },
};
	
/*
 * aumb4000_set_mode_timing - program the controller timing registers
 * @current_mode: index into drive_mode_timings[]
 *
 * Walks timing_regs_offset[], converts the nanosecond values of the selected
 * table entry into cycle counts (each field saturates at its bit width) and
 * writes one packed 32-bit word per timing register.
 *
 * Returns 0 on success, or -EINVAL when @current_mode is outside the table
 * or ns_per_cycle is still zero (it is the divisor of every conversion).
 *
 * NOTE(review): most drive_mode_timings[] entries are empty; selecting one
 * of those programs all-zero timings.  That is not rejected here.
 */
static int aumb4000_set_mode_timing(uint8_t current_mode)
{
	unsigned int treg;
	uint32_t *current_timings;
	uint32_t offset, cycle_counts, treg_time_offset;

	/* reject an index past the end of the table instead of reading junk */
	if (current_mode >= ARRAY_SIZE(drive_mode_timings))
		return -EINVAL;

	/* the conversion macros below divide by ns_per_cycle */
	if (ns_per_cycle == 0)
		return -EINVAL;

	current_timings =
		&(drive_mode_timings[current_mode].mode_timing_values[0]);

	/* assume ns_per_cycle is global for now */
#define BITSAT(want, max) ((uint32_t)want > (uint32_t)max ? max : want)
#define NS_TO_CYCLES8(ns)	BITSAT(ns/ns_per_cycle, BIT_FLD8_MAX_VALUE)
#define NS_TO_CYCLES16(ns)	BITSAT(ns/ns_per_cycle, BIT_FLD16_MAX_VALUE)
#define NS_TO_CYCLES24(ns)	BITSAT(ns/ns_per_cycle, BIT_FLD24_MAX_VALUE)

#define ASSEMBLE_2MEMBER_TIMING_REG(x) \
	((((NS_TO_CYCLES24(current_timings[x])) <<  0) & 0x00ffffff) | \
	 (((NS_TO_CYCLES8(current_timings[x+1])) << 24) & 0xff000000))
#define ASSEMBLE_3MEMBER_TIMING_REG(x) \
	((((NS_TO_CYCLES16(current_timings[x])) <<  0) & 0x0000ffff) | \
	 (((NS_TO_CYCLES8(current_timings[x+1])) << 16) & 0x00ff0000) | \
	 (((NS_TO_CYCLES8(current_timings[x+2])) << 24) & 0xff000000))
#define ASSEMBLE_4MEMBER_TIMING_REG(x) \
	((((NS_TO_CYCLES8(current_timings[x])) <<  0) & 0x000000ff) | \
	 (((NS_TO_CYCLES8(current_timings[x+1])) <<  8) & 0x0000ff00) | \
	 (((NS_TO_CYCLES8(current_timings[x+2])) << 16) & 0x00ff0000) | \
	 (((NS_TO_CYCLES8(current_timings[x+3])) << 24) & 0xff000000))

	/* treg_time_offset tracks how many table values have been consumed */
	for (treg=0, treg_time_offset=0; treg < ARRAY_SIZE(timing_regs_offset); treg++) {
		/* offsets not sequential so get from table... */
		offset = timing_regs_offset[treg];
		switch(offset) {
			/* one 24 bit field and one 8 bit field */
			case AUMB4000_CF_TIMING_CMD0_OFFSET:
			case AUMB4000_CF_TIMING_CMD2_OFFSET:
			case AUMB4000_CF_TIMING_CMD3_OFFSET:
			case AUMB4000_CF_TIMING_CMD4_OFFSET:
				cycle_counts = 
					ASSEMBLE_2MEMBER_TIMING_REG(treg_time_offset);
				treg_time_offset += 2;
				break;
			/* one 16 bit field, two 8 bit fields */
			case AUMB4000_CF_TIMING_CMD1_OFFSET:
				cycle_counts = 
					ASSEMBLE_3MEMBER_TIMING_REG(treg_time_offset);
				treg_time_offset += 3;
				break;
			/* four 8 bit fields, all read/write timings */
			default:
				cycle_counts = 
					ASSEMBLE_4MEMBER_TIMING_REG(treg_time_offset);
				treg_time_offset += 4;
				break;
		}
		//dprintk("[%d] write 0x%08x to offset 0x%x\n", treg, cycle_counts, offset);
		REG_WRITE(cycle_counts, offset);
	}
	return 0;
}

/*
 * Poll the controller command register until its low nine bits read back
 * as zero, giving up after a fixed number of reads.
 * Returns 1 once the register is clear, 0 if the poll budget runs out.
 */
static uint8_t aumb4000_waitready(void)
{
	int32_t polls_left;

	for (polls_left = 999999; polls_left > 0; polls_left--) {
		if (!(REG_READ(CF_COMMAND) & 0x1ff))
			return 1;
	}

	dprintk3("device not ready: 0x%x\n", REG_READ(CF_COMMAND));
	return 0;
}

/*
 * Clear the controller status bits and write @cmd to the command register.
 * Returns 1 if ready, 0 if not.
 */
#define CMD_REG_WRITE(cmd)	cmd_reg_write(cmd)
static uint8_t cmd_reg_write(uint16_t cmd)
{
	uint8_t ready = 1;
	int is_status_read;

	CLR_STATUS();
	REG_WRITE(cmd, CF_COMMAND);

	/* Commands below 0x40 are direct command-block commands and are
	 * waited on right here.  For cf_card data commands the doc says status
	 * should be checked after the data is read/written, which cannot be
	 * done from this function, so they are reported ready immediately.
	 */
	if (cmd < 0x40)
		ready = aumb4000_waitready();

	/* status reads are not logged; the print is empty in non-debug builds */
	is_status_read = (cmd == CF_COMMAND_READ_STATUS ||
			cmd == CF_COMMAND_READ_ALTERNATE_STATUS);
	if (!is_status_read)
		dprintk2("sent cmd 0x%x return %d\n", cmd, ready);

	return ready;
}


#define USE_SINGLE_SETMODE
#ifdef USE_SINGLE_SETMODE
/*
 *  these guys: mobi_set_ piomode, mode, dmamode all drive the set_features
 *  command that this driver does in initialization.... 
 *  however, I believe these exist so that modes can be changed, why, when
 *  who, I don't know but these would take care of things like setting the 
 *  timings correctly for the mode.
 *
 *  OK, if we define .set_mode we don't need pio/dma mode(so they are gone 
 *  now:) and we should handle everything in our private setmode  OR
 *  we don't define .set_mode, let ata_set_mode take care of setting things
 *  like xfer_mode, shift, blah and then use pio/dma_mode to do stuff special 
 *  for out controller.
 *
 *  in either case timings and such could be take care of at that point for
 *  the selected mode.
 */
/*
 * Private set-mode hook: unconditionally issues SET FEATURES with the
 * transfer-mode subcommand and the value 0xc (PIO mode 4), then pauses.
 * Neither @adev nor the result of the command write is looked at; the
 * function always returns 0.
 */
static int aumb4000_set_mode(struct ata_port* ap, struct ata_device* *adev) 
{
	/* XXX HACK HACK HACK  
	 * so don't ask me why but if I create separate set_dmamode and set_piomode
	 * and try to set the piomode from there things hang(even if I hardcode the
	 * value)  I'm not sure why and since things are such a disaster with this
	 * controller it probably doesn't matter.  DMA to the drive is not working
	 * so just put it in PIO mode
	 */
	/* subcommand goes in the features register, mode in the sector count */
	REG_WRITE(SETFEATURES_XFER, CF_CARD_FEATURES);
	/* PIO_MODE_4 == 0xc */
	REG_WRITE(0xc, CF_CARD_SECTOR_COUNT);
	CMD_REG_WRITE(CF_COMMAND_CF_CARD_SET_FEATURES);
	ata_pause(ap);

	return 0;
}

#else
/*
 * PIO mode hook, compiled only when USE_SINGLE_SETMODE is not defined.
 * Always loads the PIO0 timing set, then issues SET FEATURES (transfer
 * mode) with a hard-coded mode value.  The index derived from
 * adev->pio_mode is only printed.
 */
static void aumb4000_set_piomode(struct ata_port *ap, 
		struct ata_device *adev)
{
	uint32_t pio_mode_index;
	/* 
	 * for now, we just set the PIO timings once at probe 
	 * time, after that, well.. we'll see how the udma times
	 * work
	 */
	pio_mode_index = adev->pio_mode & 0x7;
	dprintk1("dev->pio_mode = 0x%x, timing index %d\n", 
			adev->pio_mode, pio_mode_index);

	aumb4000_set_mode_timing(PIO0);

	REG_WRITE(SETFEATURES_XFER, CF_CARD_FEATURES);
	//REG_WRITE(adev->pio_mode, CF_CARD_SECTOR_COUNT);
	/* NOTE(review): 0x24 is not a PIO transfer-mode value (PIO modes are
	 * 0x08-0x0f) -- confirm what was intended before enabling this path */
	REG_WRITE(0x24, CF_CARD_SECTOR_COUNT);
	CMD_REG_WRITE(CF_COMMAND_CF_CARD_SET_FEATURES);
}

/*
 * DMA mode hook, compiled only when USE_SINGLE_SETMODE is not defined.
 * Loads the UDMA timing set selected by the low three bits of
 * adev->dma_mode and issues SET FEATURES (transfer mode) with that mode.
 * Does nothing until the identify (0xec) command has been seen.
 */
static void aumb4000_set_dmamode(struct ata_port *ap, struct ata_device *adev)
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	uint32_t timing_slot;
	dbg_func_in();

	/* The udma timings break the PIO read of the device ID, so they must
	 * not be loaded before the identify command has run; exec_command
	 * records when it has seen 0xec.
	 */
	if (unlikely(prv->dev_id_cmd_done == 0))
		return;

	/* XXX we are only doing UDMA at this time */
	/* UDMA slots follow the PIO and MWDMA slots in the timing table; the
	 * lower 3 bits of the mode pick the speed within the protocol
	 */
	timing_slot = (adev->dma_mode & 0x7) + ATA_BITS_MWDMA + ATA_BITS_PIO;
	dprintk1("dev->dma_mode = 0x%x, timing index %d\n", 
			adev->dma_mode, timing_slot);

	aumb4000_set_mode_timing(timing_slot);

	REG_WRITE(SETFEATURES_XFER, CF_CARD_FEATURES);
	REG_WRITE(adev->dma_mode, CF_CARD_SECTOR_COUNT);
	CMD_REG_WRITE(CF_COMMAND_CF_CARD_SET_FEATURES);
	ata_pause(ap);
}
#endif

/*
static void show_stat_err(struct mobi_ata_prv_data* prv, const char* caller) 
{
	char buf[1024];

	printk(KERN_WARNING "%s: %s is the caller command register %x\n", 
			__FUNCTION__, caller, REG_READ(CF_COMMAND));

	cfStatusDump((uint32_t)reg_iobase, buf, 36);
	printk(KERN_WARNING "%s:%s", __FUNCTION__, buf);

	cfErrorDump((uint32_t)reg_iobase, buf, 30);
	printk(KERN_WARNING "%s:%s", __FUNCTION__, buf);

	statusDump((uint32_t)reg_iobase, buf, 19);
	printk(KERN_WARNING "%s:%s", __FUNCTION__, buf);
}
*/

/*
 * Issue the controller reset command.
 * Returns 0 when the command write reported ready (after a 300 ms settle
 * delay), 1 when it did not.
 */
static int hard_reset(void)
{
	dbg_func_in();

	if (!CMD_REG_WRITE(CF_COMMAND_RESET))
		return 1;

	/* With all the status printks on, the scsi error handler is seen
	 * probing a lot; it works out in the end, but a generous delay here
	 * reduces the number of requests.
	 */
	mdelay(300);
	return 0;
}
/*
 * config_gpio - set up the GPIO line that carries the drive interrupt
 * @prv: driver private data; gpio_shift and gpio_hdl are filled in here
 *
 * Looks up the GPIO driver named "GPIO_<n>", configures the line as a
 * debounced, level-triggered, active-high interrupt and enables it.
 *
 * Returns the value of gpio_interrupt_enable() on success, or a negative
 * error code.  On every failure path the GPIO driver reference is released
 * and prv->gpio_hdl is reset to NULL, so no stale handle is left behind.
 * NOTE(review): failures are assumed to be reported as negative values by
 * gpio_interrupt_enable(), matching the gpio_interrupt_configure() check.
 */
static int __init config_gpio(struct mobi_ata_prv_data* prv) 
{
	int ret;
	struct gpio_interrupt_config gpioConfig = {
		.type           = GPIO_INT_TYPE_LEVEL,
		.polarity       = GPIO_INT_POLARITY_HIGH_OR_RISING,
		.debounce_on    = 1,
	};
	char gpioName[32] = {0};

	prv->gpio_shift = prv->aumb4k->gpio_shift;

	snprintf(gpioName, sizeof(gpioName), "GPIO_%d", prv->aumb4k->gpio);

	prv->gpio_hdl = gpio_driver_get(gpioName);
	if (NULL == prv->gpio_hdl)
		return -EINVAL;

	ret = gpio_interrupt_configure(prv->gpio_hdl,
			prv->gpio_shift, &gpioConfig);
	if (ret < 0)
		goto err_put;

	ret = gpio_interrupt_enable(prv->gpio_hdl, prv->gpio_shift);
	if (ret < 0)
		goto err_put;

	return ret;

err_put:
	/* drop the reference taken above and forget the handle */
	gpio_driver_put(prv->gpio_hdl);
	prv->gpio_hdl = NULL;
	return ret;
}

static void aumb4000_dev_config(struct ata_port* ap, struct ata_device* dev) 
{
	dev->max_sectors = max_sectors;
}

/*
 * Return the drive status byte.
 *
 * While an AMBA DMA transfer is pending the drive is reported as
 * CF_DMA_BUSY without touching the hardware.  Otherwise a read-status
 * command is issued and the status-out register is returned; if that
 * command does not complete, the busy status is returned instead.
 */
static uint8_t aumb4000_check_status(struct ata_port* ap) 
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	uint8_t status;

	/* In theory the status register is refreshed after the last command
	 * executed, but that does not seem to be reliable.  The ata subsystem
	 * expects a fresh value from this call, so a read status command is
	 * really sent to the drive.
	 */
	if (prv->dmaPending)
		status = CF_DMA_BUSY;
	else if (!CMD_REG_WRITE(CF_COMMAND_READ_STATUS))
		status = CF_CARD_STATUS_OUT_BSY;
	else
		status = REG_READ(CF_CARD_STATUS_OUT);

	dprintk4("return 0x%x\n", status);

	return status;
}

/*
 * Return the drive alternate-status byte.  Same scheme as the regular
 * status read: CF_DMA_BUSY while a DMA transfer is pending, otherwise the
 * read-alternate-status command is issued to the drive and the result
 * register returned, or the busy status if the command did not complete.
 */
static uint8_t aumb4000_check_altstatus(struct ata_port* ap) 
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	uint8_t status;

	if (prv->dmaPending)
		status = CF_DMA_BUSY;
	else if (!CMD_REG_WRITE(CF_COMMAND_READ_ALTERNATE_STATUS))
		status = CF_CARD_STATUS_OUT_BSY;
	else
		status = REG_READ(CF_CARD_ALT_STATUS_OUT);

	dprintk4("return 0x%x\n", status);

	return status;
}

/**
 *	aumb4000_tf_load - send taskfile registers to host controller
 *	@ap: Port to which output is sent
 *	@tf: ATA taskfile register set
 *
 *	Writes the device-control byte (only when it differs from the last
 *	one sent), then feature, sector count and the three LBA bytes, and
 *	the device byte when ATA_TFLAG_DEVICE is set.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	Modelled on the libata version but without the wait_idle calls,
 *	since no command is submitted to the drive from here.
 */
static void aumb4000_tf_load(struct ata_port* ap, const struct ata_taskfile* tf)
{
	/* the taskfile fields are 8-bit and promote to int, so print with %x */
	dprintk0("write taskfile regs\n"
			"feature 0x%x\n"
			"nsect   0x%x\n"
			"lbal    0x%x\n"
			"lbam    0x%x\n"
			"lbah    0x%x\n"
			"device  0x%x\n",
			tf->feature, tf->nsect,
			tf->lbal, tf->lbam, tf->lbah, tf->device);

	if (tf->ctl != ap->last_ctl) {
		/* it's unclear but I think this should be sent to
		 * the hard drive, so do a cmd write.
		 * since we are not writing any other commands in this
		 * function, we won't wait for idle but the cmd write
		 * does have its own wait
		 */
		REG_WRITE(tf->ctl, CF_CARD_DEVICE_CONTROL);
		CMD_REG_WRITE(CF_COMMAND_WRITE_DEVICE_CONTROL);
		ap->last_ctl = tf->ctl;
		/* ata_wait_idle(ap); */
	}

	REG_WRITE(tf->feature, CF_CARD_FEATURES);
	REG_WRITE(tf->nsect, CF_CARD_SECTOR_COUNT);
	REG_WRITE(tf->lbal, CF_CARD_SECTOR_NUMBER);
	REG_WRITE(tf->lbam, CF_CARD_CYL_LOW);
	REG_WRITE(tf->lbah, CF_CARD_CYL_HIGH);

	if (tf->flags & ATA_TFLAG_DEVICE) {
		REG_WRITE(tf->device, CF_CARD_DRIVE_HEAD);
	}

	/* from libata, just to document, we won't do this
	 * here since we aren't submitting any command to the 
	 * drive, only to the aumb4000 registers
	ata_wait_idle(ap);
	*/
}

/**
 *	aumb4000_tf_read - input device's ATA taskfile shadow registers
 *	@ap: Port from which input is read
 *	@tf: ATA taskfile register set for storing input
 *
 *	Reads ATA taskfile registers for currently-selected device
 *	into @tf.  Each field is fetched by first issuing the matching
 *	read command and then reading the corresponding *_OUT register.
 *	The result of each command write is not checked.
 *
 *	LOCKING:
 *	Inherited from caller.
 */
static void aumb4000_tf_read(struct ata_port* ap, struct ata_taskfile* tf) 
{
	/* it appears, we actually have to issue the commands to 
	 * get these registers updated... sucks!
	 */
	dprintk4("reading taskfile regs start\n");
	/* status goes through the regular status path (honours dmaPending) */
	tf->command = aumb4000_check_status(ap);
	CMD_REG_WRITE(CF_COMMAND_READ_ERROR);
	tf->feature = REG_READ(CF_CARD_ERROR_OUT);
	CMD_REG_WRITE(CF_COMMAND_READ_SECTOR_COUNT);
	tf->nsect   = REG_READ(CF_CARD_SECTOR_COUNT_OUT);
	CMD_REG_WRITE(CF_COMMAND_READ_SECTOR_NUMBER);
	tf->lbal    = REG_READ(CF_CARD_SECTOR_NUMBER_OUT);
	CMD_REG_WRITE(CF_COMMAND_READ_CYLINDER_LOW);
	tf->lbam    = REG_READ(CF_CARD_CYL_LOW_OUT);
	CMD_REG_WRITE(CF_COMMAND_READ_CYLINDER_HIGH);
	tf->lbah    = REG_READ(CF_CARD_CYL_HIGH_OUT);
	CMD_REG_WRITE(CF_COMMAND_READ_DRIVE_HEAD);
	tf->device  = REG_READ(CF_CARD_DRIVE_HEAD_OUT);
	dprintk4("reading taskfile regs end\n");
}

/*
 * Issue the taskfile command to the drive.
 *
 * Command 0xec (identify) is written to the card command register and sent
 * with the direct "write command" controller command; seeing it also sets
 * prv->dev_id_cmd_done.  Every other command is converted to the
 * controller's own format by OR-ing in 0x100 and written to the
 * controller command register.
 */
static void aumb4000_exec_command(struct ata_port* ap, 
		const struct ata_taskfile* tf) 
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);

	dprintk2("tf->COMMAND: 0x%04x\n", tf->command);

	/* XXX Sending everything as tf->command|0x100 is what really should
	 * work, but something goes wrong when the status is read: the type of
	 * command (direct block or cf_card) dictates when the controller's
	 * status register is checked, and the identify command then reports
	 * that the cmd is not cleared.
	 *
	 * Per the spec some commands use the PIO protocol and others DMA.
	 * Commands issued through the controller cmd reg get the correct
	 * protocol regardless of the mode, so an ID cmd will be pio while a
	 * read/write dma uses the mode in the controller reg.  Issuing
	 * commands through the direct block command register could use the
	 * wrong protocol.
	 */
	if (likely(tf->command != 0xec)) {
		CMD_REG_WRITE(tf->command|0x100);
		return;
	}

	REG_WRITE(tf->command, CF_CARD_COMMAND);
	CMD_REG_WRITE(CF_COMMAND_WRITE_COMMAND);
	prv->dev_id_cmd_done = 1;
}

#if AUMB4000_DEBUG
/*
 * Debug helper: snapshot the write/read data-control, command, control and
 * status registers and print them together with @msg.  @prv is not used.
 */
static void show_fifo_dma(struct mobi_ata_prv_data* prv, const char* msg) 
{
	/* all five registers are read up front, before anything is printed */
	cf_card_write_data_control_flags wt = { 
		.d32 = REG_READ(CF_CARD_WRITE_DATA_CONTROL) 
	};
	cf_card_read_data_control_flags rd = { 
		.d32 = REG_READ(CF_CARD_READ_DATA_CONTROL)  
	};
	cf_flash_command_flags cmd = { 
		.d32 = REG_READ(CF_COMMAND) 
	};
	flash_control_flags ctl = { 
		.d32 = REG_READ(CF_CONTROL) 
	};
	flash_status_flags stat = { 
		.d32 = REG_READ(CF_STATUS) 
	};

	printk(KERN_WARNING "%s: %s\n", __FUNCTION__, msg);
	printk(KERN_WARNING "%s: Status       -- %x\n", __FUNCTION__, stat.d32);
	printk(KERN_WARNING "%s: CF_COMMAND   -- %x\n", __FUNCTION__, cmd.d32);
	printk(KERN_WARNING "%s: ReadControl  -- dr=%d da=%d\n", 
		__FUNCTION__, rd.b.dr, rd.b.da);
	printk(KERN_WARNING "%s: WriteControl -- dr=%d da=%d\n", 
		__FUNCTION__, wt.b.dr, wt.b.da);

	/* re/rf and we/wf are printed as the FIFO empty/full flags */
	printk(KERN_WARNING "%s: ReadFifoBuffer  -- empty? %s, " 
		"full? %s [%08x]\n", __FUNCTION__, ctl.b.re?"yes":"no", 
		ctl.b.rf?"yes":"no", ctl.d32);

	printk(KERN_WARNING "%s: WriteFifoBuffer -- empty? %s, "
		"full? %s\n", __FUNCTION__, ctl.b.we?"yes":"no", 
		ctl.b.wf?"yes":"no");

	return;

}
#endif


static void aumb4000_bmdma_setup(struct ata_queued_cmd* qc) 
{
	struct ata_port* ap = qc->ap;
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	int ret;
	uint32_t err, dmaConfig = 
			MOBI_DMA_CONFIG_TRANSFER_WIDTH_32   |   
			MOBI_DMA_CONFIG_ADDRADJ_INC         |
			MOBI_DMA_CONFIG_BURST_SIZE_8; 

	dbg_func_in();

	/* see comment in dma_handler */
	prv->ap = ap;
	prv->qc = qc;

	prv->dma_error = 0;
	if (prv->dma_handle < 0) {
		if ((prv->dma_handle = 
					mobi_dma_request("pata_aumb4000", MOBI_DMA_O_NONE)) < 0) {
			err = DMA_REQUEST_ERR;
			goto err;
		}
		if ((ret = mobi_dma_setup_handler(prv->dma_handle, 
						(void*)aumb4000_dma_handler, (void*)prv))) {
			err = DMA_HANDLER_ERR;
			goto err_release;
		}
		if ((ret = mobi_dma_config(prv->dma_handle, DMA_CONFIG_XFER, 
						MOBI_DMA_CONFIG_DATA_WIDTH_32, NULL))) {
			err = DMA_CONFIG_XFER_ERR;
			goto err_release;
		}
		if ((ret = mobi_dma_config(prv->dma_handle, DMA_CONFIG_DST, 
						dmaConfig, NULL))) {
			err = DMA_CONFIG_DST_ERR;
			goto err_release;
		}
		if ((ret = mobi_dma_config(prv->dma_handle, DMA_CONFIG_SRC, 
						dmaConfig, NULL))) {
			err = DMA_CONFIG_SRC_ERR;
			goto err_release;
		}
	}
	/* argh, the idea was to keep a reference count of how often setup
	 * was called.  But, now I realize that setup and close is called
	 * for every *block* not just once for a single transfer which is a 
	 * problem for request/free-ing the dma channel which we want to use
	 * only on a need-be basis
	 */
	/* prv->dma_ref_count++; */

	return;

err_release:
	//if (prv->dma_ref_count == 0) {
	mobi_dma_free(prv->dma_handle);
	prv->dma_handle = -1;

err:
	prv->dma_error = err;
	error("bmdma_setup error: %d\n", err);
	return;
}

/*
 * Report the DMA status: ATA_DMA_ERR when the last event recorded by the
 * DMA handler was a transfer error, otherwise the stored bmdma_status.
 *
 * XXX not sure about this (or any of the bmdma hooks for that matter).
 * This should be the dma status of the drive; it is unclear whether the
 * status of an amba bus dma belongs here.  Need to look at
 * ata_bmdma_status and see what bmdma_addr is all about.
 */
static uint8_t aumb4000_bmdma_status(struct ata_port* ap) {

	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);

	if (prv->dma_event != MOBI_DMA_EVENT_TRANSFER_ERROR)
		return prv->bmdma_status;

#if AUMB4000_DEBUG
	show_fifo_dma(prv, "DMA TRANSFER ERROR");
#endif
	return ATA_DMA_ERR;
}

/*
 * Stop hook: disable the AMBA DMA channel.  The channel itself is kept;
 * the reference-counted release sketched earlier was abandoned because
 * setup/stop run once per block (and would need a spinlock around the
 * count).
 */
static void aumb4000_bmdma_stop(struct ata_queued_cmd* qc) 
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(qc->ap);

	dbg_func_in();

	mobi_dma_disable(prv->dma_handle);
}

/*
 * aumb4000_dma_handler - AMBA DMA completion callback
 * @dmah:      DMA channel handle (unused)
 * @dma_event: event reported by the DMA controller
 * @data:      the driver private data registered in bmdma_setup
 *
 * Records the event, clears the pending flag and, for a completed
 * transfer whose queued command is still active, marks the bmdma status
 * as interrupted and kicks the libata state machine by hand.
 *
 * If no port/command was recorded by bmdma_setup the handler warns and
 * returns after updating its own state, instead of dereferencing a NULL
 * pointer.
 */
void aumb4000_dma_handler(mobi_dma_handle dmah, 
		mobi_dma_event dma_event, void *data) 
{
	struct mobi_ata_prv_data* prv = (struct mobi_ata_prv_data*)data;

	dbg_func_in();

	WARN_ON(0 == (prv->ap && prv->qc));
	prv->dma_status = 0;
	prv->dma_event = dma_event;
	prv->dmaPending = 0;

	dprintk1("got event %s\n", 
			(dma_event == MOBI_DMA_EVENT_TRANSFER_COMPLETE ?
				"XFER_COMPLETE" : "XFER_ERROR"));

	/* wouldn't the state machine still be stuck ? */
	if (MOBI_DMA_EVENT_TRANSFER_COMPLETE != dma_event) {
		return;
	}

	/* nothing to complete; the WARN_ON above already reported it */
	if (!prv->ap || !prv->qc)
		return;

	/* XXX ACK what's this, some way to send an error back !!!! */
	prv->qc->err_mask = 0;

	/*
	 * In theory the drive's own interrupt (nothing to do with interrupts
	 * in the aurora controller) should reach ata_host_intr and keep the
	 * libata state machine moving, which would make this handler
	 * unnecessary: its usual job is to wake a waitq, and bmdma_start
	 * cannot sleep.  That interrupt does not seem to arrive correctly,
	 * and DMA writes may be ack'd by the dmac before the full count has
	 * been transferred, so the state machine is kick-started manually
	 * from here.
	 *
	 * Open question: once the drive interrupt works, is there a race
	 * between libata moving forward and the amba dma completion?
	 */
	if ((prv->qc)->flags & ATA_QCFLAG_ACTIVE) {
		prv->bmdma_status = ATA_DMA_INTR;
		ata_host_intr(prv->ap, prv->qc); 
	}

	return;
}

static void aumb4000_bmdma_start(struct ata_queued_cmd* qc) 
{
	struct ata_port* ap = qc->ap;
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	int i, maxTries, dmaSz = 0;
	struct scatterlist *sg = qc->__sg;
	uint32_t dma_status;

	dbg_func_in();

	/* need something in lib ata to really pass back the error */
	if (prv->dma_error & DMA_SETUP_ERROR_MASK) {
		error("bmdma_start failed do to dma setup error");
		return;
	}

	/* find the total size of the sglist */
	for (i=0;i < qc->n_elem;i++)
		dmaSz += sg[i].length;

	dprintk1("qc->dma_dir: %s_DEVICE, xfer size %d, elements %d\n" , 
			qc->dma_dir == DMA_TO_DEVICE ? "TO" : "FROM", dmaSz, qc->n_elem);
	
	/* while debugging I noticed many of the qc->__sg list only had one 
	 * entry!  this means we can do a single instead of a sglist.  since
	 * single doesn't have to convert the list, create an lli list, and call 
	 * a bunch of other * code in the DMAC it should be faster. 
	 *
	 */
	prv->dma_dir = qc->dma_dir;
	if (qc->n_elem > 1) {
		if (mobi_dma_setup_sglist(prv->dma_handle, 
						qc->__sg, qc->n_elem, 
						dmaSz, CF_DATA_BASE, qc->dma_dir)) {
			error("mobi_dma_setup_sglist failed");
			return;
		}
	}
	else {
		if (qc->dma_dir == DMA_TO_DEVICE) {
			if (mobi_dma_setup_single(prv->dma_handle, 
							sg_dma_address(qc->__sg), 
							CF_DATA_BASE,
							dmaSz)) {
				error("mobi_dma_setup_single device write failed");
				return;
			}
		}
		else {
			if (mobi_dma_setup_single(prv->dma_handle, 
							CF_DATA_BASE,
							sg_dma_address(qc->__sg), 
							dmaSz)) {
				error("mobi_dma_setup_single device read failed");
				return;
			}
		}
	}

	prv->bmdma_status = 0;

	/* start the transfer between the driver and the controller */
	/* if UDMA/MWDMA was working we would need to use R/W DMA cmd here */
	if (DMA_FROM_DEVICE == qc->dma_dir) {
#ifdef UDMA_TESTING_ENABLE
		/* need to get this working */
		CMD_REG_WRITE(CF_COMMAND_CF_CARD_READ_DMA);
#else
		/* XXX this just pios data blocks  */
		CMD_REG_WRITE(CF_COMMAND_CF_CARD_READ_MULTIPLE);
#endif
	} 
	else {
#ifdef UDMA_TESTING_ENABLE
		/* need to get this working */
		CMD_REG_WRITE(CF_COMMAND_CF_CARD_WRITE_DMA);
#else
		/* XXX this just pios data blocks */
		CMD_REG_WRITE(CF_COMMAND_CF_CARD_WRITE_MULTIPLE);
#endif
	}

	prv->dmaPending = 1;
	if (mobi_dma_enable(prv->dma_handle)) {
		prv->dmaPending = 0;
		error("mobi_dma_enable failed");
		/* there was an error, but how to send it back ?? */
		return;
	}

	/* XXX in theory we shouldn't have to do any waiting here.  
	 * the drive should send an interrupt when it is done.  The question
	 * would be how soon after the interrupt does the controller clear
	 * the command reg and also how soon is the amba dma complete after
	 * the interrupt.  
	 */

	/* there is always a big delay, so instead of spinning, just
	 * delay big here in one shot
	 */
	udelay(500);

	/* first we see if tranfer of data to/from the drive is completed */
	maxTries = 0;
	while ((REG_READ(CF_COMMAND) && ++maxTries)) {
		udelay(20);
	}
	dprintk4("cf_cmd maxTries = %d\n", maxTries);

	/* now we need to make sure the amba dma completed, we can't do
	 * a waitq here since we can't sleep.  And you would think we 
	 * could poll on a state variable set in the dma event handler
	 * but we seem to loop forever, so we do it the hard way and 
	 * get the status from the the dmac.  this loop seems to only
	 * execute once though...
	 */
	maxTries = 100;
	do {
		udelay(20);
		dma_status = mobi_dma_get_status(prv->dma_handle);
	} while (maxTries-- && !(dma_status &
				(MOBI_DMA_STATUS_XFER_IDLE | MOBI_DMA_STATUS_ERROR)));
	if (!maxTries)
		warning("dma status timeout: status 0x%x", 
				mobi_dma_get_status(prv->dma_handle));
	dprintk4("dmaPending maxTries = %d, status = 0x%x\n", 
			maxTries, dma_status);
}

/*
 * irq_clear hook: clear the pending interrupt on the GPIO line recorded in
 * the private data (gpio_hdl / gpio_shift, filled in by config_gpio).
 */
static void aumb4000_irq_clear(struct ata_port* ap) 
{
	struct mobi_ata_prv_data* prv = PRV_FROM_AP2(ap);
	gpio_interrupt_clear(prv->gpio_hdl, prv->gpio_shift);
}

#if AUMB4000_DEBUG
/*
 * proc_advance_ptr - skip whitespace in a bounded buffer
 * @ptr:    cursor into the buffer; advanced past spaces, tabs, CR and LF
 * @buffer: start address of the buffer, passed as an integer
 * @count:  number of valid bytes in the buffer
 *
 * The bounds test comes first so the byte at buffer[count] is never read;
 * the cursor stops at the first non-whitespace byte or at buffer + count.
 */
static void proc_advance_ptr(char **ptr, uint32_t buffer, unsigned long count)
{
	char *buf = (char *)buffer;

	while ((unsigned long)(*ptr - buf) < count) {
		char c = **ptr;

		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
			break;
		(*ptr)++;
	}
}

/*
 * Skip leading whitespace and report whether any input is left.
 * Returns 0 with *ptr on the next token, or -EINVAL when the cursor has
 * reached the end of the @count-byte buffer.
 */
static int proc_get_next_arg(char **ptr, uint32_t buffer, unsigned long count)
{
	char *start = (char *)buffer;

	proc_advance_ptr(ptr, buffer, count);

	return (*ptr-start >= count) ? -EINVAL : 0;
}

/*
 * get_ularg - parse the next whitespace-separated number from the buffer
 * @ptr:    cursor into the buffer; left just past the parsed number
 * @buffer: start address of the buffer, passed as an integer
 * @count:  number of valid bytes in the buffer
 * @arg:    receives the parsed value (base auto-detected)
 *
 * Returns 0 on success, -1 when the input is exhausted or the next token
 * does not start with a digit.  In the second case *arg is still written
 * (with 0), so callers that ignore the return value see what they always
 * saw.
 *
 * NOTE(review): simple_strtoul() stops at the first non-digit, not at
 * @count, so the buffer is expected to be terminated by a non-digit byte.
 */
static int get_ularg(char **ptr, uint32_t buffer, 
		unsigned long count, uint32_t *arg)
{
	char *buf = (char *)buffer;
	char *start;

	proc_advance_ptr(ptr, buffer, count);
	if (*ptr-buf >= count) {
		error("Invalid argument string");
		return -1;
	}

	start = *ptr;
	*arg = simple_strtoul(start, ptr, 0);
	/* the end pointer does not move when no digits were consumed */
	if (*ptr == start) {
		error("invalid argument");
		return -1;
	}

	return 0;
}

#define DBG_CMD_READ_REGISTER 	0x1
#define DBG_CMD_WRITE_REGISTER  0x2
#define DBG_CMD_LOG_LEVEL 		0x3
/*
 * aumb4000_proc_wr_debug - /proc write handler for the debug interface
 * @file:   unused
 * @buffer: user-space buffer holding "<cmd> [arg1 [arg2]]"
 * @count:  number of bytes in @buffer
 * @data:   unused
 *
 * Commands: 1 <offset> reads a controller register, 2 <offset> <value>
 * writes one, 3 <level> sets the debug log level.
 *
 * The user buffer is copied into a NUL-terminated kernel buffer before it
 * is parsed, and a command whose arguments are missing is dropped instead
 * of being run with placeholder values.
 *
 * Returns @count when the input was consumed (including rejected
 * commands), -EINVAL when the write is too long for the command buffer,
 * -EFAULT when the user buffer cannot be read.
 *
 * NOTE(review): register offsets are not range-checked against the size of
 * the register window -- acceptable for a debug-only hook, but confirm.
 */
static int aumb4000_proc_wr_debug(struct file *file, 
		const char *buffer, unsigned long count, void *data)
{
	char kbuf[128];
	char *ptr = kbuf;
	char *start;
	uint32_t cmd = 0, arg1 = 0, arg2 = 0, value = 0;

	if (count >= sizeof(kbuf))
		return -EINVAL;
	if (copy_from_user(kbuf, buffer, count))
		return -EFAULT;
	/* terminator keeps the number parser inside the buffer */
	kbuf[count] = '\0';

	if (proc_get_next_arg(&ptr, (uint32_t)kbuf, count) != 0) {
		printk("Invalid cmd string");
		goto err;
	}

	start = ptr;
	cmd = simple_strtoul(start, &ptr, 0);
	if (ptr == start) {
		error("Invalid debug command");
		goto err;
	}

	switch(cmd) {
		case DBG_CMD_READ_REGISTER: 
			if (get_ularg(&ptr, (uint32_t)kbuf, count, &arg1))
				goto err;
			value = readl(reg_iobase+arg1);
			printk("Read at offset 0x%x is 0x%x(%d)\n",
					arg1, value, value);
			break;
		case DBG_CMD_WRITE_REGISTER:
			if (get_ularg(&ptr, (uint32_t)kbuf, count, &arg1) ||
					get_ularg(&ptr, (uint32_t)kbuf, count, &arg2))
				goto err;
			printk("Write 0x%x(%d) to offset 0x%x\n",
					arg2, arg2, arg1);
			writel(arg2, reg_iobase+arg1);
			break;
		case DBG_CMD_LOG_LEVEL:
			/* parse into a uint32_t; loglevel itself is an int */
			if (get_ularg(&ptr, (uint32_t)kbuf, count, &value))
				goto err;
			loglevel = value;
			printk(KERN_INFO "Setting AUMB4000 debug level to %d\n", loglevel);
			break;
		default:
			printk(KERN_ERR "Unrecognized debug command: 0x%x\n", cmd);
			break;
	}

err:
	/* the whole write is always consumed */
	return count;
}
#endif

/*
 * queuecommand entry used by the scsi host template.  Opcode 0x35
 * (SYNCHRONIZE_CACHE) is completed on the spot by calling done(); it
 * appears that the system hangs on shutdown if that command is not
 * handled here.  Every other command is passed to ata_scsi_queuecmd()
 * and its result is returned unchanged.
 */
static int aumb4000_scsi_queuecmd(struct scsi_cmnd *scmd,
		void (*done)(struct scsi_cmnd *))
{
	dbg_func_in();

	if (scmd->cmnd[0] != 0x35 /*SYNCHRONIZE_CACHE*/)
		return ata_scsi_queuecmd(scmd, done);

	/* calls blk_end_sync_rq in block/ll_rw_blk.c */
	done(scmd);
	return 0;
}

/*
 * Byte write accessor (installed as .pwriteb).  Stores 'data' at 'addr'
 * with writel(), then issues the write_commands[] entry selected by
 * bits 5:2 of the address through CMD_REG_WRITE().
 */
void aumb4000_writeb(uint8_t data, void __iomem *addr)
{
	uint8_t idx = ((uint32_t)addr >> 2) & 0xf;
	uint16_t cf_cmd = write_commands[idx];

	dprintk3("addr 0x%x, data 0x%x, cmd 0x%x\n", 
			(uint32_t)addr, data, cf_cmd);

	/* data first, then the command that acts on it */
	writel(data, addr);
	CMD_REG_WRITE(cf_cmd);
}

/*
 * 16-bit write accessor (installed as .pwritew).  Stores 'data' at 'addr'
 * with writel(), then issues the write_commands[] entry selected by
 * bits 5:2 of the address through CMD_REG_WRITE().
 */
void aumb4000_writew(uint16_t data, void __iomem *addr)
{
	uint8_t idx = ((uint32_t)addr >> 2) & 0xf;
	uint16_t cf_cmd = write_commands[idx];

	dprintk3("addr 0x%x, data 0x%x, cmd 0x%x\n", 
			(uint32_t)addr, data, cf_cmd);

	/* data first, then the command that acts on it */
	writel(data, addr);
	CMD_REG_WRITE(cf_cmd);
}

/*
 * Byte read accessor (installed as .preadb).  Issues the read_commands[]
 * entry selected by bits 5:2 of 'addr', then fetches the result.  The
 * direct block read registers sit 0x40 above the write registers, so the
 * value is read from addr + 0x40 and masked down to 8 bits.
 */
uint8_t aumb4000_readb(void __iomem *addr)
{
	uint8_t idx = ((uint32_t)addr >> 2) & 0xf;
	uint16_t cf_cmd = read_commands[idx];
	uint8_t val;

	/* the command must be issued before the data register is read */
	CMD_REG_WRITE(cf_cmd);
	val = (readl(addr+0x40) & 0xff);

	dprintk3("addr 0x%x, cmd 0x%x, read data 0x%x\n", 
			(uint32_t)addr+0x40, cf_cmd, val);
	return val;
}

/*
 * 16-bit read accessor (installed as .preadw).  Issues the
 * read_commands[] entry selected by bits 5:2 of 'addr', then reads the
 * result from addr + 0x40 and masks it down to 16 bits.
 */
uint16_t aumb4000_readw(void __iomem *addr)
{
	uint8_t idx = ((uint32_t)addr >> 2) & 0xf;
	uint16_t cf_cmd = read_commands[idx];
	uint16_t val;

	/* the command must be issued before the data register is read */
	CMD_REG_WRITE(cf_cmd);
	val = (readl(addr+0x40) & 0xffff);

	dprintk3("addr 0x%x, cmd 0x%x, read data 0x%x\n", 
			(uint32_t)addr+0x40, cf_cmd, val);
	return val;
}

/*
 * SCSI host template handed to libata through ata_probe_ent.sht in the
 * probe routine.  Everything uses the stock libata helpers except
 * .queuecommand, which goes through the driver's own wrapper.
 */
static struct scsi_host_template mobi_sht = {
	.module	    	= THIS_MODULE,
	.name	    	= DRV_NAME,
	.ioctl	    	= ata_scsi_ioctl,
	/* XXX BUG - unless we use our own version we hang on reboot.
	 * Take a look at the standard code to see what it is doing and
	 * try to figure this out.  The standard hook would be:
	.queuecommand   = ata_scsi_queuecmd,
	 */
	.queuecommand   = aumb4000_scsi_queuecmd,
	.can_queue	    = ATA_DEF_QUEUE,
	.this_id	    = ATA_SHT_THIS_ID,
	.sg_tablesize   = LIBATA_MAX_PRD,
	.cmd_per_lun    = ATA_SHT_CMD_PER_LUN,
	.emulated	    = ATA_SHT_EMULATED,
	.use_clustering = ATA_SHT_USE_CLUSTERING,
	.proc_name	    = DRV_NAME,
	.dma_boundary   = ATA_DMA_BOUNDARY,
	.slave_configure= ata_scsi_slave_config,
	.slave_destroy  = ata_scsi_slave_destroy,
	.bios_param	    = ata_std_bios_param,
	.resume	    	= ata_scsi_device_resume,
	.suspend        = ata_scsi_device_suspend,
};

/*
 *  ata_port_operations defined in libata.h
 */
/* Need to continue to take a closer look at these operations and
 * see for which we can use the std funcs and for which we need a special
 * one.  Filling in the ata_ioports addresses will help us with some but
 * not others.  Need to figure out if/when values need to be written to
 * the drive.  For example, the taskfile registers are written in the
 * controller but not explicitly written to the drive.  But what about
 * softreset?  Do we need to write the control reg to the drive for soft
 * reset to happen?  So lots of questions of when/if certain things should
 * be committed to the drive.
 */
static struct ata_port_operations mobi_port_ops = {
	.port_disable   = ata_port_disable,
	/* seem to need this for now */
	.dev_config     = aumb4000_dev_config,
	/* we don't have ap->ioaddr defined so don't use this yet 
	 * .mode_filter    = ata_pci_default_filter,
	 * but unless we have some mode we want to disable (which we
	 * probably do) we don't need this */
	.mode_filter    = NULL,
	/* should be able to replace set mode with pio/dma but not working */
#ifdef USE_SINGLE_SETMODE
	.set_mode		= aumb4000_set_mode,
#else
	.set_piomode	= aumb4000_set_piomode,
	.set_dmamode	= aumb4000_set_dmamode,
#endif
	/* use our load to minimize function calls to our prvio funcs */
	.tf_load	    = aumb4000_tf_load,
	.tf_read	    = aumb4000_tf_read,
	/* our exec adds 0x100 to all ata cmds since that is what 
	 * our controller wants */
	.exec_command   = aumb4000_exec_command,
	/* our status also checks if we have an amba dma running */
	.check_status   = aumb4000_check_status,
	.check_altstatus= aumb4000_check_altstatus,
	.dev_select     = ata_std_dev_select,
	.bmdma_setup    = aumb4000_bmdma_setup,
	.bmdma_start    = aumb4000_bmdma_start,
	.bmdma_stop	    = aumb4000_bmdma_stop,
	.bmdma_status   = aumb4000_bmdma_status,
	.data_xfer	    = ata_prvio_data_xfer,
	.qc_prep 	    = ata_qc_prep,
	.qc_issue	    = ata_qc_issue_prot,
	/* hooks below that are 0/NULL are deliberately left unset */
	.eng_timeout    = 0,
	.freeze	    	= NULL,
	.thaw	    	= NULL,
	.error_handler  = ata_bmdma_error_handler,
	.irq_handler    = ata_interrupt,
	.irq_clear	    = aumb4000_irq_clear,
	.scr_read       = 0,
	.scr_write      = 0,
	.port_suspend   = 0,
	.port_resume    = 0,
	.port_start	    = ata_port_start,
	.port_stop	    = ata_port_stop,
	.host_stop	    = NULL,
	/* register accessors that pair each access with a controller
	 * command; NOTE(review): these p* hooks and ATA_FLAG_PRVIO look
	 * like additions from a locally patched libata -- confirm */
	.pwriteb		= aumb4000_writeb,
	.pwritew		= aumb4000_writew,
	.preadb			= aumb4000_readb,
	.preadw			= aumb4000_readw,
};

#if 0
static int scsi_scsi_write_proc(struct file* f, 
		const char* buffer, unsigned long count, void* data) 
{

	int test = 0;
	int i = 128;
	const char* p = buffer ;
	struct ata_port* ap = (struct ata_port*)data;
	if (strncmp(p, "test=", strlen("test="))) {
		return count ;
	}
	if (1 != sscanf(p, "test=%d", &test)) {
		return count;
	}
	switch(test) {
		/*
		 **  if the BSY bit is set on the drive, this will clear it ...
		 */
		case 0  :
			printk(KERN_WARNING 
					"clearing interrupts by reading alt/stat registers\n");
			(void)aurora_inb(ap, FROM_DRV_ALT_STAT);  
			(void)aurora_inb(ap, FROM_DRV_STATUS);
			break;
		case 2:
			do { 
				(void)readl((unsigned long)(PRV_FROM_AP2(ap))->DATA); 
			} while (--i);
			break;
		default:
			if (test > 6) {
				break;
			}
	}
	return count ;
}
#endif

static int aumb4000_ata_reg_remove(struct amba_device *pdev);
static int aumb4000_ata_reg_suspend(struct amba_device *dev, pm_message_t state);
static int aumb4000_ata_reg_resume(struct amba_device *dev);
static int aumb4000_ata_reg_probe(struct amba_device *pdev, void *id);

static struct amba_id ata_reg_ids[] __initdata = {
	{
		.id     = 0x00041114,
		.mask   = 0x000fffff,
	},
	{ 0, 0 },
};

/*
 * AMBA driver descriptor registered in mobi_ata_init() and unregistered
 * in mobi_ata_exit().  Devices are matched against ata_reg_ids.
 */
static struct amba_driver aumb4000_ata_reg_driver = {
	.probe      = aumb4000_ata_reg_probe,
	.remove     = aumb4000_ata_reg_remove,
	/* suspend/resume are stubs that only return 0 */
	.suspend    = aumb4000_ata_reg_suspend,
	.resume     = aumb4000_ata_reg_resume,
	.id_table   = ata_reg_ids,
	.drv = { .name   = IDEREG_AMBA_NAME, },
};

/*
 * Fill in the per-register addresses of an ata_ioports structure from its
 * cmd_addr, which the caller must have set beforehand.  Register number
 * 'n' is placed at cmd_addr + (n << 2).
 *
 * Author's note: libata expects these addresses to be mapped to the real
 * device rather than to a controller, which makes them awkward to use
 * for this hardware.
 */
static void setup_port_mapping(struct ata_ioports *ioaddr)
{
/* address of taskfile register number 'reg' (registers are 4 bytes apart) */
#define AUMB4000_PORT_REG(reg)	(ioaddr->cmd_addr + ((reg) << 2))

	/* outputs */
	ioaddr->data_addr    = AUMB4000_PORT_REG(ATA_REG_DATA);
	ioaddr->feature_addr = AUMB4000_PORT_REG(ATA_REG_FEATURE);
	/* nsect is the sector count, not the sector number */
	ioaddr->nsect_addr   = AUMB4000_PORT_REG(ATA_REG_NSECT);
	/* lbal corresponds to the sector number */
	ioaddr->lbal_addr    = AUMB4000_PORT_REG(ATA_REG_LBAL);
	ioaddr->lbam_addr    = AUMB4000_PORT_REG(ATA_REG_LBAM);
	ioaddr->lbah_addr    = AUMB4000_PORT_REG(ATA_REG_LBAH);
	ioaddr->device_addr  = AUMB4000_PORT_REG(ATA_REG_DEVICE);
	ioaddr->command_addr = AUMB4000_PORT_REG(ATA_REG_CMD);
	/* there is no ATA_REG_ constant for ctl/altstatus; it is simply
	 * the register after CMD */
	ioaddr->ctl_addr     = AUMB4000_PORT_REG(ATA_REG_CMD+1);

	/* the "out" registers: not all of them have an alias in struct
	 * ata_ioports, which means all our tf reads have to occur in this
	 * driver; these three do have aliases so they can be set here */
	ioaddr->error_addr     = AUMB4000_PORT_REG(ATA_REG_ERR);
	ioaddr->status_addr    = AUMB4000_PORT_REG(ATA_REG_STATUS);
	ioaddr->altstatus_addr = ioaddr->ctl_addr;

#undef AUMB4000_PORT_REG

	dprintk0("data_addr  0x%lx\n"
			"feature_addr 0x%lx\n"
			"nsect_addr 0x%lx\n"
			"lbal_addr 0x%lx\n"
			"lbam_addr 0x%lx\n"
			"lbah_addr 0x%lx\n"
			"device_addr 0x%lx\n"
			"command_addr 0x%lx\n"
			"ctl_addr 0x%lx\n",
			ioaddr->data_addr,
			ioaddr->feature_addr,
			ioaddr->nsect_addr,
			ioaddr->lbal_addr,
			ioaddr->lbam_addr,
			ioaddr->lbah_addr,
			ioaddr->device_addr,
			ioaddr->command_addr,
			ioaddr->ctl_addr);
}

/*
 * Bring the CF block up: set CF_RAM_EN and clear CF_PC_MODE in the QCC
 * sys-config register, then release the CF reset.
 *
 * Returns 0 on success, -EIO if the QCC register write fails (the reset
 * is left untouched in that case).
 */
int device_powerup(void)
{
	uint32_t sys_cfg;

	sys_cfg = mobi_qcc_readreg(MOBI_QCCSISC_SYS_CONFIG_OFFSET);
	MOBI_QCCSISC_SYS_CONFIG_CF_RAM_EN_SET(sys_cfg);
	/* clear enable this for DMA */
	MOBI_QCCSISC_SYS_CONFIG_CF_PC_MODE_CLR(sys_cfg);

	if (mobi_qcc_writereg(sys_cfg, MOBI_QCCSISC_SYS_CONFIG_OFFSET)) {
		error("Could not config qcc registers");
		return -EIO;
	}

	mobi_reset_disable(RESET_ID_CF);
	return 0;
}

/*
 * Counterpart of device_powerup(): clear CF_RAM_EN in the QCC sys-config
 * register and put the CF block back into reset.  The result of the
 * register write is not checked.
 */
void device_powerdown(void)
{
	uint32_t sys_cfg;

	sys_cfg = mobi_qcc_readreg(MOBI_QCCSISC_SYS_CONFIG_OFFSET);
	MOBI_QCCSISC_SYS_CONFIG_CF_RAM_EN_CLR(sys_cfg);
	mobi_qcc_writereg(sys_cfg, MOBI_QCCSISC_SYS_CONFIG_OFFSET);

	mobi_reset_enable(RESET_ID_CF);
}

static int aumb4000_ata_reg_remove(struct amba_device *pdev) 
{
	struct aumb4000_device_data_t *aumb4k = 
		(struct aumb4000_device_data_t *)pdev->dev.platform_data;
	struct mobi_ata_prv_data* prv;

	if (!aumb4k) {
		printk(KERN_WARNING "No aumb4k data ..\n");
		return -1;
	}

	if (0 == (prv = (struct mobi_ata_prv_data*)aumb4k->private_data)) {
		printk(KERN_WARNING "No aumb4k->private_data ..\n");
		return -1;
	}

	/* XXX seems like I should have a different pointer to host 
	 * somewhere
	 */
	ata_host_remove((prv->ap)->host);

	if (prv->gpio_hdl) 
		gpio_driver_put(prv->gpio_hdl);

	if (reg_iobase) {
		iounmap(reg_iobase);
		reg_iobase = 0x0;
	}

#if AUMB4000_DEBUG
	remove_proc_entry("driver/ide/debug", NULL);
	remove_proc_entry("driver/ide", NULL);
#endif

	device_powerdown();

	kfree(prv);
	return(0);
}

/* AMBA suspend callback: nothing is saved or powered down; reports success. */
static int aumb4000_ata_reg_suspend(struct amba_device *dev,
		pm_message_t state)
{
	return 0;
}

/* AMBA resume callback: nothing to restore; reports success. */
static int aumb4000_ata_reg_resume(struct amba_device *dev)
{
	return 0;
}

static int aumb4000_ata_reg_probe(struct amba_device *pdev, void *id) 
{
	int32_t err = 0;
	flash_control_flags dfltFlashControl = { .d32 = 0 };
	cf_card_read_data_control_flags readctl = { .d32 = 0 };
	cf_card_write_data_control_flags writectl = { .d32 = 0 };
	struct ata_probe_ent ae = {
		.node               ={0},
		.dev                = 0,
		.port_ops           = &mobi_port_ops,
		.sht                = &mobi_sht,
		.port               = {
			{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
			{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
			{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
			{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
		},
		.n_ports            = 1,
		.dummy_port_mask    = 0,
		.pio_mask           = 0x1f, /* pio 0 thru 4 ...           */
		.mwdma_mask         = 0x0, /* multi word modes 0 thru 4 */
#ifdef UDMA_TESTING_ENABLE
		.udma_mask          = 0x7, /* ultra dma modes 0 thru 3   */
#else
		.udma_mask          = 0x0, /* ultra dma modes 0 thru 3   */
#endif
		.irq                = 0,
		.irq2               = 0,
		.irq_flags          = IRQF_PROBE_SHARED|IRQF_DISABLED,
		.port_flags         = 0,
		._host_flags        = 0,
		.mmio_base          = 0,
		.private_data       = 0,
		.pinfo2             = 0,
	};
#if AUMB4000_DEBUG
	struct proc_dir_entry *pentry;
#endif

	struct clk *ahb_clk = NULL;
	struct aumb4000_device_data_t* aumb4k = 
		(struct aumb4000_device_data_t *)pdev->dev.platform_data;

	if (!aumb4k) {
		error("No amba level private data for driver");
		return -EIO;
	}

	ae.private_data = kzalloc(sizeof(struct mobi_ata_prv_data), GFP_KERNEL);
	if (ae.private_data == NULL) {
		error("No memory for scsi host private data");
		return -ENOMEM;
	}

	/* ns_per_cycle global for now, should move this code to timing
	 * funcs where it is needed but wait till those funcs are 
	 * fleshed out
	 */
	ahb_clk = clk_get(&pdev->dev, "ahb");
	ns_per_cycle = (1000000000/clk_get_rate(ahb_clk));

	aumb4k->private_data = (void*)ae.private_data;
	((struct mobi_ata_prv_data*)ae.private_data)->aumb4k = aumb4k;

	if (device_powerup()) {
		error("Could not take device out of reset");
		err = -EIO;
		goto err_alloc;
	}

	reg_iobase = ioremap(CF_REG_BASE, CF_REG_SIZE);
	if (unlikely(!reg_iobase)) {
		dev_err(&pdev->dev, "failed to ioremap REG iobase\n");
		err = -ENXIO;
		goto err_pd;
	}

	INIT_LIST_HEAD(&ae.node);
	ae.dev = &pdev->dev;
	ae.dev->coherent_dma_mask = ~0x00;
	ae.irq = aumb4k->irq;
	ae.irq_flags = IRQF_DISABLED;
	//ae.port_flags = ATA_FLAG_PRVIO|ATA_FLAG_SKIP_D2H_BSY;
	ae.port_flags = ATA_FLAG_PRVIO;
	ae.mmio_base = reg_iobase;
	ae.port[0].cmd_addr = (uint32_t)reg_iobase + CF_CARD_DATA;
	setup_port_mapping(&ae.port[0]);

	((struct mobi_ata_prv_data*)ae.private_data)->dma_handle = -1;
	((struct mobi_ata_prv_data*)ae.private_data)->gpio_shift = aumb4k->gpio_shift;

	/* screw it, everything is udma, so only do that */
	dfltFlashControl.b.m = ULTRA_DMA_16;
	dfltFlashControl.b.sz = MOBI_DMA_CONFIG_BURST_SIZE_8; 
	REG_WRITE(dfltFlashControl.d32, CF_CONTROL);

	/* we are always going to do dma to/from the controller */
	readctl.d32 = REG_READ(CF_CARD_READ_DATA_CONTROL);
	readctl.b.de = 1; 
	readctl.b.thresh = MOBI_DMA_CONFIG_BURST_SIZE_8; 
	REG_WRITE(readctl.d32, CF_CARD_READ_DATA_CONTROL);

	writectl.d32 = REG_READ(CF_CARD_WRITE_DATA_CONTROL);
	writectl.b.de = 1; 
	writectl.b.thresh = MOBI_DMA_CONFIG_BURST_SIZE_8;
	REG_WRITE(writectl.d32, CF_CARD_WRITE_DATA_CONTROL);

	if (aumb4000_set_mode_timing(PIO0))
		goto err_remap;
	if (hard_reset())
		goto err_remap;
	if (config_gpio((struct mobi_ata_prv_data*)ae.private_data)) 
		goto err_remap;

#if AUMB4000_DEBUG
	ide_proc_dir = proc_mkdir("driver/ide", NULL);

	pentry = create_proc_entry("driver/ide/debug",
			S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH, NULL);
	if (pentry) {
		pentry->write_proc = aumb4000_proc_wr_debug;
	}
#endif

	/* returns number of ports registered */
	err = ata_device_add(&ae);
	dprintk2("ata_device_add registered %d device(s)\n", err);
	if  (err <= 0)
		goto err_gpio;

	return 0;

err_gpio:
	if (((struct mobi_ata_prv_data*)ae.private_data)->gpio_hdl)
		gpio_driver_put(((struct mobi_ata_prv_data*)ae.private_data)->gpio_hdl);

#if AUMB4000_DEBUG
	if (pentry)
		remove_proc_entry("driver/ide/debug", NULL);
	remove_proc_entry("driver/ide", NULL);
#endif

err_remap:
	iounmap(reg_iobase);
	reg_iobase = 0x0;

err_pd:
	device_powerdown();

err_alloc:
	kfree(ae.private_data);
	
	return err;
}

/*
 * Module entry point: announces the driver, picks a default loglevel when
 * debugging is compiled in and none was set (loglevel still -1), and
 * registers the AMBA driver.  Returns the result of
 * amba_driver_register().
 */
static int __init mobi_ata_init(void)
{
	int rc;

	printk("Loading AU-MB4000 ATA Driver\n");

	/* if not set locally or by modparam, then init */
#if defined(CONFIG_AUMB4000_DEBUG)
	if (loglevel == -1)
		loglevel = CONFIG_AUMB4000_DEBUG_LEVEL;
#elif defined(LOCAL_AUMB4000_DEBUG_ENABLE)
	if (loglevel == -1)
		loglevel = 0;
#endif

	if (loglevel != -1)
		printk(KERN_INFO "AU-MB4000 debug enabled,  loglevel is %d\n", loglevel);

	rc = amba_driver_register(&aumb4000_ata_reg_driver);
	if (rc != 0)
		printk(KERN_WARNING "%s: could not register register space\n", 
				DRV_NAME);

	return rc;
}

/* Module exit point: unregisters the AMBA driver registered at init. */
static void __exit mobi_ata_exit(void)
{
	amba_driver_unregister(&aumb4000_ata_reg_driver);
}

module_init(mobi_ata_init);
module_exit(mobi_ata_exit);
MODULE_AUTHOR("Mobilygen");
MODULE_DESCRIPTION("low-level driver for scsi/ata disk");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
