From 1d0933eaf976ba4f3ca00356f7124b1d12ddf168 Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Mon, 7 Oct 2013 15:53:02 +0530 Subject: spi: add TI QSPI driver Adds a SPI master driver for the TI QSPI peripheral. - Added quad read support. - Added memory mapped support. Signed-off-by: Matt Porter Signed-off-by: Sourav Poddar Signed-off-by: Jagannadha Sutradharudu Teki --- drivers/spi/Makefile | 1 + drivers/spi/ti_qspi.c | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+) create mode 100644 drivers/spi/ti_qspi.c (limited to 'drivers/spi') diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 91d24cea58..e5941b09f6 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -38,6 +38,7 @@ COBJS-$(CONFIG_FDT_SPI) += fdt_spi.o COBJS-$(CONFIG_TEGRA20_SFLASH) += tegra20_sflash.o COBJS-$(CONFIG_TEGRA20_SLINK) += tegra20_slink.o COBJS-$(CONFIG_TEGRA114_SPI) += tegra114_spi.o +COBJS-$(CONFIG_TI_QSPI) += ti_qspi.o COBJS-$(CONFIG_XILINX_SPI) += xilinx_spi.o COBJS-$(CONFIG_ZYNQ_SPI) += zynq_spi.o diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c new file mode 100644 index 0000000000..5a5b482769 --- /dev/null +++ b/drivers/spi/ti_qspi.c @@ -0,0 +1,311 @@ +/* + * TI QSPI driver + * + * Copyright (C) 2013, Texas Instruments, Incorporated + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include +#include +#include +#include +#include + +/* ti qpsi register bit masks */ +#define QSPI_TIMEOUT 2000000 +#define QSPI_FCLK 192000000 +/* clock control */ +#define QSPI_CLK_EN (1 << 31) +#define QSPI_CLK_DIV_MAX 0xffff +/* command */ +#define QSPI_EN_CS(n) (n << 28) +#define QSPI_WLEN(n) ((n-1) << 19) +#define QSPI_3_PIN (1 << 18) +#define QSPI_RD_SNGL (1 << 16) +#define QSPI_WR_SNGL (2 << 16) +#define QSPI_INVAL (4 << 16) +#define QSPI_RD_QUAD (7 << 16) +/* device control */ +#define QSPI_DD(m, n) (m << (3 + n*8)) +#define QSPI_CKPHA(n) (1 << (2 + n*8)) +#define QSPI_CSPOL(n) (1 << (1 + n*8)) +#define QSPI_CKPOL(n) (1 << 
(n*8)) +/* status */ +#define QSPI_WC (1 << 1) +#define QSPI_BUSY (1 << 0) +#define QSPI_WC_BUSY (QSPI_WC | QSPI_BUSY) +#define QSPI_XFER_DONE QSPI_WC +#define MM_SWITCH 0x01 +#define MEM_CS 0x100 +#define MEM_CS_UNSELECT 0xfffff0ff +#define MMAP_START_ADDR 0x5c000000 +#define CORE_CTRL_IO 0x4a002558 + +#define QSPI_CMD_READ (0x3 << 0) +#define QSPI_CMD_READ_QUAD (0x6b << 0) +#define QSPI_CMD_READ_FAST (0x0b << 0) +#define QSPI_SETUP0_NUM_A_BYTES (0x2 << 8) +#define QSPI_SETUP0_NUM_D_BYTES_NO_BITS (0x0 << 10) +#define QSPI_SETUP0_NUM_D_BYTES_8_BITS (0x1 << 10) +#define QSPI_SETUP0_READ_NORMAL (0x0 << 12) +#define QSPI_SETUP0_READ_QUAD (0x3 << 12) +#define QSPI_CMD_WRITE (0x2 << 16) +#define QSPI_NUM_DUMMY_BITS (0x0 << 24) + +/* ti qspi register set */ +struct ti_qspi_regs { + u32 pid; + u32 pad0[3]; + u32 sysconfig; + u32 pad1[3]; + u32 int_stat_raw; + u32 int_stat_en; + u32 int_en_set; + u32 int_en_ctlr; + u32 intc_eoi; + u32 pad2[3]; + u32 clk_ctrl; + u32 dc; + u32 cmd; + u32 status; + u32 data; + u32 setup0; + u32 setup1; + u32 setup2; + u32 setup3; + u32 memswitch; + u32 data1; + u32 data2; + u32 data3; +}; + +/* ti qspi slave */ +struct ti_qspi_slave { + struct spi_slave slave; + struct ti_qspi_regs *base; + unsigned int mode; + u32 cmd; + u32 dc; +}; + +static inline struct ti_qspi_slave *to_ti_qspi_slave(struct spi_slave *slave) +{ + return container_of(slave, struct ti_qspi_slave, slave); +} + +static void ti_spi_setup_spi_register(struct ti_qspi_slave *qslave) +{ + struct spi_slave *slave = &qslave->slave; + u32 memval = 0; + + slave->memory_map = (void *)MMAP_START_ADDR; + + memval |= QSPI_CMD_READ | QSPI_SETUP0_NUM_A_BYTES | + QSPI_SETUP0_NUM_D_BYTES_NO_BITS | + QSPI_SETUP0_READ_NORMAL | QSPI_CMD_WRITE | + QSPI_NUM_DUMMY_BITS; + + writel(memval, &qslave->base->setup0); +} + +static void ti_spi_set_speed(struct spi_slave *slave, uint hz) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + uint clk_div; + + debug("ti_spi_set_speed: hz: %d, clock 
divider %d\n", hz, clk_div); + + if (!hz) + clk_div = 0; + else + clk_div = (QSPI_FCLK / hz) - 1; + + /* disable SCLK */ + writel(readl(&qslave->base->clk_ctrl) & ~QSPI_CLK_EN, + &qslave->base->clk_ctrl); + + /* assign clk_div values */ + if (clk_div < 0) + clk_div = 0; + else if (clk_div > QSPI_CLK_DIV_MAX) + clk_div = QSPI_CLK_DIV_MAX; + + /* enable SCLK */ + writel(QSPI_CLK_EN | clk_div, &qslave->base->clk_ctrl); +} + +int spi_cs_is_valid(unsigned int bus, unsigned int cs) +{ + return 1; +} + +void spi_cs_activate(struct spi_slave *slave) +{ + /* CS handled in xfer */ + return; +} + +void spi_cs_deactivate(struct spi_slave *slave) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + + debug("spi_cs_deactivate: 0x%08x\n", (u32)slave); + + writel(qslave->cmd | QSPI_INVAL, &qslave->base->cmd); +} + +void spi_init(void) +{ + /* nothing to do */ +} + +struct spi_slave *spi_setup_slave(unsigned int bus, unsigned int cs, + unsigned int max_hz, unsigned int mode) +{ + struct ti_qspi_slave *qslave; + + qslave = spi_alloc_slave(struct ti_qspi_slave, bus, cs); + if (!qslave) { + printf("SPI_error: Fail to allocate ti_qspi_slave\n"); + return NULL; + } + + qslave->base = (struct ti_qspi_regs *)QSPI_BASE; + qslave->mode = mode; + + ti_spi_set_speed(&qslave->slave, max_hz); + +#ifdef CONFIG_TI_SPI_MMAP + ti_spi_setup_spi_register(qslave); +#endif + + return &qslave->slave; +} + +void spi_free_slave(struct spi_slave *slave) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + free(qslave); +} + +int spi_claim_bus(struct spi_slave *slave) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + + debug("spi_claim_bus: bus:%i cs:%i\n", slave->bus, slave->cs); + + qslave->dc = 0; + if (qslave->mode & SPI_CPHA) + qslave->dc |= QSPI_CKPHA(slave->cs); + if (qslave->mode & SPI_CPOL) + qslave->dc |= QSPI_CKPOL(slave->cs); + if (qslave->mode & SPI_CS_HIGH) + qslave->dc |= QSPI_CSPOL(slave->cs); + + writel(qslave->dc, &qslave->base->dc); + writel(0, 
&qslave->base->cmd); + writel(0, &qslave->base->data); + + return 0; +} + +void spi_release_bus(struct spi_slave *slave) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + + debug("spi_release_bus: bus:%i cs:%i\n", slave->bus, slave->cs); + + writel(0, &qslave->base->dc); + writel(0, &qslave->base->cmd); + writel(0, &qslave->base->data); +} + +int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, + void *din, unsigned long flags) +{ + struct ti_qspi_slave *qslave = to_ti_qspi_slave(slave); + uint words = bitlen >> 3; /* fixed 8-bit word length */ + const uchar *txp = dout; + uchar *rxp = din; + uint status; + int timeout, val; + + debug("spi_xfer: bus:%i cs:%i bitlen:%i words:%i flags:%lx\n", + slave->bus, slave->cs, bitlen, words, flags); + + /* Setup mmap flags */ + if (flags & SPI_XFER_MMAP) { + writel(MM_SWITCH, &qslave->base->memswitch); + val = readl(CORE_CTRL_IO); + val |= MEM_CS; + writel(val, CORE_CTRL_IO); + return 0; + } else if (flags & SPI_XFER_MMAP_END) { + writel(~MM_SWITCH, &qslave->base->memswitch); + val = readl(CORE_CTRL_IO); + val &= MEM_CS_UNSELECT; + writel(val, CORE_CTRL_IO); + return 0; + } + + if (bitlen == 0) + return -1; + + if (bitlen % 8) { + debug("spi_xfer: Non byte aligned SPI transfer\n"); + return -1; + } + + /* Setup command reg */ + qslave->cmd = 0; + qslave->cmd |= QSPI_WLEN(8); + qslave->cmd |= QSPI_EN_CS(slave->cs); + if (flags & SPI_3WIRE) + qslave->cmd |= QSPI_3_PIN; + qslave->cmd |= 0xfff; + + while (words--) { + if (txp) { + debug("tx cmd %08x dc %08x data %02x\n", + qslave->cmd | QSPI_WR_SNGL, qslave->dc, *txp); + writel(*txp++, &qslave->base->data); + writel(qslave->cmd | QSPI_WR_SNGL, + &qslave->base->cmd); + status = readl(&qslave->base->status); + timeout = QSPI_TIMEOUT; + while ((status & QSPI_WC_BUSY) != QSPI_XFER_DONE) { + if (--timeout < 0) { + printf("spi_xfer: TX timeout!\n"); + return -1; + } + status = readl(&qslave->base->status); + } + debug("tx done, status %08x\n", 
status); + } + if (rxp) { + qslave->cmd |= QSPI_RD_SNGL; + debug("rx cmd %08x dc %08x\n", + qslave->cmd, qslave->dc); + writel(qslave->cmd, &qslave->base->cmd); + status = readl(&qslave->base->status); + timeout = QSPI_TIMEOUT; + while ((status & QSPI_WC_BUSY) != QSPI_XFER_DONE) { + if (--timeout < 0) { + printf("spi_xfer: RX timeout!\n"); + return -1; + } + status = readl(&qslave->base->status); + } + *rxp++ = readl(&qslave->base->data); + debug("rx done, status %08x, read %02x\n", + status, *(rxp-1)); + } + } + + /* Terminate frame */ + if (flags & SPI_XFER_END) + spi_cs_deactivate(slave); + + return 0; +} -- cgit From 8d203afdd3bf54ad5b2523cdacaff4ac35da6b08 Mon Sep 17 00:00:00 2001 From: Rajeshwari Shinde Date: Tue, 8 Oct 2013 16:20:04 +0530 Subject: spi: exynos: Support a delay after deactivate For devices that need some time to react after a spi transaction finishes, add the ability to set a delay. Implement this as a delay on the first/next transaction to avoid any delay in the fairly common case where a SPI transaction is followed by other processing. 
Signed-off-by: Simon Glass Signed-off-by: Rajeshwari S Shinde Reviewed-by: Jagannadha Sutradharudu Teki --- drivers/spi/exynos_spi.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/spi') diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c index efc8b1e3a5..d7fdaacebd 100644 --- a/drivers/spi/exynos_spi.c +++ b/drivers/spi/exynos_spi.c @@ -26,6 +26,7 @@ struct spi_bus { struct exynos_spi *regs; int inited; /* 1 if this bus is ready for use */ int node; + uint deactivate_delay_us; /* Delay to wait after deactivate */ }; /* A list of spi buses that we know about */ @@ -40,6 +41,8 @@ struct exynos_spi_slave { enum periph_id periph_id; /* Peripheral ID for this device */ unsigned int fifo_size; int skip_preamble; + struct spi_bus *bus; /* Pointer to our SPI bus info */ + ulong last_transaction_us; /* Time of last transaction end */ }; static struct spi_bus *spi_get_bus(unsigned dev_index) @@ -85,6 +88,7 @@ struct spi_slave *spi_setup_slave(unsigned int busnum, unsigned int cs, } bus = &spi_bus[busnum]; + spi_slave->bus = bus; spi_slave->regs = bus->regs; spi_slave->mode = mode; spi_slave->periph_id = bus->periph_id; @@ -95,6 +99,7 @@ struct spi_slave *spi_setup_slave(unsigned int busnum, unsigned int cs, spi_slave->fifo_size = 256; spi_slave->skip_preamble = 0; + spi_slave->last_transaction_us = timer_get_us(); spi_slave->freq = bus->frequency; if (max_hz) @@ -359,9 +364,22 @@ void spi_cs_activate(struct spi_slave *slave) { struct exynos_spi_slave *spi_slave = to_exynos_spi(slave); + /* If it's too soon to do another transaction, wait */ + if (spi_slave->bus->deactivate_delay_us && + spi_slave->last_transaction_us) { + ulong delay_us; /* The delay completed so far */ + delay_us = timer_get_us() - spi_slave->last_transaction_us; + if (delay_us < spi_slave->bus->deactivate_delay_us) + udelay(spi_slave->bus->deactivate_delay_us - delay_us); + } + clrbits_le32(&spi_slave->regs->cs_reg, SPI_SLAVE_SIG_INACT); debug("Activate CS, 
bus %d\n", spi_slave->slave.bus); spi_slave->skip_preamble = spi_slave->mode & SPI_PREAMBLE; + + /* Remember time of this transaction so we can honour the bus delay */ + if (spi_slave->bus->deactivate_delay_us) + spi_slave->last_transaction_us = timer_get_us(); } /** @@ -411,6 +429,8 @@ static int spi_get_config(const void *blob, int node, struct spi_bus *bus) /* Use 500KHz as a suitable default */ bus->frequency = fdtdec_get_int(blob, node, "spi-max-frequency", 500000); + bus->deactivate_delay_us = fdtdec_get_int(blob, node, + "spi-deactivate-delay", 0); return 0; } -- cgit From 120af1572a3647bb87eff2f62dd8f8a919ee71f0 Mon Sep 17 00:00:00 2001 From: Rajeshwari Shinde Date: Tue, 8 Oct 2013 16:20:05 +0530 Subject: spi: exynos: Minimise access to SPI FIFO level Accessing SPI registers is slow, but access to the FIFO level register in particular seems to be extraordinarily expensive (I measure up to 600ns). Perhaps it is required to synchronise with the SPI byte output logic which might run at 1/8th of the 40MHz SPI speed (just a guess). Reduce access to this register by filling up and emptying FIFOs more completely, rather than just one word each time around the inner loop. Since the rxfifo value will now likely be much greater than what we read before we fill the txfifo, we only fill the txfifo halfway. This is because if the txfifo is empty, but the rxfifo has data in it, then writing too much data to the txfifo may overflow the rxfifo as data arrives. This speeds up SPI flash reading from about 1MB/s to about 2MB/s on snow. 
Signed-off-by: Simon Glass Signed-off-by: Rajeshwari S Shinde Reviewed-by: Jagannadha Sutradharudu Teki --- drivers/spi/exynos_spi.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/spi') diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c index d7fdaacebd..7407d6cc12 100644 --- a/drivers/spi/exynos_spi.c +++ b/drivers/spi/exynos_spi.c @@ -247,24 +247,27 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, /* Keep the fifos full/empty. */ spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl); - if (tx_lvl < spi_slave->fifo_size && out_bytes) { + while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) { temp = txp ? *txp++ : 0xff; writel(temp, &regs->tx_data); out_bytes--; + tx_lvl++; } if (rx_lvl > 0) { - temp = readl(&regs->rx_data); - if (spi_slave->skip_preamble) { - if (temp == SPI_PREAMBLE_END_BYTE) { - spi_slave->skip_preamble = 0; - stopping = 0; + while (rx_lvl > 0) { + temp = readl(&regs->rx_data); + if (spi_slave->skip_preamble) { + if (temp == SPI_PREAMBLE_END_BYTE) { + spi_slave->skip_preamble = 0; + stopping = 0; + } + } else { + if (rxp || stopping) + *rxp++ = temp; + in_bytes--; } - } else { - if (rxp || stopping) - *rxp++ = temp; - in_bytes--; - } - toread--; + toread--; + rx_lvl--; } else if (!toread) { /* * We have run out of input data, but haven't read -- cgit From c4a796329d00ce46de6b5afeb1fdabec82830677 Mon Sep 17 00:00:00 2001 From: Rajeshwari Shinde Date: Tue, 8 Oct 2013 16:20:06 +0530 Subject: spi: exynos: Support word transfers Since SPI register access is so expensive, it is worth transferring data a word at a time if we can. This complicates the driver unfortunately. Use the byte-swapping feature to avoid having to convert to/from big endian in software. This change increases speed from about 2MB/s to about 4.5MB/s. 
Signed-off-by: Simon Glass Signed-off-by: Rajeshwari S Shinde Reviewed-by: Jagannadha Sutradharudu Teki --- drivers/spi/exynos_spi.c | 76 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 15 deletions(-) (limited to 'drivers/spi') diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c index 7407d6cc12..699c57eb6d 100644 --- a/drivers/spi/exynos_spi.c +++ b/drivers/spi/exynos_spi.c @@ -204,12 +204,29 @@ static void spi_get_fifo_levels(struct exynos_spi *regs, * * @param regs SPI peripheral registers * @param count Number of bytes to transfer + * @param step Number of bytes to transfer in each packet (1 or 4) */ -static void spi_request_bytes(struct exynos_spi *regs, int count) +static void spi_request_bytes(struct exynos_spi *regs, int count, int step) { + /* For word address we need to swap bytes */ + if (step == 4) { + setbits_le32(&regs->mode_cfg, + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); + count /= 4; + setbits_le32(&regs->swap_cfg, SPI_TX_SWAP_EN | SPI_RX_SWAP_EN | + SPI_TX_BYTE_SWAP | SPI_RX_BYTE_SWAP | + SPI_TX_HWORD_SWAP | SPI_RX_HWORD_SWAP); + } else { + /* Select byte access and clear the swap configuration */ + clrbits_le32(&regs->mode_cfg, + SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD); + writel(0, &regs->swap_cfg); + } + assert(count && count < (1 << 16)); setbits_le32(&regs->ch_cfg, SPI_CH_RST); clrbits_le32(&regs->ch_cfg, SPI_CH_RST); + writel(count | SPI_PACKET_CNT_EN, &regs->pkt_cnt); } @@ -224,17 +241,27 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, int toread; unsigned start = get_timer(0); int stopping; + int step; out_bytes = in_bytes = todo; stopping = spi_slave->skip_preamble && (flags & SPI_XFER_END) && !(spi_slave->mode & SPI_SLAVE); + /* + * Try to transfer words if we can. This helps read performance at + * SPI clock speeds above about 20MHz. 
+ */ + step = 1; + if (!((todo | (uintptr_t)rxp | (uintptr_t)txp) & 3) && + !spi_slave->skip_preamble) + step = 4; + /* * If there's something to send, do a software reset and set a * transaction size. */ - spi_request_bytes(regs, todo); + spi_request_bytes(regs, todo, step); /* * Bytes are transmitted/received in pairs. Wait to receive all the @@ -247,14 +274,26 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, /* Keep the fifos full/empty. */ spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl); + + /* + * Don't completely fill the txfifo, since we don't want our + * rxfifo to overflow, and it may already contain data. + */ while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) { - temp = txp ? *txp++ : 0xff; + if (!txp) + temp = -1; + else if (step == 4) + temp = *(uint32_t *)txp; + else + temp = *txp; writel(temp, &regs->tx_data); - out_bytes--; - tx_lvl++; + out_bytes -= step; + if (txp) + txp += step; + tx_lvl += step; } - if (rx_lvl > 0) { - while (rx_lvl > 0) { + if (rx_lvl >= step) { + while (rx_lvl >= step) { temp = readl(&regs->rx_data); if (spi_slave->skip_preamble) { if (temp == SPI_PREAMBLE_END_BYTE) { @@ -262,12 +301,15 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, stopping = 0; } } else { - if (rxp || stopping) - *rxp++ = temp; - in_bytes--; + if (rxp || stopping) { + *rxp = temp; + rxp += step; + } + in_bytes -= step; } - toread--; - rx_lvl--; + toread -= step; + rx_lvl -= step; + } } else if (!toread) { /* * We have run out of input data, but haven't read @@ -279,7 +321,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo, out_bytes = in_bytes; toread = in_bytes; txp = NULL; - spi_request_bytes(regs, toread); + spi_request_bytes(regs, toread, step); } if (spi_slave->skip_preamble && get_timer(start) > 100) { printf("SPI timeout: in_bytes=%d, out_bytes=%d, ", @@ -323,10 +365,14 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, if ((flags & SPI_XFER_BEGIN)) 
spi_cs_activate(slave); - /* Exynos SPI limits each transfer to 65535 bytes */ + /* + * Exynos SPI limits each transfer to 65535 transfers. To keep + * things simple, allow a maximum of 65532 bytes. We could allow + * more in word mode, but the performance difference is small. + */ bytelen = bitlen / 8; for (upto = 0; !ret && upto < bytelen; upto += todo) { - todo = min(bytelen - upto, (1 << 16) - 1); + todo = min(bytelen - upto, (1 << 16) - 4); ret = spi_rx_tx(spi_slave, todo, &din, &dout, flags); if (ret) break; -- cgit