Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/ddr/altera/Makefile    |   11
-rw-r--r-- | drivers/ddr/altera/sdram.c     |  535
-rw-r--r-- | drivers/ddr/altera/sequencer.c | 3783
-rw-r--r-- | drivers/ddr/altera/sequencer.h |  227
-rw-r--r-- | drivers/fpga/socfpga.c         |    3
-rw-r--r-- | drivers/net/designware.c       |    3
6 files changed, 4561 insertions(+), 1 deletion(-)
diff --git a/drivers/ddr/altera/Makefile b/drivers/ddr/altera/Makefile new file mode 100644 index 0000000000..1ca705856d --- /dev/null +++ b/drivers/ddr/altera/Makefile @@ -0,0 +1,11 @@ +# +# (C) Copyright 2000-2003 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# (C) Copyright 2010, Thomas Chou <thomas@wytron.com.tw> +# Copyright (C) 2014 Altera Corporation <www.altera.com> +# +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-$(CONFIG_ALTERA_SDRAM) += sdram.o sequencer.o diff --git a/drivers/ddr/altera/sdram.c b/drivers/ddr/altera/sdram.c new file mode 100644 index 0000000000..1ed2883d1b --- /dev/null +++ b/drivers/ddr/altera/sdram.c @@ -0,0 +1,535 @@ +/* + * Copyright Altera Corporation (C) 2014-2015 + * + * SPDX-License-Identifier: GPL-2.0+ + */ +#include <common.h> +#include <errno.h> +#include <div64.h> +#include <watchdog.h> +#include <asm/arch/fpga_manager.h> +#include <asm/arch/sdram.h> +#include <asm/arch/system_manager.h> +#include <asm/io.h> + +DECLARE_GLOBAL_DATA_PTR; + +struct sdram_prot_rule { + u32 sdram_start; /* SDRAM start address */ + u32 sdram_end; /* SDRAM end address */ + u32 rule; /* SDRAM protection rule number: 0-19 */ + int valid; /* Rule valid or not? 1 - valid, 0 not*/ + + u32 security; + u32 portmask; + u32 result; + u32 lo_prot_id; + u32 hi_prot_id; +}; + +static struct socfpga_system_manager *sysmgr_regs = + (struct socfpga_system_manager *)SOCFPGA_SYSMGR_ADDRESS; +static struct socfpga_sdr_ctrl *sdr_ctrl = + (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS; + +/** + * get_errata_rows() - Up the number of DRAM rows to cover entire address space + * @cfg: SDRAM controller configuration data + * + * SDRAM Failure happens when accessing non-existent memory. Artificially + * increase the number of rows so that the memory controller thinks it has + * 4GB of RAM. This function returns such amount of rows. + */ +static int get_errata_rows(const struct socfpga_sdram_config *cfg) +{ + /* Define constant for 4G memory - used for SDRAM errata workaround */ +#define MEMSIZE_4G (4ULL * 1024ULL * 1024ULL * 1024ULL) + const unsigned long long memsize = MEMSIZE_4G; + const unsigned int cs = + ((cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_CSBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_CSBITS_LSB) + 1; + const unsigned int rows = + (cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_ROWBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_ROWBITS_LSB; + const unsigned int banks = + (cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_BANKBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_BANKBITS_LSB; + const unsigned int cols = + (cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_COLBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_COLBITS_LSB; + const unsigned int width = 8; + + unsigned long long newrows; + int bits, inewrowslog2; + + debug("workaround rows - memsize %lld\n", memsize); + debug("workaround rows - cs %d\n", cs); + debug("workaround rows - width %d\n", width); + debug("workaround rows - rows %d\n", rows); + debug("workaround rows - banks %d\n", banks); + debug("workaround rows - cols %d\n", cols); + + newrows = lldiv(memsize, cs * (width / 8)); + debug("rows workaround - term1 %lld\n", newrows); + + newrows = lldiv(newrows, (1 << banks) * (1 << cols)); + debug("rows workaround - term2 %lld\n", newrows); + + /* + * Compute the hamming weight - same as number of bits set. + * Need to see if result is ordinal power of 2 before + * attempting log2 of result. 
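+	 *
+	 * Worked example (illustrative handoff values only): with cs = 1,
+	 * width = 8, banks = 3 and cols = 10, newrows works out to
+	 * 2^32 / (1 * 1) / (2^3 * 2^10) = 2^19. Exactly one bit is set,
+	 * so the hweight check passes and __ilog2() yields 19 row bits.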
+ */ + bits = generic_hweight32(newrows); + + debug("rows workaround - bits %d\n", bits); + + if (bits != 1) { + printf("SDRAM workaround failed, bits set %d\n", bits); + return rows; + } + + if (newrows > UINT_MAX) { + printf("SDRAM workaround rangecheck failed, %lld\n", newrows); + return rows; + } + + inewrowslog2 = __ilog2(newrows); + + debug("rows workaround - ilog2 %d, %lld\n", inewrowslog2, newrows); + + if (inewrowslog2 == -1) { + printf("SDRAM workaround failed, newrows %lld\n", newrows); + return rows; + } + + return inewrowslog2; +} + +/* SDRAM protection rules vary from 0-19, a total of 20 rules. */ +static void sdram_set_rule(struct sdram_prot_rule *prule) +{ + u32 lo_addr_bits; + u32 hi_addr_bits; + int ruleno = prule->rule; + + /* Select the rule */ + writel(ruleno, &sdr_ctrl->prot_rule_rdwr); + + /* Obtain the address bits */ + lo_addr_bits = prule->sdram_start >> 20ULL; + hi_addr_bits = prule->sdram_end >> 20ULL; + + debug("sdram set rule start %x, %d\n", lo_addr_bits, + prule->sdram_start); + debug("sdram set rule end %x, %d\n", hi_addr_bits, + prule->sdram_end); + + /* Set rule addresses */ + writel(lo_addr_bits | (hi_addr_bits << 12), &sdr_ctrl->prot_rule_addr); + + /* Set rule protection ids */ + writel(prule->lo_prot_id | (prule->hi_prot_id << 12), + &sdr_ctrl->prot_rule_id); + + /* Set the rule data */ + writel(prule->security | (prule->valid << 2) | + (prule->portmask << 3) | (prule->result << 13), + &sdr_ctrl->prot_rule_data); + + /* write the rule */ + writel(ruleno | (1 << 5), &sdr_ctrl->prot_rule_rdwr); + + /* Set rule number to 0 by default */ + writel(0, &sdr_ctrl->prot_rule_rdwr); +} + +static void sdram_get_rule(struct sdram_prot_rule *prule) +{ + u32 addr; + u32 id; + u32 data; + int ruleno = prule->rule; + + /* Read the rule */ + writel(ruleno, &sdr_ctrl->prot_rule_rdwr); + writel(ruleno | (1 << 6), &sdr_ctrl->prot_rule_rdwr); + + /* Get the addresses */ + addr = readl(&sdr_ctrl->prot_rule_addr); + prule->sdram_start = (addr & 0xFFF) << 20; + prule->sdram_end = ((addr >> 12) & 0xFFF) << 20; + + /* Get the configured protection IDs */ + id = readl(&sdr_ctrl->prot_rule_id); + prule->lo_prot_id = id & 0xFFF; + prule->hi_prot_id = (id >> 12) & 0xFFF; + + /* Get protection data */ + data = readl(&sdr_ctrl->prot_rule_data); + + prule->security = data & 0x3; + prule->valid = (data >> 2) & 0x1; + prule->portmask = (data >> 3) & 0x3FF; + prule->result = (data >> 13) & 0x1; +} + +static void +sdram_set_protection_config(const u32 sdram_start, const u32 sdram_end) +{ + struct sdram_prot_rule rule; + int rules; + + /* Start with accepting all SDRAM transaction */ + writel(0x0, &sdr_ctrl->protport_default); + + /* Clear all protection rules for warm boot case */ + memset(&rule, 0, sizeof(rule)); + + for (rules = 0; rules < 20; rules++) { + rule.rule = rules; + sdram_set_rule(&rule); + } + + /* new rule: accept SDRAM */ + rule.sdram_start = sdram_start; + rule.sdram_end = sdram_end; + rule.lo_prot_id = 0x0; + rule.hi_prot_id = 0xFFF; + rule.portmask = 0x3FF; + rule.security = 0x3; + rule.result = 0; + rule.valid = 1; + rule.rule = 0; + + /* set new rule */ + sdram_set_rule(&rule); + + /* default rule: reject everything */ + writel(0x3ff, &sdr_ctrl->protport_default); +} + +static void sdram_dump_protection_config(void) +{ + struct sdram_prot_rule rule; + int rules; + + debug("SDRAM Prot rule, default %x\n", + readl(&sdr_ctrl->protport_default)); + + for (rules = 0; rules < 20; rules++) { + sdram_get_rule(&rule); + debug("Rule %d, rules ...\n", rules); + debug(" sdram 
start %x\n", rule.sdram_start); + debug(" sdram end %x\n", rule.sdram_end); + debug(" low prot id %d, hi prot id %d\n", + rule.lo_prot_id, + rule.hi_prot_id); + debug(" portmask %x\n", rule.portmask); + debug(" security %d\n", rule.security); + debug(" result %d\n", rule.result); + debug(" valid %d\n", rule.valid); + } +} + +/** + * sdram_write_verify() - write to register and verify the write. + * @addr: Register address + * @val: Value to be written and verified + * + * This function writes to a register, reads back the value and compares + * the result with the written value to check if the data match. + */ +static unsigned sdram_write_verify(const u32 *addr, const u32 val) +{ + u32 rval; + + debug(" Write - Address 0x%p Data 0x%08x\n", addr, val); + writel(val, addr); + + debug(" Read and verify..."); + rval = readl(addr); + if (rval != val) { + debug("FAIL - Address 0x%p Expected 0x%08x Data 0x%08x\n", + addr, val, rval); + return -EINVAL; + } + + debug("correct!\n"); + return 0; +} + +/** + * sdr_get_ctrlcfg() - Get the value of DRAM CTRLCFG register + * @cfg: SDRAM controller configuration data + * + * Return the value of DRAM CTRLCFG register. + */ +static u32 sdr_get_ctrlcfg(const struct socfpga_sdram_config *cfg) +{ + const u32 csbits = + ((cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_CSBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_CSBITS_LSB) + 1; + u32 addrorder = + (cfg->ctrl_cfg & SDR_CTRLGRP_CTRLCFG_ADDRORDER_MASK) >> + SDR_CTRLGRP_CTRLCFG_ADDRORDER_LSB; + + u32 ctrl_cfg = cfg->ctrl_cfg; + + /* + * SDRAM Failure When Accessing Non-Existent Memory + * Set the addrorder field of the SDRAM control register + * based on the CSBITs setting. + */ + if (csbits == 1) { + if (addrorder != 0) + debug("INFO: Changing address order to 0 (chip, row, bank, column)\n"); + addrorder = 0; + } else if (csbits == 2) { + if (addrorder != 2) + debug("INFO: Changing address order to 2 (row, chip, bank, column)\n"); + addrorder = 2; + } + + ctrl_cfg &= ~SDR_CTRLGRP_CTRLCFG_ADDRORDER_MASK; + ctrl_cfg |= addrorder << SDR_CTRLGRP_CTRLCFG_ADDRORDER_LSB; + + return ctrl_cfg; +} + +/** + * sdr_get_addr_rw() - Get the value of DRAM ADDRW register + * @cfg: SDRAM controller configuration data + * + * Return the value of DRAM ADDRW register. + */ +static u32 sdr_get_addr_rw(const struct socfpga_sdram_config *cfg) +{ + /* + * SDRAM Failure When Accessing Non-Existent Memory + * Set SDR_CTRLGRP_DRAMADDRW_CSBITS_LSB to + * log2(number of chip select bits). Since there's only + * 1 or 2 chip selects, log2(1) => 0, and log2(2) => 1, + * which is the same as "chip selects" - 1. + */ + const int rows = get_errata_rows(cfg); + u32 dram_addrw = cfg->dram_addrw & ~SDR_CTRLGRP_DRAMADDRW_ROWBITS_MASK; + + return dram_addrw | (rows << SDR_CTRLGRP_DRAMADDRW_ROWBITS_LSB); +} + +/** + * sdr_load_regs() - Load SDRAM controller registers + * @cfg: SDRAM controller configuration data + * + * This function loads the register values into the SDRAM controller block. 
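+ *
+ * Note that while the STATICCFG value is written here, the APPLYCFG
+ * bit is deliberately not set at this point; it is set as the final
+ * step in sdram_mmr_init_full() once all registers are loaded.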
+ */ +static void sdr_load_regs(const struct socfpga_sdram_config *cfg) +{ + const u32 ctrl_cfg = sdr_get_ctrlcfg(cfg); + const u32 dram_addrw = sdr_get_addr_rw(cfg); + + debug("\nConfiguring CTRLCFG\n"); + writel(ctrl_cfg, &sdr_ctrl->ctrl_cfg); + + debug("Configuring DRAMTIMING1\n"); + writel(cfg->dram_timing1, &sdr_ctrl->dram_timing1); + + debug("Configuring DRAMTIMING2\n"); + writel(cfg->dram_timing2, &sdr_ctrl->dram_timing2); + + debug("Configuring DRAMTIMING3\n"); + writel(cfg->dram_timing3, &sdr_ctrl->dram_timing3); + + debug("Configuring DRAMTIMING4\n"); + writel(cfg->dram_timing4, &sdr_ctrl->dram_timing4); + + debug("Configuring LOWPWRTIMING\n"); + writel(cfg->lowpwr_timing, &sdr_ctrl->lowpwr_timing); + + debug("Configuring DRAMADDRW\n"); + writel(dram_addrw, &sdr_ctrl->dram_addrw); + + debug("Configuring DRAMIFWIDTH\n"); + writel(cfg->dram_if_width, &sdr_ctrl->dram_if_width); + + debug("Configuring DRAMDEVWIDTH\n"); + writel(cfg->dram_dev_width, &sdr_ctrl->dram_dev_width); + + debug("Configuring LOWPWREQ\n"); + writel(cfg->lowpwr_eq, &sdr_ctrl->lowpwr_eq); + + debug("Configuring DRAMINTR\n"); + writel(cfg->dram_intr, &sdr_ctrl->dram_intr); + + debug("Configuring STATICCFG\n"); + writel(cfg->static_cfg, &sdr_ctrl->static_cfg); + + debug("Configuring CTRLWIDTH\n"); + writel(cfg->ctrl_width, &sdr_ctrl->ctrl_width); + + debug("Configuring PORTCFG\n"); + writel(cfg->port_cfg, &sdr_ctrl->port_cfg); + + debug("Configuring FIFOCFG\n"); + writel(cfg->fifo_cfg, &sdr_ctrl->fifo_cfg); + + debug("Configuring MPPRIORITY\n"); + writel(cfg->mp_priority, &sdr_ctrl->mp_priority); + + debug("Configuring MPWEIGHT_MPWEIGHT_0\n"); + writel(cfg->mp_weight0, &sdr_ctrl->mp_weight0); + writel(cfg->mp_weight1, &sdr_ctrl->mp_weight1); + writel(cfg->mp_weight2, &sdr_ctrl->mp_weight2); + writel(cfg->mp_weight3, &sdr_ctrl->mp_weight3); + + debug("Configuring MPPACING_MPPACING_0\n"); + writel(cfg->mp_pacing0, &sdr_ctrl->mp_pacing0); + writel(cfg->mp_pacing1, &sdr_ctrl->mp_pacing1); + writel(cfg->mp_pacing2, &sdr_ctrl->mp_pacing2); + writel(cfg->mp_pacing3, &sdr_ctrl->mp_pacing3); + + debug("Configuring MPTHRESHOLDRST_MPTHRESHOLDRST_0\n"); + writel(cfg->mp_threshold0, &sdr_ctrl->mp_threshold0); + writel(cfg->mp_threshold1, &sdr_ctrl->mp_threshold1); + writel(cfg->mp_threshold2, &sdr_ctrl->mp_threshold2); + + debug("Configuring PHYCTRL_PHYCTRL_0\n"); + writel(cfg->phy_ctrl0, &sdr_ctrl->phy_ctrl0); + + debug("Configuring CPORTWIDTH\n"); + writel(cfg->cport_width, &sdr_ctrl->cport_width); + + debug("Configuring CPORTWMAP\n"); + writel(cfg->cport_wmap, &sdr_ctrl->cport_wmap); + + debug("Configuring CPORTRMAP\n"); + writel(cfg->cport_rmap, &sdr_ctrl->cport_rmap); + + debug("Configuring RFIFOCMAP\n"); + writel(cfg->rfifo_cmap, &sdr_ctrl->rfifo_cmap); + + debug("Configuring WFIFOCMAP\n"); + writel(cfg->wfifo_cmap, &sdr_ctrl->wfifo_cmap); + + debug("Configuring CPORTRDWR\n"); + writel(cfg->cport_rdwr, &sdr_ctrl->cport_rdwr); + + debug("Configuring DRAMODT\n"); + writel(cfg->dram_odt, &sdr_ctrl->dram_odt); +} + +/** + * sdram_mmr_init_full() - Function to initialize SDRAM MMR + * @sdr_phy_reg: Value of the PHY control register 0 + * + * Initialize the SDRAM MMR. 
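+ *
+ * The sequence is: stash the (possibly errata-adjusted) row count in
+ * the SYSMGR handoff, load all controller registers, write (and, if
+ * the FPGA is programmed, verify) the FPGA port reset register,
+ * restore the PHY control register when valid, apply the
+ * configuration and finally program the SDRAM protection rules.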
+ */ +int sdram_mmr_init_full(unsigned int sdr_phy_reg) +{ + const struct socfpga_sdram_config *cfg = socfpga_get_sdram_config(); + const unsigned int rows = + (cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_ROWBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_ROWBITS_LSB; + int ret; + + writel(rows, &sysmgr_regs->iswgrp_handoff[4]); + + sdr_load_regs(cfg); + + /* saving this value to SYSMGR.ISWGRP.HANDOFF.FPGA2SDR */ + writel(cfg->fpgaport_rst, &sysmgr_regs->iswgrp_handoff[3]); + + /* only enable if the FPGA is programmed */ + if (fpgamgr_test_fpga_ready()) { + ret = sdram_write_verify(&sdr_ctrl->fpgaport_rst, + cfg->fpgaport_rst); + if (ret) + return ret; + } + + /* Restore the SDR PHY Register if valid */ + if (sdr_phy_reg != 0xffffffff) + writel(sdr_phy_reg, &sdr_ctrl->phy_ctrl0); + + /* Final step - apply configuration changes */ + debug("Configuring STATICCFG\n"); + clrsetbits_le32(&sdr_ctrl->static_cfg, + SDR_CTRLGRP_STATICCFG_APPLYCFG_MASK, + 1 << SDR_CTRLGRP_STATICCFG_APPLYCFG_LSB); + + sdram_set_protection_config(0, sdram_calculate_size() - 1); + + sdram_dump_protection_config(); + + return 0; +} + +/** + * sdram_calculate_size() - Calculate SDRAM size + * + * Calculate SDRAM device size based on SDRAM controller parameters. + * Size is specified in bytes. + */ +unsigned long sdram_calculate_size(void) +{ + unsigned long temp; + unsigned long row, bank, col, cs, width; + const struct socfpga_sdram_config *cfg = socfpga_get_sdram_config(); + const unsigned int csbits = + ((cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_CSBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_CSBITS_LSB) + 1; + const unsigned int rowbits = + (cfg->dram_addrw & SDR_CTRLGRP_DRAMADDRW_ROWBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_ROWBITS_LSB; + + temp = readl(&sdr_ctrl->dram_addrw); + col = (temp & SDR_CTRLGRP_DRAMADDRW_COLBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_COLBITS_LSB; + + /* + * SDRAM Failure When Accessing Non-Existent Memory + * Use ROWBITS from Quartus/QSys to calculate SDRAM size + * since the FB specifies we modify ROWBITs to work around SDRAM + * controller issue. + */ + row = readl(&sysmgr_regs->iswgrp_handoff[4]); + if (row == 0) + row = rowbits; + /* + * If the stored handoff value for rows is greater than + * the field width in the sdr.dramaddrw register then + * something is very wrong. Revert to using the the #define + * value handed off by the SOCEDS tool chain instead of + * using a broken value. + */ + if (row > 31) + row = rowbits; + + bank = (temp & SDR_CTRLGRP_DRAMADDRW_BANKBITS_MASK) >> + SDR_CTRLGRP_DRAMADDRW_BANKBITS_LSB; + + /* + * SDRAM Failure When Accessing Non-Existent Memory + * Use CSBITs from Quartus/QSys to calculate SDRAM size + * since the FB specifies we modify CSBITs to work around SDRAM + * controller issue. 
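+	 *
+	 * Worked size example (illustrative values only): row = 15,
+	 * bank = 3, col = 10, cs = 1 and width = 32 gives
+	 * (1 << (15 + 3 + 10)) * 1 * (32 / 8) = 2^28 * 4 = 1 GiB.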
+ */ + cs = csbits; + + width = readl(&sdr_ctrl->dram_if_width); + + /* ECC would not be calculated as its not addressible */ + if (width == SDRAM_WIDTH_32BIT_WITH_ECC) + width = 32; + if (width == SDRAM_WIDTH_16BIT_WITH_ECC) + width = 16; + + /* calculate the SDRAM size base on this info */ + temp = 1 << (row + bank + col); + temp = temp * cs * (width / 8); + + debug("%s returns %ld\n", __func__, temp); + + return temp; +} diff --git a/drivers/ddr/altera/sequencer.c b/drivers/ddr/altera/sequencer.c new file mode 100644 index 0000000000..2bd01092ee --- /dev/null +++ b/drivers/ddr/altera/sequencer.c @@ -0,0 +1,3783 @@ +/* + * Copyright Altera Corporation (C) 2012-2015 + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <common.h> +#include <asm/io.h> +#include <asm/arch/sdram.h> +#include <errno.h> +#include "sequencer.h" + +static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs = + (struct socfpga_sdr_rw_load_manager *) + (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800); +static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs = + (struct socfpga_sdr_rw_load_jump_manager *) + (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00); +static struct socfpga_sdr_reg_file *sdr_reg_file = + (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS; +static struct socfpga_sdr_scc_mgr *sdr_scc_mgr = + (struct socfpga_sdr_scc_mgr *) + (SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00); +static struct socfpga_phy_mgr_cmd *phy_mgr_cmd = + (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS; +static struct socfpga_phy_mgr_cfg *phy_mgr_cfg = + (struct socfpga_phy_mgr_cfg *) + (SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40); +static struct socfpga_data_mgr *data_mgr = + (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS; +static struct socfpga_sdr_ctrl *sdr_ctrl = + (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS; + +const struct socfpga_sdram_rw_mgr_config *rwcfg; +const struct socfpga_sdram_io_config *iocfg; +const struct socfpga_sdram_misc_config *misccfg; + +#define DELTA_D 1 + +/* + * In order to reduce ROM size, most of the selectable calibration steps are + * decided at compile time based on the user's calibration mode selection, + * as captured by the STATIC_CALIB_STEPS selection below. + * + * However, to support simulation-time selection of fast simulation mode, where + * we skip everything except the bare minimum, we need a few of the steps to + * be dynamic. 
In those cases, we either use the DYNAMIC_CALIB_STEPS for the + * check, which is based on the rtl-supplied value, or we dynamically compute + * the value to use based on the dynamically-chosen calibration mode + */ + +#define DLEVEL 0 +#define STATIC_IN_RTL_SIM 0 +#define STATIC_SKIP_DELAY_LOOPS 0 + +#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \ + STATIC_SKIP_DELAY_LOOPS) + +/* calibration steps requested by the rtl */ +u16 dyn_calib_steps; + +/* + * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option + * instead of static, we use boolean logic to select between + * non-skip and skip values + * + * The mask is set to include all bits when not-skipping, but is + * zero when skipping + */ + +u16 skip_delay_mask; /* mask off bits when skipping/not-skipping */ + +#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ + ((non_skip_value) & skip_delay_mask) + +struct gbl_type *gbl; +struct param_type *param; + +static void set_failing_group_stage(u32 group, u32 stage, + u32 substage) +{ + /* + * Only set the global stage if there was not been any other + * failing group + */ + if (gbl->error_stage == CAL_STAGE_NIL) { + gbl->error_substage = substage; + gbl->error_stage = stage; + gbl->error_group = group; + } +} + +static void reg_file_set_group(u16 set_group) +{ + clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16); +} + +static void reg_file_set_stage(u8 set_stage) +{ + clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff); +} + +static void reg_file_set_sub_stage(u8 set_sub_stage) +{ + set_sub_stage &= 0xff; + clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8); +} + +/** + * phy_mgr_initialize() - Initialize PHY Manager + * + * Initialize PHY Manager. + */ +static void phy_mgr_initialize(void) +{ + u32 ratio; + + debug("%s:%d\n", __func__, __LINE__); + /* Calibration has control over path to memory */ + /* + * In Hard PHY this is a 2-bit control: + * 0: AFI Mux Select + * 1: DDIO Mux Select + */ + writel(0x3, &phy_mgr_cfg->mux_sel); + + /* USER memory clock is not stable we begin initialization */ + writel(0, &phy_mgr_cfg->reset_mem_stbl); + + /* USER calibration status all set to zero */ + writel(0, &phy_mgr_cfg->cal_status); + + writel(0, &phy_mgr_cfg->cal_debug_info); + + /* Init params only if we do NOT skip calibration. */ + if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) + return; + + ratio = rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; + param->read_correct_mask_vg = (1 << ratio) - 1; + param->write_correct_mask_vg = (1 << ratio) - 1; + param->read_correct_mask = (1 << rwcfg->mem_dq_per_read_dqs) - 1; + param->write_correct_mask = (1 << rwcfg->mem_dq_per_write_dqs) - 1; +} + +/** + * set_rank_and_odt_mask() - Set Rank and ODT mask + * @rank: Rank mask + * @odt_mode: ODT mode, OFF or READ_WRITE + * + * Set Rank and ODT mask (On-Die Termination). 
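+ *
+ * Example (single-slot, dual-rank case): a write to rank 1 yields
+ * odt_mask_0 = 0x0 and odt_mask_1 = 0x2, so the packed word is
+ * (0xFF & ~(1 << 1)) | (0x0 << 8) | (0x2 << 16) = 0x0200FD.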
+ */ +static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode) +{ + u32 odt_mask_0 = 0; + u32 odt_mask_1 = 0; + u32 cs_and_odt_mask; + + if (odt_mode == RW_MGR_ODT_MODE_OFF) { + odt_mask_0 = 0x0; + odt_mask_1 = 0x0; + } else { /* RW_MGR_ODT_MODE_READ_WRITE */ + switch (rwcfg->mem_number_of_ranks) { + case 1: /* 1 Rank */ + /* Read: ODT = 0 ; Write: ODT = 1 */ + odt_mask_0 = 0x0; + odt_mask_1 = 0x1; + break; + case 2: /* 2 Ranks */ + if (rwcfg->mem_number_of_cs_per_dimm == 1) { + /* + * - Dual-Slot , Single-Rank (1 CS per DIMM) + * OR + * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM) + * + * Since MEM_NUMBER_OF_RANKS is 2, they + * are both single rank with 2 CS each + * (special for RDIMM). + * + * Read: Turn on ODT on the opposite rank + * Write: Turn on ODT on all ranks + */ + odt_mask_0 = 0x3 & ~(1 << rank); + odt_mask_1 = 0x3; + } else { + /* + * - Single-Slot , Dual-Rank (2 CS per DIMM) + * + * Read: Turn on ODT off on all ranks + * Write: Turn on ODT on active rank + */ + odt_mask_0 = 0x0; + odt_mask_1 = 0x3 & (1 << rank); + } + break; + case 4: /* 4 Ranks */ + /* Read: + * ----------+-----------------------+ + * | ODT | + * Read From +-----------------------+ + * Rank | 3 | 2 | 1 | 0 | + * ----------+-----+-----+-----+-----+ + * 0 | 0 | 1 | 0 | 0 | + * 1 | 1 | 0 | 0 | 0 | + * 2 | 0 | 0 | 0 | 1 | + * 3 | 0 | 0 | 1 | 0 | + * ----------+-----+-----+-----+-----+ + * + * Write: + * ----------+-----------------------+ + * | ODT | + * Write To +-----------------------+ + * Rank | 3 | 2 | 1 | 0 | + * ----------+-----+-----+-----+-----+ + * 0 | 0 | 1 | 0 | 1 | + * 1 | 1 | 0 | 1 | 0 | + * 2 | 0 | 1 | 0 | 1 | + * 3 | 1 | 0 | 1 | 0 | + * ----------+-----+-----+-----+-----+ + */ + switch (rank) { + case 0: + odt_mask_0 = 0x4; + odt_mask_1 = 0x5; + break; + case 1: + odt_mask_0 = 0x8; + odt_mask_1 = 0xA; + break; + case 2: + odt_mask_0 = 0x1; + odt_mask_1 = 0x5; + break; + case 3: + odt_mask_0 = 0x2; + odt_mask_1 = 0xA; + break; + } + break; + } + } + + cs_and_odt_mask = (0xFF & ~(1 << rank)) | + ((0xFF & odt_mask_0) << 8) | + ((0xFF & odt_mask_1) << 16); + writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); +} + +/** + * scc_mgr_set() - Set SCC Manager register + * @off: Base offset in SCC Manager space + * @grp: Read/Write group + * @val: Value to be set + * + * This function sets the SCC Manager (Scan Chain Control Manager) register. + */ +static void scc_mgr_set(u32 off, u32 grp, u32 val) +{ + writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2)); +} + +/** + * scc_mgr_initialize() - Initialize SCC Manager registers + * + * Initialize SCC Manager registers. + */ +static void scc_mgr_initialize(void) +{ + /* + * Clear register file for HPS. 
16 (2^4) is the size of the + * full register file in the scc mgr: + * RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + + * MEM_IF_READ_DQS_WIDTH - 1); + */ + int i; + + for (i = 0; i < 16; i++) { + debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n", + __func__, __LINE__, i); + scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i); + } +} + +static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase) +{ + scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase); +} + +static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay) +{ + scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay); +} + +static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase) +{ + scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase); +} + +static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay) +{ + scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay); +} + +static void scc_mgr_set_dqs_io_in_delay(u32 delay) +{ + scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs, + delay); +} + +static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay) +{ + scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay); +} + +static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay) +{ + scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay); +} + +static void scc_mgr_set_dqs_out1_delay(u32 delay) +{ + scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs, + delay); +} + +static void scc_mgr_set_dm_out1_delay(u32 dm, u32 delay) +{ + scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, + rwcfg->mem_dq_per_write_dqs + 1 + dm, + delay); +} + +/* load up dqs config settings */ +static void scc_mgr_load_dqs(u32 dqs) +{ + writel(dqs, &sdr_scc_mgr->dqs_ena); +} + +/* load up dqs io config settings */ +static void scc_mgr_load_dqs_io(void) +{ + writel(0, &sdr_scc_mgr->dqs_io_ena); +} + +/* load up dq config settings */ +static void scc_mgr_load_dq(u32 dq_in_group) +{ + writel(dq_in_group, &sdr_scc_mgr->dq_ena); +} + +/* load up dm config settings */ +static void scc_mgr_load_dm(u32 dm) +{ + writel(dm, &sdr_scc_mgr->dm_ena); +} + +/** + * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks + * @off: Base offset in SCC Manager space + * @grp: Read/Write group + * @val: Value to be set + * @update: If non-zero, trigger SCC Manager update for all ranks + * + * This function sets the SCC Manager (Scan Chain Control Manager) register + * and optionally triggers the SCC update for all ranks. + */ +static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val, + const int update) +{ + u32 r; + + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + scc_mgr_set(off, grp, val); + + if (update || (r == 0)) { + writel(grp, &sdr_scc_mgr->dqs_ena); + writel(0, &sdr_scc_mgr->update); + } + } +} + +static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase) +{ + /* + * USER although the h/w doesn't support different phases per + * shadow register, for simplicity our scc manager modeling + * keeps different phase settings per shadow reg, and it's + * important for us to keep them in sync to match h/w. + * for efficiency, the scan chain update should occur only + * once to sr0. 
+ */ + scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET, + read_group, phase, 0); +} + +static void scc_mgr_set_dqdqs_output_phase_all_ranks(u32 write_group, + u32 phase) +{ + /* + * USER although the h/w doesn't support different phases per + * shadow register, for simplicity our scc manager modeling + * keeps different phase settings per shadow reg, and it's + * important for us to keep them in sync to match h/w. + * for efficiency, the scan chain update should occur only + * once to sr0. + */ + scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, + write_group, phase, 0); +} + +static void scc_mgr_set_dqs_en_delay_all_ranks(u32 read_group, + u32 delay) +{ + /* + * In shadow register mode, the T11 settings are stored in + * registers in the core, which are updated by the DQS_ENA + * signals. Not issuing the SCC_MGR_UPD command allows us to + * save lots of rank switching overhead, by calling + * select_shadow_regs_for_update with update_scan_chains + * set to 0. + */ + scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET, + read_group, delay, 1); + writel(0, &sdr_scc_mgr->update); +} + +/** + * scc_mgr_set_oct_out1_delay() - Set OCT output delay + * @write_group: Write group + * @delay: Delay value + * + * This function sets the OCT output delay in SCC manager. + */ +static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay) +{ + const int ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; + const int base = write_group * ratio; + int i; + /* + * Load the setting in the SCC manager + * Although OCT affects only write data, the OCT delay is controlled + * by the DQS logic block which is instantiated once per read group. + * For protocols where a write group consists of multiple read groups, + * the setting must be set multiple times. + */ + for (i = 0; i < ratio; i++) + scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay); +} + +/** + * scc_mgr_set_hhp_extras() - Set HHP extras. + * + * Load the fixed setting in the SCC manager HHP extras. + */ +static void scc_mgr_set_hhp_extras(void) +{ + /* + * Load the fixed setting in the SCC manager + * bits: 0:0 = 1'b1 - DQS bypass + * bits: 1:1 = 1'b1 - DQ bypass + * bits: 4:2 = 3'b001 - rfifo_mode + * bits: 6:5 = 2'b01 - rfifo clock_select + * bits: 7:7 = 1'b0 - separate gating from ungating setting + * bits: 8:8 = 1'b0 - separate OE from Output delay setting + */ + const u32 value = (0 << 8) | (0 << 7) | (1 << 5) | + (1 << 2) | (1 << 1) | (1 << 0); + const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | + SCC_MGR_HHP_GLOBALS_OFFSET | + SCC_MGR_HHP_EXTRAS_OFFSET; + + debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", + __func__, __LINE__); + writel(value, addr); + debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n", + __func__, __LINE__); +} + +/** + * scc_mgr_zero_all() - Zero all DQS config + * + * Zero all DQS config. + */ +static void scc_mgr_zero_all(void) +{ + int i, r; + + /* + * USER Zero all DQS config settings, across all groups and all + * shadow registers + */ + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { + /* + * The phases actually don't exist on a per-rank basis, + * but there's no harm updating them several times, so + * let's keep the code simple. 
+ */ + scc_mgr_set_dqs_bus_in_delay(i, iocfg->dqs_in_reserve); + scc_mgr_set_dqs_en_phase(i, 0); + scc_mgr_set_dqs_en_delay(i, 0); + } + + for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) { + scc_mgr_set_dqdqs_output_phase(i, 0); + /* Arria V/Cyclone V don't have out2. */ + scc_mgr_set_oct_out1_delay(i, iocfg->dqs_out_reserve); + } + } + + /* Multicast to all DQS group enables. */ + writel(0xff, &sdr_scc_mgr->dqs_ena); + writel(0, &sdr_scc_mgr->update); +} + +/** + * scc_set_bypass_mode() - Set bypass mode and trigger SCC update + * @write_group: Write group + * + * Set bypass mode and trigger SCC update. + */ +static void scc_set_bypass_mode(const u32 write_group) +{ + /* Multicast to all DQ enables. */ + writel(0xff, &sdr_scc_mgr->dq_ena); + writel(0xff, &sdr_scc_mgr->dm_ena); + + /* Update current DQS IO enable. */ + writel(0, &sdr_scc_mgr->dqs_io_ena); + + /* Update the DQS logic. */ + writel(write_group, &sdr_scc_mgr->dqs_ena); + + /* Hit update. */ + writel(0, &sdr_scc_mgr->update); +} + +/** + * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group + * @write_group: Write group + * + * Load DQS settings for Write Group, do not trigger SCC update. + */ +static void scc_mgr_load_dqs_for_write_group(const u32 write_group) +{ + const int ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; + const int base = write_group * ratio; + int i; + /* + * Load the setting in the SCC manager + * Although OCT affects only write data, the OCT delay is controlled + * by the DQS logic block which is instantiated once per read group. + * For protocols where a write group consists of multiple read groups, + * the setting must be set multiple times. + */ + for (i = 0; i < ratio; i++) + writel(base + i, &sdr_scc_mgr->dqs_ena); +} + +/** + * scc_mgr_zero_group() - Zero all configs for a group + * + * Zero DQ, DM, DQS and OCT configs for a group. + */ +static void scc_mgr_zero_group(const u32 write_group, const int out_only) +{ + int i, r; + + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + /* Zero all DQ config settings. */ + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { + scc_mgr_set_dq_out1_delay(i, 0); + if (!out_only) + scc_mgr_set_dq_in_delay(i, 0); + } + + /* Multicast to all DQ enables. */ + writel(0xff, &sdr_scc_mgr->dq_ena); + + /* Zero all DM config settings. */ + for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) + scc_mgr_set_dm_out1_delay(i, 0); + + /* Multicast to all DM enables. */ + writel(0xff, &sdr_scc_mgr->dm_ena); + + /* Zero all DQS IO settings. */ + if (!out_only) + scc_mgr_set_dqs_io_in_delay(0); + + /* Arria V/Cyclone V don't have out2. */ + scc_mgr_set_dqs_out1_delay(iocfg->dqs_out_reserve); + scc_mgr_set_oct_out1_delay(write_group, iocfg->dqs_out_reserve); + scc_mgr_load_dqs_for_write_group(write_group); + + /* Multicast to all DQS IO enables (only 1 in total). */ + writel(0, &sdr_scc_mgr->dqs_io_ena); + + /* Hit update to zero everything. 
*/ + writel(0, &sdr_scc_mgr->update); + } +} + +/* + * apply and load a particular input delay for the DQ pins in a group + * group_bgn is the index of the first dq pin (in the write group) + */ +static void scc_mgr_apply_group_dq_in_delay(u32 group_bgn, u32 delay) +{ + u32 i, p; + + for (i = 0, p = group_bgn; i < rwcfg->mem_dq_per_read_dqs; i++, p++) { + scc_mgr_set_dq_in_delay(p, delay); + scc_mgr_load_dq(p); + } +} + +/** + * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group + * @delay: Delay value + * + * Apply and load a particular output delay for the DQ pins in a group. + */ +static void scc_mgr_apply_group_dq_out1_delay(const u32 delay) +{ + int i; + + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { + scc_mgr_set_dq_out1_delay(i, delay); + scc_mgr_load_dq(i); + } +} + +/* apply and load a particular output delay for the DM pins in a group */ +static void scc_mgr_apply_group_dm_out1_delay(u32 delay1) +{ + u32 i; + + for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { + scc_mgr_set_dm_out1_delay(i, delay1); + scc_mgr_load_dm(i); + } +} + + +/* apply and load delay on both DQS and OCT out1 */ +static void scc_mgr_apply_group_dqs_io_and_oct_out1(u32 write_group, + u32 delay) +{ + scc_mgr_set_dqs_out1_delay(delay); + scc_mgr_load_dqs_io(); + + scc_mgr_set_oct_out1_delay(write_group, delay); + scc_mgr_load_dqs_for_write_group(write_group); +} + +/** + * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT + * @write_group: Write group + * @delay: Delay value + * + * Apply a delay to the entire output side: DQ, DM, DQS, OCT. + */ +static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group, + const u32 delay) +{ + u32 i, new_delay; + + /* DQ shift */ + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) + scc_mgr_load_dq(i); + + /* DM shift */ + for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) + scc_mgr_load_dm(i); + + /* DQS shift */ + new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay; + if (new_delay > iocfg->io_out2_delay_max) { + debug_cond(DLEVEL == 1, + "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", + __func__, __LINE__, write_group, delay, new_delay, + iocfg->io_out2_delay_max, + new_delay - iocfg->io_out2_delay_max); + new_delay -= iocfg->io_out2_delay_max; + scc_mgr_set_dqs_out1_delay(new_delay); + } + + scc_mgr_load_dqs_io(); + + /* OCT shift */ + new_delay = READ_SCC_OCT_OUT2_DELAY + delay; + if (new_delay > iocfg->io_out2_delay_max) { + debug_cond(DLEVEL == 1, + "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", + __func__, __LINE__, write_group, delay, + new_delay, iocfg->io_out2_delay_max, + new_delay - iocfg->io_out2_delay_max); + new_delay -= iocfg->io_out2_delay_max; + scc_mgr_set_oct_out1_delay(write_group, new_delay); + } + + scc_mgr_load_dqs_for_write_group(write_group); +} + +/** + * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side to all ranks + * @write_group: Write group + * @delay: Delay value + * + * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks. 
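+ *
+ * As in scc_mgr_apply_group_all_out_delay_add() above, any computed
+ * DQS/OCT delay beyond io_out2_delay_max spills into the OUT1
+ * register; e.g. with a hypothetical OUT2 maximum of 31 taps, a
+ * computed delay of 35 programs the excess of 4 taps into OUT1.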
+ */ +static void +scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group, + const u32 delay) +{ + int r; + + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + scc_mgr_apply_group_all_out_delay_add(write_group, delay); + writel(0, &sdr_scc_mgr->update); + } +} + +/** + * set_jump_as_return() - Return instruction optimization + * + * Optimization used to recover some slots in ddr3 inst_rom could be + * applied to other protocols if we wanted to + */ +static void set_jump_as_return(void) +{ + /* + * To save space, we replace return with jump to special shared + * RETURN instruction so we set the counter to large value so that + * we always jump. + */ + writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0); + writel(rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0); +} + +/** + * delay_for_n_mem_clocks() - Delay for N memory clocks + * @clocks: Length of the delay + * + * Delay for N memory clocks. + */ +static void delay_for_n_mem_clocks(const u32 clocks) +{ + u32 afi_clocks; + u16 c_loop; + u8 inner; + u8 outer; + + debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks); + + /* Scale (rounding up) to get afi clocks. */ + afi_clocks = DIV_ROUND_UP(clocks, misccfg->afi_rate_ratio); + if (afi_clocks) /* Temporary underflow protection */ + afi_clocks--; + + /* + * Note, we don't bother accounting for being off a little + * bit because of a few extra instructions in outer loops. + * Note, the loops have a test at the end, and do the test + * before the decrement, and so always perform the loop + * 1 time more than the counter value + */ + c_loop = afi_clocks >> 16; + outer = c_loop ? 0xff : (afi_clocks >> 8); + inner = outer ? 0xff : afi_clocks; + + /* + * rom instructions are structured as follows: + * + * IDLE_LOOP2: jnz cntr0, TARGET_A + * IDLE_LOOP1: jnz cntr1, TARGET_B + * return + * + * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and + * TARGET_B is set to IDLE_LOOP2 as well + * + * if we have no outer loop, though, then we can use IDLE_LOOP1 only, + * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely + * + * a little confusing, but it helps save precious space in the inst_rom + * and sequencer rom and keeps the delays more accurate and reduces + * overhead + */ + if (afi_clocks < 0x100) { + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), + &sdr_rw_load_mgr_regs->load_cntr1); + + writel(rwcfg->idle_loop1, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + + writel(rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET); + } else { + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), + &sdr_rw_load_mgr_regs->load_cntr0); + + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), + &sdr_rw_load_mgr_regs->load_cntr1); + + writel(rwcfg->idle_loop2, + &sdr_rw_load_jump_mgr_regs->load_jump_add0); + + writel(rwcfg->idle_loop2, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + + do { + writel(rwcfg->idle_loop2, + SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET); + } while (c_loop-- != 0); + } + debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks); +} + +/** + * rw_mgr_mem_init_load_regs() - Load instruction registers + * @cntr0: Counter 0 value + * @cntr1: Counter 1 value + * @cntr2: Counter 2 value + * @jump: Jump instruction value + * + * Load instruction registers. 
+ */ +static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump) +{ + u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + + /* Load counters */ + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0), + &sdr_rw_load_mgr_regs->load_cntr0); + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1), + &sdr_rw_load_mgr_regs->load_cntr1); + writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2), + &sdr_rw_load_mgr_regs->load_cntr2); + + /* Load jump address */ + writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0); + writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1); + writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2); + + /* Execute count instruction */ + writel(jump, grpaddr); +} + +/** + * rw_mgr_mem_load_user() - Load user calibration values + * @fin1: Final instruction 1 + * @fin2: Final instruction 2 + * @precharge: If 1, precharge the banks at the end + * + * Load user calibration values and optionally precharge the banks. + */ +static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2, + const int precharge) +{ + u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + u32 r; + + for (r = 0; r < rwcfg->mem_number_of_ranks; r++) { + /* set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); + + /* precharge all banks ... */ + if (precharge) + writel(rwcfg->precharge_all, grpaddr); + + /* + * USER Use Mirror-ed commands for odd ranks if address + * mirrorring is on + */ + if ((rwcfg->mem_address_mirroring >> r) & 0x1) { + set_jump_as_return(); + writel(rwcfg->mrs2_mirr, grpaddr); + delay_for_n_mem_clocks(4); + set_jump_as_return(); + writel(rwcfg->mrs3_mirr, grpaddr); + delay_for_n_mem_clocks(4); + set_jump_as_return(); + writel(rwcfg->mrs1_mirr, grpaddr); + delay_for_n_mem_clocks(4); + set_jump_as_return(); + writel(fin1, grpaddr); + } else { + set_jump_as_return(); + writel(rwcfg->mrs2, grpaddr); + delay_for_n_mem_clocks(4); + set_jump_as_return(); + writel(rwcfg->mrs3, grpaddr); + delay_for_n_mem_clocks(4); + set_jump_as_return(); + writel(rwcfg->mrs1, grpaddr); + set_jump_as_return(); + writel(fin2, grpaddr); + } + + if (precharge) + continue; + + set_jump_as_return(); + writel(rwcfg->zqcl, grpaddr); + + /* tZQinit = tDLLK = 512 ck cycles */ + delay_for_n_mem_clocks(512); + } +} + +/** + * rw_mgr_mem_initialize() - Initialize RW Manager + * + * Initialize RW Manager. 
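+ *
+ * The tINIT/tRESET counter values used below follow from the relation
+ * number_of_cycles = ((2 + n) * a + 2) * b derived in the body
+ * comments; as a sanity check, n = 0, a = 256, b = 106 gives
+ * ((2 + 0) * 256 + 2) * 106 = 54484 cycles, which covers the ~54000
+ * cycles of tINIT = 200us at 266MHz.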
+ */ +static void rw_mgr_mem_initialize(void) +{ + debug("%s:%d\n", __func__, __LINE__); + + /* The reset / cke part of initialization is broadcasted to all ranks */ + writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); + + /* + * Here's how you load register for a loop + * Counters are located @ 0x800 + * Jump address are located @ 0xC00 + * For both, registers 0 to 3 are selected using bits 3 and 2, like + * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C + * I know this ain't pretty, but Avalon bus throws away the 2 least + * significant bits + */ + + /* Start with memory RESET activated */ + + /* tINIT = 200us */ + + /* + * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles + * If a and b are the number of iteration in 2 nested loops + * it takes the following number of cycles to complete the operation: + * number_of_cycles = ((2 + n) * a + 2) * b + * where n is the number of instruction in the inner loop + * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, + * b = 6A + */ + rw_mgr_mem_init_load_regs(misccfg->tinit_cntr0_val, + misccfg->tinit_cntr1_val, + misccfg->tinit_cntr2_val, + rwcfg->init_reset_0_cke_0); + + /* Indicate that memory is stable. */ + writel(1, &phy_mgr_cfg->reset_mem_stbl); + + /* + * transition the RESET to high + * Wait for 500us + */ + + /* + * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles + * If a and b are the number of iteration in 2 nested loops + * it takes the following number of cycles to complete the operation + * number_of_cycles = ((2 + n) * a + 2) * b + * where n is the number of instruction in the inner loop + * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, + * b = FF + */ + rw_mgr_mem_init_load_regs(misccfg->treset_cntr0_val, + misccfg->treset_cntr1_val, + misccfg->treset_cntr2_val, + rwcfg->init_reset_1_cke_0); + + /* Bring up clock enable. */ + + /* tXRP < 250 ck cycles */ + delay_for_n_mem_clocks(250); + + rw_mgr_mem_load_user(rwcfg->mrs0_dll_reset_mirr, rwcfg->mrs0_dll_reset, + 0); +} + +/** + * rw_mgr_mem_handoff() - Hand off the memory to user + * + * At the end of calibration we have to program the user settings in + * and hand off the memory to the user. + */ +static void rw_mgr_mem_handoff(void) +{ + rw_mgr_mem_load_user(rwcfg->mrs0_user_mirr, rwcfg->mrs0_user, 1); + /* + * Need to wait tMOD (12CK or 15ns) time before issuing other + * commands, but we will have plenty of NIOS cycles before actual + * handoff so its okay. + */ +} + +/** + * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command + * @group: Write Group + * @use_dm: Use DM + * + * Issue write test command. Two variants are provided, one that just tests + * a write pattern and another that tests datamask functionality. + */ +static void rw_mgr_mem_calibrate_write_test_issue(u32 group, + u32 test_dm) +{ + const u32 quick_write_mode = + (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) && + misccfg->enable_super_quick_calibration; + u32 mcc_instruction; + u32 rw_wl_nop_cycles; + + /* + * Set counter and jump addresses for the right + * number of NOP cycles. + * The number of supported NOP cycles can range from -1 to infinity + * Three different cases are handled: + * + * 1. For a number of NOP cycles greater than 0, the RW Mgr looping + * mechanism will be used to insert the right number of NOPs + * + * 2. 
For a number of NOP cycles equals to 0, the micro-instruction + * issuing the write command will jump straight to the + * micro-instruction that turns on DQS (for DDRx), or outputs write + * data (for RLD), skipping + * the NOP micro-instruction all together + * + * 3. A number of NOP cycles equal to -1 indicates that DQS must be + * turned on in the same micro-instruction that issues the write + * command. Then we need + * to directly jump to the micro-instruction that sends out the data + * + * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters + * (2 and 3). One jump-counter (0) is used to perform multiple + * write-read operations. + * one counter left to issue this command in "multiple-group" mode + */ + + rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; + + if (rw_wl_nop_cycles == -1) { + /* + * CNTR 2 - We want to execute the special write operation that + * turns on DQS right away and then skip directly to the + * instruction that sends out the data. We set the counter to a + * large number so that the jump is always taken. + */ + writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); + + /* CNTR 3 - Not used */ + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0_wl_1; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_data, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0_wl_1; + writel(rwcfg->lfsr_wr_rd_bank_0_data, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + writel(rwcfg->lfsr_wr_rd_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } + } else if (rw_wl_nop_cycles == 0) { + /* + * CNTR 2 - We want to skip the NOP operation and go straight + * to the DQS enable instruction. We set the counter to a large + * number so that the jump is always taken. + */ + writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); + + /* CNTR 3 - Not used */ + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_dqs, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0; + writel(rwcfg->lfsr_wr_rd_bank_0_dqs, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + } + } else { + /* + * CNTR 2 - In this case we want to execute the next instruction + * and NOT take the jump. So we set the counter to 0. The jump + * address doesn't count. + */ + writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); + writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); + + /* + * CNTR 3 - Set the nop counter to the number of cycles we + * need to loop for, minus 1. + */ + writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0; + writel(rwcfg->lfsr_wr_rd_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } + } + + writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RESET_READ_DATAPATH_OFFSET); + + if (quick_write_mode) + writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); + else + writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); + + writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); + + /* + * CNTR 1 - This is used to ensure enough time elapses + * for read data to come back. 
+ */ + writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); + + if (test_dm) { + writel(rwcfg->lfsr_wr_rd_dm_bank_0_wait, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + } else { + writel(rwcfg->lfsr_wr_rd_bank_0_wait, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + } + + writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET) + + (group << 2)); +} + +/** + * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass + * @rank_bgn: Rank number + * @write_group: Write Group + * @use_dm: Use DM + * @all_correct: All bits must be correct in the mask + * @bit_chk: Resulting bit mask after the test + * @all_ranks: Test all ranks + * + * Test writes, can check for a single bit pass or multiple bit pass. + */ +static int +rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group, + const u32 use_dm, const u32 all_correct, + u32 *bit_chk, const u32 all_ranks) +{ + const u32 rank_end = all_ranks ? + rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 shift_ratio = rwcfg->mem_dq_per_write_dqs / + rwcfg->mem_virtual_groups_per_write_dqs; + const u32 correct_mask_vg = param->write_correct_mask_vg; + + u32 tmp_bit_chk, base_rw_mgr; + int vg, r; + + *bit_chk = param->write_correct_mask; + + for (r = rank_bgn; r < rank_end; r++) { + /* Set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); + + tmp_bit_chk = 0; + for (vg = rwcfg->mem_virtual_groups_per_write_dqs - 1; + vg >= 0; vg--) { + /* Reset the FIFOs to get pointers to known state. */ + writel(0, &phy_mgr_cmd->fifo_reset); + + rw_mgr_mem_calibrate_write_test_issue( + write_group * + rwcfg->mem_virtual_groups_per_write_dqs + vg, + use_dm); + + base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); + tmp_bit_chk <<= shift_ratio; + tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr)); + } + + *bit_chk &= tmp_bit_chk; + } + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + if (all_correct) { + debug_cond(DLEVEL == 2, + "write_test(%u,%u,ALL) : %u == %u => %i\n", + write_group, use_dm, *bit_chk, + param->write_correct_mask, + *bit_chk == param->write_correct_mask); + return *bit_chk == param->write_correct_mask; + } else { + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + debug_cond(DLEVEL == 2, + "write_test(%u,%u,ONE) : %u != %i => %i\n", + write_group, use_dm, *bit_chk, 0, *bit_chk != 0); + return *bit_chk != 0x00; + } +} + +/** + * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns + * @rank_bgn: Rank number + * @group: Read/Write Group + * @all_ranks: Test all ranks + * + * Performs a guaranteed read on the patterns we are going to use during a + * read test to ensure memory works. + */ +static int +rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group, + const u32 all_ranks) +{ + const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + const u32 addr_offset = + (group * rwcfg->mem_virtual_groups_per_read_dqs) << 2; + const u32 rank_end = all_ranks ? 
+ rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 shift_ratio = rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; + const u32 correct_mask_vg = param->read_correct_mask_vg; + + u32 tmp_bit_chk, base_rw_mgr, bit_chk; + int vg, r; + int ret = 0; + + bit_chk = param->read_correct_mask; + + for (r = rank_bgn; r < rank_end; r++) { + /* Set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); + + /* Load up a constant bursts of read commands */ + writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); + writel(rwcfg->guaranteed_read, + &sdr_rw_load_jump_mgr_regs->load_jump_add0); + + writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); + writel(rwcfg->guaranteed_read_cont, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + + tmp_bit_chk = 0; + for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; + vg >= 0; vg--) { + /* Reset the FIFOs to get pointers to known state. */ + writel(0, &phy_mgr_cmd->fifo_reset); + writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RESET_READ_DATAPATH_OFFSET); + writel(rwcfg->guaranteed_read, + addr + addr_offset + (vg << 2)); + + base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); + tmp_bit_chk <<= shift_ratio; + tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr; + } + + bit_chk &= tmp_bit_chk; + } + + writel(rwcfg->clear_dqs_enable, addr + (group << 2)); + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + + if (bit_chk != param->read_correct_mask) + ret = -EIO; + + debug_cond(DLEVEL == 1, + "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n", + __func__, __LINE__, group, bit_chk, + param->read_correct_mask, ret); + + return ret; +} + +/** + * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test + * @rank_bgn: Rank number + * @all_ranks: Test all ranks + * + * Load up the patterns we are going to use during a read test. + */ +static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn, + const int all_ranks) +{ + const u32 rank_end = all_ranks ? + rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + u32 r; + + debug("%s:%d\n", __func__, __LINE__); + + for (r = rank_bgn; r < rank_end; r++) { + /* set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); + + /* Load up a constant bursts */ + writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); + + writel(rwcfg->guaranteed_write_wait0, + &sdr_rw_load_jump_mgr_regs->load_jump_add0); + + writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); + + writel(rwcfg->guaranteed_write_wait1, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + + writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2); + + writel(rwcfg->guaranteed_write_wait2, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + + writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3); + + writel(rwcfg->guaranteed_write_wait3, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + + writel(rwcfg->guaranteed_write, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET); + } + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); +} + +/** + * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank + * @rank_bgn: Rank number + * @group: Read/Write group + * @num_tries: Number of retries of the test + * @all_correct: All bits must be correct in the mask + * @bit_chk: Resulting bit mask after the test + * @all_groups: Test all R/W groups + * @all_ranks: Test all ranks + * + * Try a read and see if it returns correct data back. Test has dummy reads + * inserted into the mix used to align DQS enable. Test has more thorough + * checks than the regular read test. 
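+ *
+ * Per virtual group the RW manager status is folded into the check
+ * mask: tmp_bit_chk is shifted left by DQ-per-virtual-group and the
+ * passing bits (correct_mask_vg & ~base_rw_mgr) are ORed in. With,
+ * say, 8 DQ per read DQS and 2 virtual groups (illustrative numbers),
+ * each group contributes a 4-bit nibble to the final *bit_chk.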
+ */ +static int +rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group, + const u32 num_tries, const u32 all_correct, + u32 *bit_chk, + const u32 all_groups, const u32 all_ranks) +{ + const u32 rank_end = all_ranks ? rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 quick_read_mode = + ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) && + misccfg->enable_super_quick_calibration); + u32 correct_mask_vg = param->read_correct_mask_vg; + u32 tmp_bit_chk; + u32 base_rw_mgr; + u32 addr; + + int r, vg, ret; + + *bit_chk = param->read_correct_mask; + + for (r = rank_bgn; r < rank_end; r++) { + /* set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); + + writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1); + + writel(rwcfg->read_b2b_wait1, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + + writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2); + writel(rwcfg->read_b2b_wait2, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + + if (quick_read_mode) + writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0); + /* need at least two (1+1) reads to capture failures */ + else if (all_groups) + writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0); + else + writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0); + + writel(rwcfg->read_b2b, + &sdr_rw_load_jump_mgr_regs->load_jump_add0); + if (all_groups) + writel(rwcfg->mem_if_read_dqs_width * + rwcfg->mem_virtual_groups_per_read_dqs - 1, + &sdr_rw_load_mgr_regs->load_cntr3); + else + writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3); + + writel(rwcfg->read_b2b, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + + tmp_bit_chk = 0; + for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; vg >= 0; + vg--) { + /* Reset the FIFOs to get pointers to known state. */ + writel(0, &phy_mgr_cmd->fifo_reset); + writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RESET_READ_DATAPATH_OFFSET); + + if (all_groups) { + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_ALL_GROUPS_OFFSET; + } else { + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + } + + writel(rwcfg->read_b2b, addr + + ((group * + rwcfg->mem_virtual_groups_per_read_dqs + + vg) << 2)); + + base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); + tmp_bit_chk <<= rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; + tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr); + } + + *bit_chk &= tmp_bit_chk; + } + + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; + writel(rwcfg->clear_dqs_enable, addr + (group << 2)); + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + + if (all_correct) { + ret = (*bit_chk == param->read_correct_mask); + debug_cond(DLEVEL == 2, + "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n", + __func__, __LINE__, group, all_groups, *bit_chk, + param->read_correct_mask, ret); + } else { + ret = (*bit_chk != 0x00); + debug_cond(DLEVEL == 2, + "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n", + __func__, __LINE__, group, all_groups, *bit_chk, + 0, ret); + } + + return ret; +} + +/** + * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks + * @grp: Read/Write group + * @num_tries: Number of retries of the test + * @all_correct: All bits must be correct in the mask + * @all_groups: Test all R/W groups + * + * Perform a READ test across all memory ranks. 
+ */ +static int +rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries, + const u32 all_correct, + const u32 all_groups) +{ + u32 bit_chk; + return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct, + &bit_chk, all_groups, 1); +} + +/** + * rw_mgr_incr_vfifo() - Increase VFIFO value + * @grp: Read/Write group + * + * Increase VFIFO value. + */ +static void rw_mgr_incr_vfifo(const u32 grp) +{ + writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy); +} + +/** + * rw_mgr_decr_vfifo() - Decrease VFIFO value + * @grp: Read/Write group + * + * Decrease VFIFO value. + */ +static void rw_mgr_decr_vfifo(const u32 grp) +{ + u32 i; + + for (i = 0; i < misccfg->read_valid_fifo_size - 1; i++) + rw_mgr_incr_vfifo(grp); +} + +/** + * find_vfifo_failing_read() - Push VFIFO to get a failing read + * @grp: Read/Write group + * + * Push VFIFO until a failing read happens. + */ +static int find_vfifo_failing_read(const u32 grp) +{ + u32 v, ret, fail_cnt = 0; + + for (v = 0; v < misccfg->read_valid_fifo_size; v++) { + debug_cond(DLEVEL == 2, "%s:%d: vfifo %u\n", + __func__, __LINE__, v); + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (!ret) { + fail_cnt++; + + if (fail_cnt == 2) + return v; + } + + /* Fiddle with FIFO. */ + rw_mgr_incr_vfifo(grp); + } + + /* No failing read found! Something must have gone wrong. */ + debug_cond(DLEVEL == 2, "%s:%d: vfifo failed\n", __func__, __LINE__); + return 0; +} + +/** + * sdr_find_phase_delay() - Find DQS enable phase or delay + * @working: If 1, look for working phase/delay, if 0, look for non-working + * @delay: If 1, look for delay, if 0, look for phase + * @grp: Read/Write group + * @work: Working window position + * @work_inc: Working window increment + * @pd: DQS Phase/Delay Iterator + * + * Find working or non-working DQS enable phase setting. + */ +static int sdr_find_phase_delay(int working, int delay, const u32 grp, + u32 *work, const u32 work_inc, u32 *pd) +{ + const u32 max = delay ? iocfg->dqs_en_delay_max : + iocfg->dqs_en_phase_max; + u32 ret; + + for (; *pd <= max; (*pd)++) { + if (delay) + scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd); + else + scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd); + + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (!working) + ret = !ret; + + if (ret) + return 0; + + if (work) + *work += work_inc; + } + + return -EINVAL; +} +/** + * sdr_find_phase() - Find DQS enable phase + * @working: If 1, look for working phase, if 0, look for non-working phase + * @grp: Read/Write group + * @work: Working window position + * @i: Iterator + * @p: DQS Phase Iterator + * + * Find working or non-working DQS enable phase setting. + */ +static int sdr_find_phase(int working, const u32 grp, u32 *work, + u32 *i, u32 *p) +{ + const u32 end = misccfg->read_valid_fifo_size + (working ? 0 : 1); + int ret; + + for (; *i < end; (*i)++) { + if (working) + *p = 0; + + ret = sdr_find_phase_delay(working, 0, grp, work, + iocfg->delay_per_opa_tap, p); + if (!ret) + return 0; + + if (*p > iocfg->dqs_en_phase_max) { + /* Fiddle with FIFO. */ + rw_mgr_incr_vfifo(grp); + if (!working) + *p = 0; + } + } + + return -EINVAL; +} + +/** + * sdr_working_phase() - Find working DQS enable phase + * @grp: Read/Write group + * @work_bgn: Working window start position + * @d: dtaps output value + * @p: DQS Phase Iterator + * @i: Iterator + * + * Find working DQS enable phase setting. 
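+ *
+ * The window position is tracked in picoseconds: each phase tap adds
+ * iocfg->delay_per_opa_tap and each delay tap adds
+ * iocfg->delay_per_dqs_en_dchain_tap. With made-up tap delays of 416 ps
+ * and 72 ps, a window found after 3 ptap steps and 2 dtap steps sits at
+ * roughly 3 * 416 + 2 * 72 = 1392 ps.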
+ */ +static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d, + u32 *p, u32 *i) +{ + const u32 dtaps_per_ptap = iocfg->delay_per_opa_tap / + iocfg->delay_per_dqs_en_dchain_tap; + int ret; + + *work_bgn = 0; + + for (*d = 0; *d <= dtaps_per_ptap; (*d)++) { + *i = 0; + scc_mgr_set_dqs_en_delay_all_ranks(grp, *d); + ret = sdr_find_phase(1, grp, work_bgn, i, p); + if (!ret) + return 0; + *work_bgn += iocfg->delay_per_dqs_en_dchain_tap; + } + + /* Cannot find working solution */ + debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n", + __func__, __LINE__); + return -EINVAL; +} + +/** + * sdr_backup_phase() - Find DQS enable backup phase + * @grp: Read/Write group + * @work_bgn: Working window start position + * @p: DQS Phase Iterator + * + * Find DQS enable backup phase setting. + */ +static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p) +{ + u32 tmp_delay, d; + int ret; + + /* Special case code for backing up a phase */ + if (*p == 0) { + *p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); + } else { + (*p)--; + } + tmp_delay = *work_bgn - iocfg->delay_per_opa_tap; + scc_mgr_set_dqs_en_phase_all_ranks(grp, *p); + + for (d = 0; d <= iocfg->dqs_en_delay_max && tmp_delay < *work_bgn; + d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (ret) { + *work_bgn = tmp_delay; + break; + } + + tmp_delay += iocfg->delay_per_dqs_en_dchain_tap; + } + + /* Restore VFIFO to old state before we decremented it (if needed). */ + (*p)++; + if (*p > iocfg->dqs_en_phase_max) { + *p = 0; + rw_mgr_incr_vfifo(grp); + } + + scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); +} + +/** + * sdr_nonworking_phase() - Find non-working DQS enable phase + * @grp: Read/Write group + * @work_end: Working window end position + * @p: DQS Phase Iterator + * @i: Iterator + * + * Find non-working DQS enable phase setting. + */ +static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i) +{ + int ret; + + (*p)++; + *work_end += iocfg->delay_per_opa_tap; + if (*p > iocfg->dqs_en_phase_max) { + /* Fiddle with FIFO. */ + *p = 0; + rw_mgr_incr_vfifo(grp); + } + + ret = sdr_find_phase(0, grp, work_end, i, p); + if (ret) { + /* Cannot see edge of failing read. */ + debug_cond(DLEVEL == 2, "%s:%d: end: failed\n", + __func__, __LINE__); + } + + return ret; +} + +/** + * sdr_find_window_center() - Find center of the working DQS window. + * @grp: Read/Write group + * @work_bgn: First working settings + * @work_end: Last working settings + * + * Find center of the working DQS enable window. 
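+ *
+ * Worked example of the arithmetic below (made-up values: 416 ps per
+ * ptap, 72 ps per dtap, dqs_en_phase_max = 7): for work_bgn = 1000 and
+ * work_end = 3000, work_mid = 2000 ps; one VFIFO span is 8 * 416 = 3328,
+ * so work_mid stays 2000; p = rounddown(2000, 416) / 416 = 4 ptaps and
+ * d = DIV_ROUND_UP(2000 - 1664, 72) = 5 dtaps.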
+ */
+static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
+ const u32 work_end)
+{
+ u32 work_mid;
+ int tmp_delay = 0;
+ int i, p, d;
+
+ work_mid = (work_bgn + work_end) / 2;
+
+ debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
+ work_bgn, work_end, work_mid);
+ /* Get the middle delay to be less than a VFIFO delay */
+ tmp_delay = (iocfg->dqs_en_phase_max + 1) * iocfg->delay_per_opa_tap;
+
+ debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
+ work_mid %= tmp_delay;
+ debug_cond(DLEVEL == 2, "new work_mid %d\n", work_mid);
+
+ tmp_delay = rounddown(work_mid, iocfg->delay_per_opa_tap);
+ if (tmp_delay > iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap)
+ tmp_delay = iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap;
+ p = tmp_delay / iocfg->delay_per_opa_tap;
+
+ debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
+
+ d = DIV_ROUND_UP(work_mid - tmp_delay,
+ iocfg->delay_per_dqs_en_dchain_tap);
+ if (d > iocfg->dqs_en_delay_max)
+ d = iocfg->dqs_en_delay_max;
+ tmp_delay += d * iocfg->delay_per_dqs_en_dchain_tap;
+
+ debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
+
+ scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
+ scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
+
+ /*
+ * Push vfifo until we can successfully calibrate. We can do this
+ * because the largest possible margin is within 1 VFIFO cycle.
+ */
+ for (i = 0; i < misccfg->read_valid_fifo_size; i++) {
+ debug_cond(DLEVEL == 2, "find_dqs_en_phase: center\n");
+ if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
+ PASS_ONE_BIT,
+ 0)) {
+ debug_cond(DLEVEL == 2,
+ "%s:%d center: found: ptap=%u dtap=%u\n",
+ __func__, __LINE__, p, d);
+ return 0;
+ }
+
+ /* Fiddle with FIFO. */
+ rw_mgr_incr_vfifo(grp);
+ }
+
+ debug_cond(DLEVEL == 2, "%s:%d center: failed.\n",
+ __func__, __LINE__);
+ return -EINVAL;
+}
+
+/**
+ * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
+ * @grp: Read/Write Group
+ *
+ * Find a good DQS enable to use.
+ */
+static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
+{
+ u32 d, p, i;
+ u32 dtaps_per_ptap;
+ u32 work_bgn, work_end;
+ u32 found_passing_read, found_failing_read, initial_failing_dtap;
+ int ret;
+
+ debug("%s:%d %u\n", __func__, __LINE__, grp);
+
+ reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
+
+ scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
+ scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
+
+ /* Step 0: Determine number of delay taps for each phase tap. */
+ dtaps_per_ptap = iocfg->delay_per_opa_tap /
+ iocfg->delay_per_dqs_en_dchain_tap;
+
+ /* Step 1: First push vfifo until we get a failing read. */
+ find_vfifo_failing_read(grp);
+
+ /* Step 2: Find first working phase, increment in ptaps. */
+ work_bgn = 0;
+ ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
+ if (ret)
+ return ret;
+
+ work_end = work_bgn;
+
+ /*
+ * If d is 0 then the working window covers a phase tap and we can
+ * follow the old procedure. Otherwise, we've found the beginning
+ * and we need to increment the dtaps until we find the end.
+ */
+ if (d == 0) {
+ /*
+ * Step 3a: If we have room, back off by one and
+ * increment in dtaps.
+ */
+ sdr_backup_phase(grp, &work_bgn, &p);
+
+ /*
+ * Step 4a: go forward from working phase to non-working
+ * phase, increment in ptaps.
+ */
+ ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
+ if (ret)
+ return ret;
+
+ /* Step 5a: Back off one from last, increment in dtaps. 
*/ + + /* Special case code for backing up a phase */ + if (p == 0) { + p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); + } else { + p = p - 1; + } + + work_end -= iocfg->delay_per_opa_tap; + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + d = 0; + + debug_cond(DLEVEL == 2, "%s:%d p: ptap=%u\n", + __func__, __LINE__, p); + } + + /* The dtap increment to find the failing edge is done here. */ + sdr_find_phase_delay(0, 1, grp, &work_end, + iocfg->delay_per_dqs_en_dchain_tap, &d); + + /* Go back to working dtap */ + if (d != 0) + work_end -= iocfg->delay_per_dqs_en_dchain_tap; + + debug_cond(DLEVEL == 2, + "%s:%d p/d: ptap=%u dtap=%u end=%u\n", + __func__, __LINE__, p, d - 1, work_end); + + if (work_end < work_bgn) { + /* nil range */ + debug_cond(DLEVEL == 2, "%s:%d end-2: failed\n", + __func__, __LINE__); + return -EINVAL; + } + + debug_cond(DLEVEL == 2, "%s:%d found range [%u,%u]\n", + __func__, __LINE__, work_bgn, work_end); + + /* + * We need to calculate the number of dtaps that equal a ptap. + * To do that we'll back up a ptap and re-find the edge of the + * window using dtaps + */ + debug_cond(DLEVEL == 2, "%s:%d calculate dtaps_per_ptap for tracking\n", + __func__, __LINE__); + + /* Special case code for backing up a phase */ + if (p == 0) { + p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); + debug_cond(DLEVEL == 2, "%s:%d backedup cycle/phase: p=%u\n", + __func__, __LINE__, p); + } else { + p = p - 1; + debug_cond(DLEVEL == 2, "%s:%d backedup phase only: p=%u", + __func__, __LINE__, p); + } + + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + /* + * Increase dtap until we first see a passing read (in case the + * window is smaller than a ptap), and then a failing read to + * mark the edge of the window again. + */ + + /* Find a passing read. */ + debug_cond(DLEVEL == 2, "%s:%d find passing read\n", + __func__, __LINE__); + + initial_failing_dtap = d; + + found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d); + if (found_passing_read) { + /* Find a failing read. */ + debug_cond(DLEVEL == 2, "%s:%d find failing read\n", + __func__, __LINE__); + d++; + found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0, + &d); + } else { + debug_cond(DLEVEL == 1, + "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n", + __func__, __LINE__); + } + + /* + * The dynamically calculated dtaps_per_ptap is only valid if we + * found a passing/failing read. If we didn't, it means d hit the max + * (iocfg->dqs_en_delay_max). Otherwise, dtaps_per_ptap retains its + * statically calculated value. + */ + if (found_passing_read && found_failing_read) + dtaps_per_ptap = d - initial_failing_dtap; + + writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); + debug_cond(DLEVEL == 2, "%s:%d dtaps_per_ptap=%u - %u = %u", + __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap); + + /* Step 6: Find the centre of the window. */ + ret = sdr_find_window_center(grp, work_bgn, work_end); + + return ret; +} + +/** + * search_stop_check() - Check if the detected edge is valid + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @d: DQS delay + * @rank_bgn: Rank number + * @write_group: Write Group + * @read_group: Read Group + * @bit_chk: Resulting bit mask after the test + * @sticky_bit_chk: Resulting sticky bit mask after the test + * @use_read_test: Perform read test + * + * Test if the found edge is valid. 
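+ *
+ * The return value reduces to the pseudo-expression
+ *
+ *   stop = !pass_now && (*sticky_bit_chk == correct_mask);
+ *
+ * i.e. the sweep stops only once the current setting fails while every
+ * bit has already passed at some earlier setting.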
+ */ +static u32 search_stop_check(const int write, const int d, const int rank_bgn, + const u32 write_group, const u32 read_group, + u32 *bit_chk, u32 *sticky_bit_chk, + const u32 use_read_test) +{ + const u32 ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; + const u32 correct_mask = write ? param->write_correct_mask : + param->read_correct_mask; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + u32 ret; + /* + * Stop searching when the read test doesn't pass AND when + * we've seen a passing read on every bit. + */ + if (write) { /* WRITE-ONLY */ + ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, + 0, PASS_ONE_BIT, + bit_chk, 0); + } else if (use_read_test) { /* READ-ONLY */ + ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group, + NUM_READ_PB_TESTS, + PASS_ONE_BIT, bit_chk, + 0, 0); + } else { /* READ-ONLY */ + rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, + PASS_ONE_BIT, bit_chk, 0); + *bit_chk = *bit_chk >> (per_dqs * + (read_group - (write_group * ratio))); + ret = (*bit_chk == 0); + } + *sticky_bit_chk = *sticky_bit_chk | *bit_chk; + ret = ret && (*sticky_bit_chk == correct_mask); + debug_cond(DLEVEL == 2, + "%s:%d center(left): dtap=%u => %u == %u && %u", + __func__, __LINE__, d, + *sticky_bit_chk, correct_mask, ret); + return ret; +} + +/** + * search_left_edge() - Find left edge of DQ/DQS working phase + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @rank_bgn: Rank number + * @write_group: Write Group + * @read_group: Read Group + * @test_bgn: Rank number to begin the test + * @sticky_bit_chk: Resulting sticky bit mask after the test + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @use_read_test: Perform read test + * + * Find left edge of DQ/DQS working phase. + */ +static void search_left_edge(const int write, const int rank_bgn, + const u32 write_group, const u32 read_group, const u32 test_bgn, + u32 *sticky_bit_chk, + int *left_edge, int *right_edge, const u32 use_read_test) +{ + const u32 delay_max = write ? iocfg->io_out1_delay_max : + iocfg->io_in_delay_max; + const u32 dqs_max = write ? iocfg->io_out1_delay_max : + iocfg->dqs_in_delay_max; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + u32 stop, bit_chk; + int i, d; + + for (d = 0; d <= dqs_max; d++) { + if (write) + scc_mgr_apply_group_dq_out1_delay(d); + else + scc_mgr_apply_group_dq_in_delay(test_bgn, d); + + writel(0, &sdr_scc_mgr->update); + + stop = search_stop_check(write, d, rank_bgn, write_group, + read_group, &bit_chk, sticky_bit_chk, + use_read_test); + if (stop == 1) + break; + + /* stop != 1 */ + for (i = 0; i < per_dqs; i++) { + if (bit_chk & 1) { + /* + * Remember a passing test as + * the left_edge. + */ + left_edge[i] = d; + } else { + /* + * If a left edge has not been seen + * yet, then a future passing test + * will mark this edge as the right + * edge. 
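+ * E.g. (illustrative): if bit 2 still
+ * fails at d == 5 with no pass seen so
+ * far, right_edge[2] = -(5 + 1) = -6;
+ * the negative value marks the edge as
+ * provisional.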
+ */
+ if (left_edge[i] == delay_max + 1)
+ right_edge[i] = -(d + 1);
+ }
+ bit_chk >>= 1;
+ }
+ }
+
+ /* Reset DQ delay chains to 0 */
+ if (write)
+ scc_mgr_apply_group_dq_out1_delay(0);
+ else
+ scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
+
+ *sticky_bit_chk = 0;
+ for (i = per_dqs - 1; i >= 0; i--) {
+ debug_cond(DLEVEL == 2,
+ "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
+ __func__, __LINE__, i, left_edge[i],
+ i, right_edge[i]);
+
+ /*
+ * Check for cases where we haven't found the left edge,
+ * which makes our assignment of the right edge invalid.
+ * Reset it to the illegal value.
+ */
+ if ((left_edge[i] == delay_max + 1) &&
+ (right_edge[i] != delay_max + 1)) {
+ right_edge[i] = delay_max + 1;
+ debug_cond(DLEVEL == 2,
+ "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
+ __func__, __LINE__, i, right_edge[i]);
+ }
+
+ /*
+ * Reset sticky bit
+ * READ: except for bits where we have seen both
+ * the left and right edge.
+ * WRITE: except for bits where we have seen the
+ * left edge.
+ */
+ *sticky_bit_chk <<= 1;
+ if (write) {
+ if (left_edge[i] != delay_max + 1)
+ *sticky_bit_chk |= 1;
+ } else {
+ if ((left_edge[i] != delay_max + 1) &&
+ (right_edge[i] != delay_max + 1))
+ *sticky_bit_chk |= 1;
+ }
+ }
+}
+
+/**
+ * search_right_edge() - Find right edge of DQ/DQS working phase
+ * @write: Perform read (Stage 2) or write (Stage 3) calibration
+ * @rank_bgn: Rank number
+ * @write_group: Write Group
+ * @read_group: Read Group
+ * @start_dqs: DQS start phase
+ * @start_dqs_en: DQS enable start phase
+ * @sticky_bit_chk: Resulting sticky bit mask after the test
+ * @left_edge: Left edge of the DQ/DQS phase
+ * @right_edge: Right edge of the DQ/DQS phase
+ * @use_read_test: Perform read test
+ *
+ * Find right edge of DQ/DQS working phase.
+ */
+static int search_right_edge(const int write, const int rank_bgn,
+ const u32 write_group, const u32 read_group,
+ const int start_dqs, const int start_dqs_en,
+ u32 *sticky_bit_chk,
+ int *left_edge, int *right_edge, const u32 use_read_test)
+{
+ const u32 delay_max = write ? iocfg->io_out1_delay_max :
+ iocfg->io_in_delay_max;
+ const u32 dqs_max = write ? iocfg->io_out1_delay_max :
+ iocfg->dqs_in_delay_max;
+ const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
+ rwcfg->mem_dq_per_read_dqs;
+ u32 stop, bit_chk;
+ int i, d;
+
+ for (d = 0; d <= dqs_max - start_dqs; d++) {
+ if (write) { /* WRITE-ONLY */
+ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
+ d + start_dqs);
+ } else { /* READ-ONLY */
+ scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
+ if (iocfg->shift_dqs_en_when_shift_dqs) {
+ u32 delay = d + start_dqs_en;
+ if (delay > iocfg->dqs_en_delay_max)
+ delay = iocfg->dqs_en_delay_max;
+ scc_mgr_set_dqs_en_delay(read_group, delay);
+ }
+ scc_mgr_load_dqs(read_group);
+ }
+
+ writel(0, &sdr_scc_mgr->update);
+
+ stop = search_stop_check(write, d, rank_bgn, write_group,
+ read_group, &bit_chk, sticky_bit_chk,
+ use_read_test);
+ if (stop == 1) {
+ if (write && (d == 0)) { /* WRITE-ONLY */
+ for (i = 0; i < rwcfg->mem_dq_per_write_dqs;
+ i++) {
+ /*
+ * d = 0 failed, but it passed when
+ * testing the left edge, so it must be
+ * marginal, set it to -1
+ */
+ if (right_edge[i] == delay_max + 1 &&
+ left_edge[i] != delay_max + 1)
+ right_edge[i] = -1;
+ }
+ }
+ break;
+ }
+
+ /* stop != 1 */
+ for (i = 0; i < per_dqs; i++) {
+ if (bit_chk & 1) {
+ /*
+ * Remember a passing test as
+ * the right_edge. 
+ */ + right_edge[i] = d; + } else { + if (d != 0) { + /* + * If a right edge has not + * been seen yet, then a future + * passing test will mark this + * edge as the left edge. + */ + if (right_edge[i] == delay_max + 1) + left_edge[i] = -(d + 1); + } else { + /* + * d = 0 failed, but it passed + * when testing the left edge, + * so it must be marginal, set + * it to -1 + */ + if (right_edge[i] == delay_max + 1 && + left_edge[i] != delay_max + 1) + right_edge[i] = -1; + /* + * If a right edge has not been + * seen yet, then a future + * passing test will mark this + * edge as the left edge. + */ + else if (right_edge[i] == delay_max + 1) + left_edge[i] = -(d + 1); + } + } + + debug_cond(DLEVEL == 2, "%s:%d center[r,d=%u]: ", + __func__, __LINE__, d); + debug_cond(DLEVEL == 2, + "bit_chk_test=%i left_edge[%u]: %d ", + bit_chk & 1, i, left_edge[i]); + debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, + right_edge[i]); + bit_chk >>= 1; + } + } + + /* Check that all bits have a window */ + for (i = 0; i < per_dqs; i++) { + debug_cond(DLEVEL == 2, + "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d", + __func__, __LINE__, i, left_edge[i], + i, right_edge[i]); + if ((left_edge[i] == dqs_max + 1) || + (right_edge[i] == dqs_max + 1)) + return i + 1; /* FIXME: If we fail, retval > 0 */ + } + + return 0; +} + +/** + * get_window_mid_index() - Find the best middle setting of DQ/DQS phase + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @mid_min: Best DQ/DQS phase middle setting + * + * Find index and value of the middle of the DQ/DQS working phase. + */ +static int get_window_mid_index(const int write, int *left_edge, + int *right_edge, int *mid_min) +{ + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + int i, mid, min_index; + + /* Find middle of window for each DQ bit */ + *mid_min = left_edge[0] - right_edge[0]; + min_index = 0; + for (i = 1; i < per_dqs; i++) { + mid = left_edge[i] - right_edge[i]; + if (mid < *mid_min) { + *mid_min = mid; + min_index = i; + } + } + + /* + * -mid_min/2 represents the amount that we need to move DQS. + * If mid_min is odd and positive we'll need to add one to make + * sure the rounding in further calculations is correct (always + * bias to the right), so just add 1 for all positive values. + */ + if (*mid_min > 0) + (*mid_min)++; + *mid_min = *mid_min / 2; + + debug_cond(DLEVEL == 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n", + __func__, __LINE__, *mid_min, min_index); + return min_index; +} + +/** + * center_dq_windows() - Center the DQ/DQS windows + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @mid_min: Adjusted DQ/DQS phase middle setting + * @orig_mid_min: Original DQ/DQS phase middle setting + * @min_index: DQ/DQS phase middle setting index + * @test_bgn: Rank number to begin the test + * @dq_margin: Amount of shift for the DQ + * @dqs_margin: Amount of shift for the DQS + * + * Align the DQ/DQS windows in each group. + */ +static void center_dq_windows(const int write, int *left_edge, int *right_edge, + const int mid_min, const int orig_mid_min, + const int min_index, const int test_bgn, + int *dq_margin, int *dqs_margin) +{ + const u32 delay_max = write ? iocfg->io_out1_delay_max : + iocfg->io_in_delay_max; + const u32 per_dqs = write ? 
rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + const u32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET : + SCC_MGR_IO_IN_DELAY_OFFSET; + const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off; + + u32 temp_dq_io_delay1, temp_dq_io_delay2; + int shift_dq, i, p; + + /* Initialize data for export structures */ + *dqs_margin = delay_max + 1; + *dq_margin = delay_max + 1; + + /* add delay to bring centre of all DQ windows to the same "level" */ + for (i = 0, p = test_bgn; i < per_dqs; i++, p++) { + /* Use values before divide by 2 to reduce round off error */ + shift_dq = (left_edge[i] - right_edge[i] - + (left_edge[min_index] - right_edge[min_index]))/2 + + (orig_mid_min - mid_min); + + debug_cond(DLEVEL == 2, + "vfifo_center: before: shift_dq[%u]=%d\n", + i, shift_dq); + + temp_dq_io_delay1 = readl(addr + (p << 2)); + temp_dq_io_delay2 = readl(addr + (i << 2)); + + if (shift_dq + temp_dq_io_delay1 > delay_max) + shift_dq = delay_max - temp_dq_io_delay2; + else if (shift_dq + temp_dq_io_delay1 < 0) + shift_dq = -temp_dq_io_delay1; + + debug_cond(DLEVEL == 2, + "vfifo_center: after: shift_dq[%u]=%d\n", + i, shift_dq); + + if (write) + scc_mgr_set_dq_out1_delay(i, + temp_dq_io_delay1 + shift_dq); + else + scc_mgr_set_dq_in_delay(p, + temp_dq_io_delay1 + shift_dq); + + scc_mgr_load_dq(p); + + debug_cond(DLEVEL == 2, + "vfifo_center: margin[%u]=[%d,%d]\n", i, + left_edge[i] - shift_dq + (-mid_min), + right_edge[i] + shift_dq - (-mid_min)); + + /* To determine values for export structures */ + if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin) + *dq_margin = left_edge[i] - shift_dq + (-mid_min); + + if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin) + *dqs_margin = right_edge[i] + shift_dq - (-mid_min); + } +} + +/** + * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering + * @rank_bgn: Rank number + * @rw_group: Read/Write Group + * @test_bgn: Rank at which the test begins + * @use_read_test: Perform a read test + * @update_fom: Update FOM + * + * Per-bit deskew DQ and centering. + */ +static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn, + const u32 rw_group, const u32 test_bgn, + const int use_read_test, const int update_fom) +{ + const u32 addr = + SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET + + (rw_group << 2); + /* + * Store these as signed since there are comparisons with + * signed numbers. 
+ */ + u32 sticky_bit_chk; + int32_t left_edge[rwcfg->mem_dq_per_read_dqs]; + int32_t right_edge[rwcfg->mem_dq_per_read_dqs]; + int32_t orig_mid_min, mid_min; + int32_t new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en; + int32_t dq_margin, dqs_margin; + int i, min_index; + int ret; + + debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn); + + start_dqs = readl(addr); + if (iocfg->shift_dqs_en_when_shift_dqs) + start_dqs_en = readl(addr - iocfg->dqs_en_delay_offset); + + /* set the left and right edge of each bit to an illegal value */ + /* use (iocfg->io_in_delay_max + 1) as an illegal value */ + sticky_bit_chk = 0; + for (i = 0; i < rwcfg->mem_dq_per_read_dqs; i++) { + left_edge[i] = iocfg->io_in_delay_max + 1; + right_edge[i] = iocfg->io_in_delay_max + 1; + } + + /* Search for the left edge of the window for each bit */ + search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn, + &sticky_bit_chk, + left_edge, right_edge, use_read_test); + + + /* Search for the right edge of the window for each bit */ + ret = search_right_edge(0, rank_bgn, rw_group, rw_group, + start_dqs, start_dqs_en, + &sticky_bit_chk, + left_edge, right_edge, use_read_test); + if (ret) { + /* + * Restore delay chain settings before letting the loop + * in rw_mgr_mem_calibrate_vfifo to retry different + * dqs/ck relationships. + */ + scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs); + if (iocfg->shift_dqs_en_when_shift_dqs) + scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en); + + scc_mgr_load_dqs(rw_group); + writel(0, &sdr_scc_mgr->update); + + debug_cond(DLEVEL == 1, + "%s:%d vfifo_center: failed to find edge [%u]: %d %d", + __func__, __LINE__, i, left_edge[i], right_edge[i]); + if (use_read_test) { + set_failing_group_stage(rw_group * + rwcfg->mem_dq_per_read_dqs + i, + CAL_STAGE_VFIFO, + CAL_SUBSTAGE_VFIFO_CENTER); + } else { + set_failing_group_stage(rw_group * + rwcfg->mem_dq_per_read_dqs + i, + CAL_STAGE_VFIFO_AFTER_WRITES, + CAL_SUBSTAGE_VFIFO_CENTER); + } + return -EIO; + } + + min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min); + + /* Determine the amount we can change DQS (which is -mid_min) */ + orig_mid_min = mid_min; + new_dqs = start_dqs - mid_min; + if (new_dqs > iocfg->dqs_in_delay_max) + new_dqs = iocfg->dqs_in_delay_max; + else if (new_dqs < 0) + new_dqs = 0; + + mid_min = start_dqs - new_dqs; + debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", + mid_min, new_dqs); + + if (iocfg->shift_dqs_en_when_shift_dqs) { + if (start_dqs_en - mid_min > iocfg->dqs_en_delay_max) + mid_min += start_dqs_en - mid_min - + iocfg->dqs_en_delay_max; + else if (start_dqs_en - mid_min < 0) + mid_min += start_dqs_en - mid_min; + } + new_dqs = start_dqs - mid_min; + + debug_cond(DLEVEL == 1, + "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n", + start_dqs, + iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1, + new_dqs, mid_min); + + /* Add delay to bring centre of all DQ windows to the same "level". 
*/
+ center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
+ min_index, test_bgn, &dq_margin, &dqs_margin);
+
+ /* Move DQS-en */
+ if (iocfg->shift_dqs_en_when_shift_dqs) {
+ final_dqs_en = start_dqs_en - mid_min;
+ scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
+ scc_mgr_load_dqs(rw_group);
+ }
+
+ /* Move DQS */
+ scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
+ scc_mgr_load_dqs(rw_group);
+ debug_cond(DLEVEL == 2,
+ "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d",
+ __func__, __LINE__, dq_margin, dqs_margin);
+
+ /*
+ * Do not remove this line as it makes sure all of our decisions
+ * have been applied. Apply the update bit.
+ */
+ writel(0, &sdr_scc_mgr->update);
+
+ if ((dq_margin < 0) || (dqs_margin < 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
+ * @rw_group: Read/Write Group
+ * @phase: DQ/DQS phase
+ *
+ * Because initially no communication can be reliably performed with the memory
+ * device, the sequencer uses a guaranteed write mechanism to write data into
+ * the memory device.
+ */
+static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
+ const u32 phase)
+{
+ int ret;
+
+ /* Set a particular DQ/DQS phase. */
+ scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
+
+ debug_cond(DLEVEL == 1, "%s:%d guaranteed write: g=%u p=%u\n",
+ __func__, __LINE__, rw_group, phase);
+
+ /*
+ * Altera EMI_RM 2015.05.04 :: Figure 1-25
+ * Load up the patterns used by read calibration using the
+ * current DQDQS phase.
+ */
+ rw_mgr_mem_calibrate_read_load_patterns(0, 1);
+
+ if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
+ return 0;
+
+ /*
+ * Altera EMI_RM 2015.05.04 :: Figure 1-26
+ * Back-to-back reads of the patterns used for calibration.
+ */
+ ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
+ if (ret)
+ debug_cond(DLEVEL == 1,
+ "%s:%d Guaranteed read test failed: g=%u p=%u\n",
+ __func__, __LINE__, rw_group, phase);
+ return ret;
+}
+
+/**
+ * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
+ * @rw_group: Read/Write Group
+ * @test_bgn: Rank at which the test begins
+ *
+ * DQS enable calibration ensures reliable capture of the DQ signal without
+ * glitches on the DQS line.
+ */
+static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
+ const u32 test_bgn)
+{
+ /*
+ * Altera EMI_RM 2015.05.04 :: Figure 1-27
+ * DQS and DQS Enable Signal Relationships.
+ */
+
+ /* We start at zero, so we have one less DQ to divide among */
+ const u32 delay_step = iocfg->io_in_delay_max /
+ (rwcfg->mem_dq_per_read_dqs - 1);
+ int ret;
+ u32 i, p, d, r;
+
+ debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
+
+ /* Try different dq_in_delays since the DQ path is shorter than DQS. 
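+ * As an illustrative example (made-up limits): with io_in_delay_max = 31
+ * and mem_dq_per_read_dqs = 8, delay_step = 31 / 7 = 4, so the DQ pins
+ * get staggered input delays of 0, 4, 8, ..., 28 taps. 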
*/
+ for (r = 0; r < rwcfg->mem_number_of_ranks;
+ r += NUM_RANKS_PER_SHADOW_REG) {
+ for (i = 0, p = test_bgn, d = 0;
+ i < rwcfg->mem_dq_per_read_dqs;
+ i++, p++, d += delay_step) {
+ debug_cond(DLEVEL == 1,
+ "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
+ __func__, __LINE__, rw_group, r, i, p, d);
+
+ scc_mgr_set_dq_in_delay(p, d);
+ scc_mgr_load_dq(p);
+ }
+
+ writel(0, &sdr_scc_mgr->update);
+ }
+
+ /*
+ * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
+ * dq_in_delay values
+ */
+ ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);
+
+ debug_cond(DLEVEL == 1,
+ "%s:%d: g=%u found=%u; Resetting delay chain to zero\n",
+ __func__, __LINE__, rw_group, !ret);
+
+ for (r = 0; r < rwcfg->mem_number_of_ranks;
+ r += NUM_RANKS_PER_SHADOW_REG) {
+ scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
+ writel(0, &sdr_scc_mgr->update);
+ }
+
+ return ret;
+}
+
+/**
+ * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
+ * @rw_group: Read/Write Group
+ * @test_bgn: Rank at which the test begins
+ * @use_read_test: Perform a read test
+ * @update_fom: Update FOM
+ *
+ * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
+ * within a group.
+ */
+static int
+rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
+ const int use_read_test,
+ const int update_fom)
+
+{
+ int ret, grp_calibrated;
+ u32 rank_bgn, sr;
+
+ /*
+ * Altera EMI_RM 2015.05.04 :: Figure 1-28
+ * Read per-bit deskew can be done on a per shadow register basis.
+ */
+ grp_calibrated = 1;
+ for (rank_bgn = 0, sr = 0;
+ rank_bgn < rwcfg->mem_number_of_ranks;
+ rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
+ ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
+ test_bgn,
+ use_read_test,
+ update_fom);
+ if (!ret)
+ continue;
+
+ grp_calibrated = 0;
+ }
+
+ if (!grp_calibrated)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
+ * @rw_group: Read/Write Group
+ * @test_bgn: Rank at which the test begins
+ *
+ * Stage 1: Calibrate the read valid prediction FIFO.
+ *
+ * This function implements UniPHY calibration Stage 1, as explained in
+ * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
+ *
+ * - read valid prediction will consist of finding:
+ *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
+ *   - DQS input phase and DQS input delay (DQ/DQS Centering)
+ * - we also do a per-bit deskew on the DQ lines.
+ */
+static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
+{
+ u32 p, d;
+ u32 dtaps_per_ptap;
+ u32 failed_substage;
+
+ int ret;
+
+ debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
+
+ /* Update info for sims */
+ reg_file_set_group(rw_group);
+ reg_file_set_stage(CAL_STAGE_VFIFO);
+ reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
+
+ failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
+
+ /* USER Determine number of delay taps for each phase tap. */
+ dtaps_per_ptap = DIV_ROUND_UP(iocfg->delay_per_opa_tap,
+ iocfg->delay_per_dqs_en_dchain_tap) - 1;
+
+ for (d = 0; d <= dtaps_per_ptap; d += 2) {
+ /*
+ * In RLDRAMX we may be messing with the delay of pins in
+ * the same write rw_group but outside of the current read
+ * rw_group; that's OK because we haven't calibrated the
+ * output side yet. 
+ */
+ if (d > 0) {
+ scc_mgr_apply_group_all_out_delay_add_all_ranks(
+ rw_group, d);
+ }
+
+ for (p = 0; p <= iocfg->dqdqs_out_phase_max; p++) {
+ /* 1) Guaranteed Write */
+ ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
+ if (ret)
+ break;
+
+ /* 2) DQS Enable Calibration */
+ ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
+ test_bgn);
+ if (ret) {
+ failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
+ continue;
+ }
+
+ /* 3) Centering DQ/DQS */
+ /*
+ * If doing read after write calibration, do not update
+ * the FOM now; do it later.
+ */
+ ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
+ test_bgn, 1, 0);
+ if (ret) {
+ failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
+ continue;
+ }
+
+ /* All done. */
+ goto cal_done_ok;
+ }
+ }
+
+ /* Calibration Stage 1 failed. */
+ set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
+ return 0;
+
+ /* Calibration Stage 1 completed OK. */
+cal_done_ok:
+ /*
+ * Reset the delay chains back to zero if they have moved > 1
+ * (check for > 1 because the loop increments d even when the
+ * first iteration passes).
+ */
+ if (d > 2)
+ scc_mgr_zero_group(rw_group, 1);
+
+ return 1;
+}
+
+/**
+ * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
+ * @rw_group: Read/Write Group
+ * @test_bgn: Rank at which the test begins
+ *
+ * Stage 3: DQ/DQS Centering.
+ *
+ * This function implements UniPHY calibration Stage 3, as explained in
+ * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
+ */
+static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group,
+ const u32 test_bgn)
+{
+ int ret;
+
+ debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn);
+
+ /* Update info for sims. */
+ reg_file_set_group(rw_group);
+ reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
+ reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
+
+ ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1);
+ if (ret)
+ set_failing_group_stage(rw_group,
+ CAL_STAGE_VFIFO_AFTER_WRITES,
+ CAL_SUBSTAGE_VFIFO_CENTER);
+ return ret;
+}
+
+/**
+ * rw_mgr_mem_calibrate_lfifo() - Minimize latency
+ *
+ * Stage 4: Minimize latency.
+ *
+ * This function implements UniPHY calibration Stage 4, as explained in
+ * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
+ * Calibrate LFIFO to find smallest read latency.
+ */
+static u32 rw_mgr_mem_calibrate_lfifo(void)
+{
+ int found_one = 0;
+
+ debug("%s:%d\n", __func__, __LINE__);
+
+ /* Update info for sims. */
+ reg_file_set_stage(CAL_STAGE_LFIFO);
+ reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
+
+ /* Load up the patterns used by read calibration for all ranks */
+ rw_mgr_mem_calibrate_read_load_patterns(0, 1);
+
+ do {
+ writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
+ debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
+ __func__, __LINE__, gbl->curr_read_lat);
+
+ if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS,
+ PASS_ALL_BITS, 1))
+ break;
+
+ found_one = 1;
+ /*
+ * Reduce read latency and see if things are
+ * working correctly.
+ */
+ gbl->curr_read_lat--;
+ } while (gbl->curr_read_lat > 0);
+
+ /* Reset the fifos to get pointers to known state. 
*/
+ writel(0, &phy_mgr_cmd->fifo_reset);
+
+ if (found_one) {
+ /* Add a fudge factor to the read latency that was determined */
+ gbl->curr_read_lat += 2;
+ writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
+ debug_cond(DLEVEL == 2,
+ "%s:%d lfifo: success: using read_lat=%u\n",
+ __func__, __LINE__, gbl->curr_read_lat);
+ } else {
+ set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
+ CAL_SUBSTAGE_READ_LATENCY);
+
+ debug_cond(DLEVEL == 2,
+ "%s:%d lfifo: failed at initial read_lat=%u\n",
+ __func__, __LINE__, gbl->curr_read_lat);
+ }
+
+ return found_one;
+}
+
+/**
+ * search_window() - Search for the window (or part of it) with DM/DQS shift
+ * @search_dm: If 1, search for the DM shift, if 0, search for DQS shift
+ * @rank_bgn: Rank number
+ * @write_group: Write Group
+ * @bgn_curr: Current window begin
+ * @end_curr: Current window end
+ * @bgn_best: Current best window begin
+ * @end_best: Current best window end
+ * @win_best: Size of the best window
+ * @new_dqs: New DQS value (only applicable if search_dm = 0).
+ *
+ * Search for the window (or part of it) with DM/DQS shift.
+ */
+static void search_window(const int search_dm,
+ const u32 rank_bgn, const u32 write_group,
+ int *bgn_curr, int *end_curr, int *bgn_best,
+ int *end_best, int *win_best, int new_dqs)
+{
+ u32 bit_chk;
+ const int max = iocfg->io_out1_delay_max - new_dqs;
+ int d, di;
+
+ /* Search for the window (or part of it) with DM/DQS shift. */
+ for (di = max; di >= 0; di -= DELTA_D) {
+ if (search_dm) {
+ d = di;
+ scc_mgr_apply_group_dm_out1_delay(d);
+ } else {
+ /* For DQS, we go from 0...max */
+ d = max - di;
+ /*
+ * Note: This only shifts DQS, so we may be limiting
+ * ourselves to the width of DQ unnecessarily.
+ */
+ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
+ d + new_dqs);
+ }
+
+ writel(0, &sdr_scc_mgr->update);
+
+ if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
+ PASS_ALL_BITS, &bit_chk,
+ 0)) {
+ /* Set current end of the window. */
+ *end_curr = search_dm ? -d : d;
+
+ /*
+ * If a starting edge of our window has not been seen
+ * yet, this is our current start of the window.
+ */
+ if (*bgn_curr == iocfg->io_out1_delay_max + 1)
+ *bgn_curr = search_dm ? -d : d;
+
+ /*
+ * If the current window is bigger than the best seen
+ * so far, record it as the best window.
+ */
+ if ((*end_curr - *bgn_curr + 1) > *win_best) {
+ *win_best = *end_curr - *bgn_curr + 1;
+ *bgn_best = *bgn_curr;
+ *end_best = *end_curr;
+ }
+ } else {
+ /* We just saw a failing test. Reset temp edge. */
+ *bgn_curr = iocfg->io_out1_delay_max + 1;
+ *end_curr = iocfg->io_out1_delay_max + 1;
+
+ /* Early exit is only applicable to DQS. */
+ if (search_dm)
+ continue;
+
+ /*
+ * Early exit optimization: if the remaining delay
+ * chain space is less than the largest window
+ * already seen, we can exit.
+ */
+ if (*win_best - 1 > iocfg->io_out1_delay_max - new_dqs - d)
+ break;
+ }
+ }
+}
+
+/*
+ * rw_mgr_mem_calibrate_writes_center() - Center all windows
+ * @rank_bgn: Rank number
+ * @write_group: Write group
+ * @test_bgn: Rank at which the test begins
+ *
+ * Center all windows. Do per-bit-deskew to possibly increase size of
+ * certain windows. 
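+ *
+ * Condensed flow of the function below (a sketch, not added behaviour):
+ *
+ *   search_left_edge(1, ...);      - per-bit left edges
+ *   search_right_edge(1, ...);     - per-bit right edges
+ *   get_window_mid_index(1, ...);  - find worst-case bit
+ *   center_dq_windows(1, ...);     - align the DQ windows
+ *   search_window(1, ...);         - DM window via DM shift
+ *   search_window(0, ...);         - DM window via DQS shift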
+ */
+static int
+rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group,
+ const u32 test_bgn)
+{
+ int i;
+ u32 sticky_bit_chk;
+ u32 min_index;
+ int left_edge[rwcfg->mem_dq_per_write_dqs];
+ int right_edge[rwcfg->mem_dq_per_write_dqs];
+ int mid;
+ int mid_min, orig_mid_min;
+ int new_dqs, start_dqs;
+ int dq_margin, dqs_margin, dm_margin;
+ int bgn_curr = iocfg->io_out1_delay_max + 1;
+ int end_curr = iocfg->io_out1_delay_max + 1;
+ int bgn_best = iocfg->io_out1_delay_max + 1;
+ int end_best = iocfg->io_out1_delay_max + 1;
+ int win_best = 0;
+
+ int ret;
+
+ debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
+
+ dm_margin = 0;
+
+ start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
+ SCC_MGR_IO_OUT1_DELAY_OFFSET) +
+ (rwcfg->mem_dq_per_write_dqs << 2));
+
+ /* Per-bit deskew. */
+
+ /*
+ * Set the left and right edge of each bit to an illegal value.
+ * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
+ */
+ sticky_bit_chk = 0;
+ for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
+ left_edge[i] = iocfg->io_out1_delay_max + 1;
+ right_edge[i] = iocfg->io_out1_delay_max + 1;
+ }
+
+ /* Search for the left edge of the window for each bit. */
+ search_left_edge(1, rank_bgn, write_group, 0, test_bgn,
+ &sticky_bit_chk,
+ left_edge, right_edge, 0);
+
+ /* Search for the right edge of the window for each bit. */
+ ret = search_right_edge(1, rank_bgn, write_group, 0,
+ start_dqs, 0,
+ &sticky_bit_chk,
+ left_edge, right_edge, 0);
+ if (ret) {
+ set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES,
+ CAL_SUBSTAGE_WRITES_CENTER);
+ return -EINVAL;
+ }
+
+ min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min);
+
+ /* Determine the amount we can change DQS (which is -mid_min). */
+ orig_mid_min = mid_min;
+ new_dqs = start_dqs;
+ mid_min = 0;
+ debug_cond(DLEVEL == 1,
+ "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
+ __func__, __LINE__, start_dqs, new_dqs, mid_min);
+
+ /* Add delay to bring centre of all DQ windows to the same "level". */
+ center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min,
+ min_index, 0, &dq_margin, &dqs_margin);
+
+ /* Move DQS */
+ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
+ writel(0, &sdr_scc_mgr->update);
+
+ /* Centre DM */
+ debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
+
+ /*
+ * Set the left and right edge of each bit to an illegal value.
+ * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
+ */
+ left_edge[0] = iocfg->io_out1_delay_max + 1;
+ right_edge[0] = iocfg->io_out1_delay_max + 1;
+
+ /* Search for the window (or part of it) with DM shift. */
+ search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
+ &bgn_best, &end_best, &win_best, 0);
+
+ /* Reset DM delay chains to 0. */
+ scc_mgr_apply_group_dm_out1_delay(0);
+
+ /*
+ * Check to see if the current window nudges up against 0 delay.
+ * If so, we need to continue the search by shifting DQS; otherwise
+ * the DQS search begins as a new search.
+ */
+ if (end_curr != 0) {
+ bgn_curr = iocfg->io_out1_delay_max + 1;
+ end_curr = iocfg->io_out1_delay_max + 1;
+ }
+
+ /* Search for the window (or part of it) with DQS shifts. */
+ search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
+ &bgn_best, &end_best, &win_best, new_dqs);
+
+ /* Assign left and right edge for cal and reporting. 
*/
+ left_edge[0] = -1 * bgn_best;
+ right_edge[0] = end_best;
+
+ debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n",
+ __func__, __LINE__, left_edge[0], right_edge[0]);
+
+ /* Move DQS (back to orig). */
+ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
+
+ /* Move DM */
+
+ /* Find middle of window for the DM bit. */
+ mid = (left_edge[0] - right_edge[0]) / 2;
+
+ /* Only move right, since we are not moving DQS/DQ. */
+ if (mid < 0)
+ mid = 0;
+
+ /* dm_margin should fail if we never find a window. */
+ if (win_best == 0)
+ dm_margin = -1;
+ else
+ dm_margin = left_edge[0] - mid;
+
+ scc_mgr_apply_group_dm_out1_delay(mid);
+ writel(0, &sdr_scc_mgr->update);
+
+ debug_cond(DLEVEL == 2,
+ "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
+ __func__, __LINE__, left_edge[0], right_edge[0],
+ mid, dm_margin);
+ /* Export values. */
+ gbl->fom_out += dq_margin + dqs_margin;
+
+ debug_cond(DLEVEL == 2,
+ "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
+ __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
+
+ /*
+ * Do not remove this line as it makes sure all of our
+ * decisions have been applied.
+ */
+ writel(0, &sdr_scc_mgr->update);
+
+ if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
+ * @rank_bgn: Rank number
+ * @group: Read/Write Group
+ * @test_bgn: Rank at which the test begins
+ *
+ * Stage 2: Write Calibration Part One.
+ *
+ * This function implements UniPHY calibration Stage 2, as explained in
+ * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
+ */
+static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
+ const u32 test_bgn)
+{
+ int ret;
+
+ /* Update info for sims */
+ debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
+
+ reg_file_set_group(group);
+ reg_file_set_stage(CAL_STAGE_WRITES);
+ reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
+
+ ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
+ if (ret)
+ set_failing_group_stage(group, CAL_STAGE_WRITES,
+ CAL_SUBSTAGE_WRITES_CENTER);
+
+ return ret;
+}
+
+/**
+ * mem_precharge_and_activate() - Precharge all banks and activate
+ *
+ * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
+ */
+static void mem_precharge_and_activate(void)
+{
+ int r;
+
+ for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
+ /* Set rank. */
+ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
+
+ /* Precharge all banks. */
+ writel(rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
+ RW_MGR_RUN_SINGLE_GROUP_OFFSET);
+
+ writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
+ writel(rwcfg->activate_0_and_1_wait1,
+ &sdr_rw_load_jump_mgr_regs->load_jump_add0);
+
+ writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
+ writel(rwcfg->activate_0_and_1_wait2,
+ &sdr_rw_load_jump_mgr_regs->load_jump_add1);
+
+ /* Activate rows. */
+ writel(rwcfg->activate_0_and_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
+ RW_MGR_RUN_SINGLE_GROUP_OFFSET);
+ }
+}
+
+/**
+ * mem_init_latency() - Configure memory RLAT and WLAT settings
+ *
+ * Configure memory RLAT and WLAT parameters.
+ */
+static void mem_init_latency(void)
+{
+ /*
+ * For AV/CV, LFIFO is hardened and always runs at full rate
+ * so max latency in AFI clocks, used here, is correspondingly
+ * smaller.
+ */
+ const u32 max_latency = (1 << misccfg->max_latency_count_width) - 1;
+ u32 rlat, wlat;
+
+ debug("%s:%d\n", __func__, __LINE__);
+
+ /*
+ * Read in write latency. 
+ * WL for Hard PHY does not include additive latency.
+ */
+ wlat = readl(&data_mgr->t_wl_add);
+ wlat += readl(&data_mgr->mem_t_add);
+
+ gbl->rw_wl_nop_cycles = wlat - 1;
+
+ /* Read in read latency. */
+ rlat = readl(&data_mgr->t_rl_add);
+
+ /* Set a pretty high read latency initially. */
+ gbl->curr_read_lat = rlat + 16;
+ if (gbl->curr_read_lat > max_latency)
+ gbl->curr_read_lat = max_latency;
+
+ writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
+
+ /* Advertise write latency. */
+ writel(wlat, &phy_mgr_cfg->afi_wlat);
+}
+
+/**
+ * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
+ *
+ * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
+ */
+static void mem_skip_calibrate(void)
+{
+ u32 vfifo_offset;
+ u32 i, j, r;
+
+ debug("%s:%d\n", __func__, __LINE__);
+ /* Need to update every shadow register set used by the interface */
+ for (r = 0; r < rwcfg->mem_number_of_ranks;
+ r += NUM_RANKS_PER_SHADOW_REG) {
+ /*
+ * Set output phase alignment settings appropriate for
+ * skip calibration.
+ */
+ for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
+ scc_mgr_set_dqs_en_phase(i, 0);
+ if (iocfg->dll_chain_length == 6)
+ scc_mgr_set_dqdqs_output_phase(i, 6);
+ else
+ scc_mgr_set_dqdqs_output_phase(i, 7);
+ /*
+ * Case:33398
+ *
+ * Write data arrives at the I/O two cycles before write
+ * latency is reached (720 deg).
+ * -> due to bit-slip in a/c bus
+ * -> to allow board skew where dqs is longer than ck
+ * -> how often can this happen!?
+ * -> can claim back some ptaps for high freq
+ * support if we can relax this, but i digress...
+ *
+ * The write_clk leads mem_ck by 90 deg
+ * The minimum ptap of the OPA is 180 deg
+ * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
+ * The write_clk is always delayed by 2 ptaps
+ *
+ * Hence, to make DQS aligned to CK, we need to delay
+ * DQS by:
+ * (720 - 90 - 180) - 2 * (360 / iocfg->dll_chain_length)
+ *
+ * Dividing the above by (360 / iocfg->dll_chain_length)
+ * gives us the number of ptaps, which simplifies to:
+ *
+ * (1.25 * iocfg->dll_chain_length - 2)
+ */
+ scc_mgr_set_dqdqs_output_phase(i,
+ 1.25 * iocfg->dll_chain_length - 2);
+ }
+ writel(0xff, &sdr_scc_mgr->dqs_ena);
+ writel(0xff, &sdr_scc_mgr->dqs_io_ena);
+
+ for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
+ writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
+ SCC_MGR_GROUP_COUNTER_OFFSET);
+ }
+ writel(0xff, &sdr_scc_mgr->dq_ena);
+ writel(0xff, &sdr_scc_mgr->dm_ena);
+ writel(0, &sdr_scc_mgr->update);
+ }
+
+ /* Compensate for simulation model behaviour */
+ for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
+ scc_mgr_set_dqs_bus_in_delay(i, 10);
+ scc_mgr_load_dqs(i);
+ }
+ writel(0, &sdr_scc_mgr->update);
+
+ /*
+ * ArriaV has hard FIFOs that can only be initialized by incrementing
+ * in sequencer.
+ */
+ vfifo_offset = misccfg->calib_vfifo_offset;
+ for (j = 0; j < vfifo_offset; j++)
+ writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
+ writel(0, &phy_mgr_cmd->fifo_reset);
+
+ /*
+ * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
+ * setting from generation-time constant.
+ */
+ gbl->curr_read_lat = misccfg->calib_lfifo_offset;
+ writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
+}
+
+/**
+ * mem_calibrate() - Memory calibration entry point.
+ *
+ * Perform memory calibration. 
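+ *
+ * Condensed stage order as driven by the loops below:
+ *
+ *   Stage 1: rw_mgr_mem_calibrate_vfifo()     - per read group
+ *   Stage 2: rw_mgr_mem_calibrate_writes()    - per rank/write group
+ *   Stage 3: rw_mgr_mem_calibrate_vfifo_end() - per read group
+ *   Stage 4: rw_mgr_mem_calibrate_lfifo()     - once, at the end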
+ */ +static u32 mem_calibrate(void) +{ + u32 i; + u32 rank_bgn, sr; + u32 write_group, write_test_bgn; + u32 read_group, read_test_bgn; + u32 run_groups, current_run; + u32 failing_groups = 0; + u32 group_failed = 0; + + const u32 rwdqs_ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; + + debug("%s:%d\n", __func__, __LINE__); + + /* Initialize the data settings */ + gbl->error_substage = CAL_SUBSTAGE_NIL; + gbl->error_stage = CAL_STAGE_NIL; + gbl->error_group = 0xff; + gbl->fom_in = 0; + gbl->fom_out = 0; + + /* Initialize WLAT and RLAT. */ + mem_init_latency(); + + /* Initialize bit slips. */ + mem_precharge_and_activate(); + + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { + writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | + SCC_MGR_GROUP_COUNTER_OFFSET); + /* Only needed once to set all groups, pins, DQ, DQS, DM. */ + if (i == 0) + scc_mgr_set_hhp_extras(); + + scc_set_bypass_mode(i); + } + + /* Calibration is skipped. */ + if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { + /* + * Set VFIFO and LFIFO to instant-on settings in skip + * calibration mode. + */ + mem_skip_calibrate(); + + /* + * Do not remove this line as it makes sure all of our + * decisions have been applied. + */ + writel(0, &sdr_scc_mgr->update); + return 1; + } + + /* Calibration is not skipped. */ + for (i = 0; i < NUM_CALIB_REPEAT; i++) { + /* + * Zero all delay chain/phase settings for all + * groups and all shadow register sets. + */ + scc_mgr_zero_all(); + + run_groups = ~0; + + for (write_group = 0, write_test_bgn = 0; write_group + < rwcfg->mem_if_write_dqs_width; write_group++, + write_test_bgn += rwcfg->mem_dq_per_write_dqs) { + /* Initialize the group failure */ + group_failed = 0; + + current_run = run_groups & ((1 << + RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); + run_groups = run_groups >> + RW_MGR_NUM_DQS_PER_WRITE_GROUP; + + if (current_run == 0) + continue; + + writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | + SCC_MGR_GROUP_COUNTER_OFFSET); + scc_mgr_zero_group(write_group, 0); + + for (read_group = write_group * rwdqs_ratio, + read_test_bgn = 0; + read_group < (write_group + 1) * rwdqs_ratio; + read_group++, + read_test_bgn += rwcfg->mem_dq_per_read_dqs) { + if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO) + continue; + + /* Calibrate the VFIFO */ + if (rw_mgr_mem_calibrate_vfifo(read_group, + read_test_bgn)) + continue; + + if (!(gbl->phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) + return 0; + + /* The group failed, we're done. */ + goto grp_failed; + } + + /* Calibrate the output side */ + for (rank_bgn = 0, sr = 0; + rank_bgn < rwcfg->mem_number_of_ranks; + rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) { + if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) + continue; + + /* Not needed in quick mode! */ + if (STATIC_CALIB_STEPS & + CALIB_SKIP_DELAY_SWEEPS) + continue; + + /* Calibrate WRITEs */ + if (!rw_mgr_mem_calibrate_writes(rank_bgn, + write_group, + write_test_bgn)) + continue; + + group_failed = 1; + if (!(gbl->phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) + return 0; + } + + /* Some group failed, we're done. 
*/ + if (group_failed) + goto grp_failed; + + for (read_group = write_group * rwdqs_ratio, + read_test_bgn = 0; + read_group < (write_group + 1) * rwdqs_ratio; + read_group++, + read_test_bgn += rwcfg->mem_dq_per_read_dqs) { + if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) + continue; + + if (!rw_mgr_mem_calibrate_vfifo_end(read_group, + read_test_bgn)) + continue; + + if (!(gbl->phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) + return 0; + + /* The group failed, we're done. */ + goto grp_failed; + } + + /* No group failed, continue as usual. */ + continue; + +grp_failed: /* A group failed, increment the counter. */ + failing_groups++; + } + + /* + * USER If there are any failing groups then report + * the failure. + */ + if (failing_groups != 0) + return 0; + + if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO) + continue; + + /* Calibrate the LFIFO */ + if (!rw_mgr_mem_calibrate_lfifo()) + return 0; + } + + /* + * Do not remove this line as it makes sure all of our decisions + * have been applied. + */ + writel(0, &sdr_scc_mgr->update); + return 1; +} + +/** + * run_mem_calibrate() - Perform memory calibration + * + * This function triggers the entire memory calibration procedure. + */ +static int run_mem_calibrate(void) +{ + int pass; + + debug("%s:%d\n", __func__, __LINE__); + + /* Reset pass/fail status shown on afi_cal_success/fail */ + writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); + + /* Stop tracking manager. */ + clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22); + + phy_mgr_initialize(); + rw_mgr_mem_initialize(); + + /* Perform the actual memory calibration. */ + pass = mem_calibrate(); + + mem_precharge_and_activate(); + writel(0, &phy_mgr_cmd->fifo_reset); + + /* Handoff. */ + rw_mgr_mem_handoff(); + /* + * In Hard PHY this is a 2-bit control: + * 0: AFI Mux Select + * 1: DDIO Mux Select + */ + writel(0x2, &phy_mgr_cfg->mux_sel); + + /* Start tracking manager. */ + setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22); + + return pass; +} + +/** + * debug_mem_calibrate() - Report result of memory calibration + * @pass: Value indicating whether calibration passed or failed + * + * This function reports the results of the memory calibration + * and writes debug information into the register file. + */ +static void debug_mem_calibrate(int pass) +{ + u32 debug_info; + + if (pass) { + printf("%s: CALIBRATION PASSED\n", __FILE__); + + gbl->fom_in /= 2; + gbl->fom_out /= 2; + + if (gbl->fom_in > 0xff) + gbl->fom_in = 0xff; + + if (gbl->fom_out > 0xff) + gbl->fom_out = 0xff; + + /* Update the FOM in the register file */ + debug_info = gbl->fom_in; + debug_info |= gbl->fom_out << 8; + writel(debug_info, &sdr_reg_file->fom); + + writel(debug_info, &phy_mgr_cfg->cal_debug_info); + writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); + } else { + printf("%s: CALIBRATION FAILED\n", __FILE__); + + debug_info = gbl->error_stage; + debug_info |= gbl->error_substage << 8; + debug_info |= gbl->error_group << 16; + + writel(debug_info, &sdr_reg_file->failing_stage); + writel(debug_info, &phy_mgr_cfg->cal_debug_info); + writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); + + /* Update the failing group/stage in the register file */ + debug_info = gbl->error_stage; + debug_info |= gbl->error_substage << 8; + debug_info |= gbl->error_group << 16; + writel(debug_info, &sdr_reg_file->failing_stage); + } + + printf("%s: Calibration complete\n", __FILE__); +} + +/** + * hc_initialize_rom_data() - Initialize ROM data + * + * Initialize ROM data. 
+ */ +static void hc_initialize_rom_data(void) +{ + unsigned int nelem = 0; + const u32 *rom_init; + u32 i, addr; + + socfpga_get_seq_inst_init(&rom_init, &nelem); + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; + for (i = 0; i < nelem; i++) + writel(rom_init[i], addr + (i << 2)); + + socfpga_get_seq_ac_init(&rom_init, &nelem); + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; + for (i = 0; i < nelem; i++) + writel(rom_init[i], addr + (i << 2)); +} + +/** + * initialize_reg_file() - Initialize SDR register file + * + * Initialize SDR register file. + */ +static void initialize_reg_file(void) +{ + /* Initialize the register file with the correct data */ + writel(misccfg->reg_file_init_seq_signature, &sdr_reg_file->signature); + writel(0, &sdr_reg_file->debug_data_addr); + writel(0, &sdr_reg_file->cur_stage); + writel(0, &sdr_reg_file->fom); + writel(0, &sdr_reg_file->failing_stage); + writel(0, &sdr_reg_file->debug1); + writel(0, &sdr_reg_file->debug2); +} + +/** + * initialize_hps_phy() - Initialize HPS PHY + * + * Initialize HPS PHY. + */ +static void initialize_hps_phy(void) +{ + u32 reg; + /* + * Tracking also gets configured here because it's in the + * same register. + */ + u32 trk_sample_count = 7500; + u32 trk_long_idle_sample_count = (10 << 16) | 100; + /* + * Format is number of outer loops in the 16 MSB, sample + * count in 16 LSB. + */ + + reg = 0; + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); + /* + * This field selects the intrinsic latency to RDATA_EN/FULL path. + * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. + */ + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( + trk_sample_count); + writel(reg, &sdr_ctrl->phy_ctrl0); + + reg = 0; + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( + trk_sample_count >> + SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( + trk_long_idle_sample_count); + writel(reg, &sdr_ctrl->phy_ctrl1); + + reg = 0; + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( + trk_long_idle_sample_count >> + SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); + writel(reg, &sdr_ctrl->phy_ctrl2); +} + +/** + * initialize_tracking() - Initialize tracking + * + * Initialize the register file with usable initial data. + */ +static void initialize_tracking(void) +{ + /* + * Initialize the register file with the correct data. + * Compute usable version of value in case we skip full + * computation later. 
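+ *
+ * Worked example for the value stored just below (the tap delays are
+ * illustrative numbers, not from any particular device): with
+ * delay_per_opa_tap = 416 ps and delay_per_dchain_tap = 30 ps,
+ *
+ *	DIV_ROUND_UP(416, 30) - 1 = 14 - 1 = 13
+ *
+ * i.e. fourteen delay-chain taps (rounded up) span one phase tap and
+ * the register stores that count minus one.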
+ */
+ writel(DIV_ROUND_UP(iocfg->delay_per_opa_tap,
+ iocfg->delay_per_dchain_tap) - 1,
+ &sdr_reg_file->dtaps_per_ptap);
+
+ /* trk_sample_count */
+ writel(7500, &sdr_reg_file->trk_sample_count);
+
+ /* longidle: outer loop count [31:16], sample count [15:0] */
+ writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
+
+ /*
+ * longidle sample count [31:24]
+ * trfc, worst case of 933MHz 4Gb [23:16]
+ * trcd, worst case [15:8]
+ * vfifo wait [7:0]
+ */
+ writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
+ &sdr_reg_file->delays);
+
+ /* RW manager command addresses used by the tracking logic */
+ writel((rwcfg->idle << 24) | (rwcfg->activate_1 << 16) |
+ (rwcfg->sgle_read << 8) | (rwcfg->precharge_all << 0),
+ &sdr_reg_file->trk_rw_mgr_addr);
+
+ writel(rwcfg->mem_if_read_dqs_width,
+ &sdr_reg_file->trk_read_dqs_width);
+
+ /* refresh_all command [31:24], trefi [15:0] */
+ writel((rwcfg->refresh_all << 24) | (1000 << 0),
+ &sdr_reg_file->trk_rfsh);
+}
+
+int sdram_calibration_full(void)
+{
+ struct param_type my_param;
+ struct gbl_type my_gbl;
+ u32 pass;
+
+ memset(&my_param, 0, sizeof(my_param));
+ memset(&my_gbl, 0, sizeof(my_gbl));
+
+ param = &my_param;
+ gbl = &my_gbl;
+
+ rwcfg = socfpga_get_sdram_rwmgr_config();
+ iocfg = socfpga_get_sdram_io_config();
+ misccfg = socfpga_get_sdram_misc_config();
+
+ /* Enable the calibration report by default */
+ gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
+ /*
+ * The guaranteed read test is enabled by default and is only
+ * disabled when DISABLE_GUARANTEED_READ is set.
+ */
+#if DISABLE_GUARANTEED_READ
+ gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
+#endif
+ /* Initialize the register file */
+ initialize_reg_file();
+
+ /* Initialize any PHY CSR */
+ initialize_hps_phy();
+
+ scc_mgr_initialize();
+
+ initialize_tracking();
+
+ printf("%s: Preparing to start memory calibration\n", __FILE__);
+
+ debug("%s:%d\n", __func__, __LINE__);
+ debug_cond(DLEVEL == 1,
+ "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
+ rwcfg->mem_number_of_ranks, rwcfg->mem_number_of_cs_per_dimm,
+ rwcfg->mem_dq_per_read_dqs, rwcfg->mem_dq_per_write_dqs,
+ rwcfg->mem_virtual_groups_per_read_dqs,
+ rwcfg->mem_virtual_groups_per_write_dqs);
+ debug_cond(DLEVEL == 1,
+ "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
+ rwcfg->mem_if_read_dqs_width, rwcfg->mem_if_write_dqs_width,
+ rwcfg->mem_data_width, rwcfg->mem_data_mask_width,
+ iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap);
+ debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
+ iocfg->delay_per_dqs_en_dchain_tap, iocfg->dll_chain_length);
+ debug_cond(DLEVEL == 1,
+ "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
+ iocfg->dqs_en_phase_max, iocfg->dqdqs_out_phase_max,
+ iocfg->dqs_en_delay_max, iocfg->dqs_in_delay_max);
+ debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
+ iocfg->io_in_delay_max, iocfg->io_out1_delay_max,
+ iocfg->io_out2_delay_max);
+ debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
+ iocfg->dqs_in_reserve, iocfg->dqs_out_reserve);
+
+ hc_initialize_rom_data();
+
+ /* Update info for sims */
+ reg_file_set_stage(CAL_STAGE_NIL);
+ reg_file_set_group(0);
+
+ /*
+ * Load the global used by actions that require dynamic
+ * calibration support.
+ */
+ dyn_calib_steps = STATIC_CALIB_STEPS;
+ /*
+ * Load global to allow dynamic selection of delay loop settings
+ * based on calibration mode. 
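+ *
+ * One plausible reading of the mask set just below (the consumer
+ * shown here is a sketch; effective_delay/configured_delay are not
+ * names from this driver):
+ *
+ *	effective_delay = configured_delay & skip_delay_mask;
+ *
+ * i.e. 0xff keeps the configured delay-loop values, while 0x0 forces
+ * them to zero whenever CALIB_SKIP_DELAY_LOOPS is set.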
+ */ + if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) + skip_delay_mask = 0xff; + else + skip_delay_mask = 0x0; + + pass = run_mem_calibrate(); + debug_mem_calibrate(pass); + return pass; +} diff --git a/drivers/ddr/altera/sequencer.h b/drivers/ddr/altera/sequencer.h new file mode 100644 index 0000000000..839a374968 --- /dev/null +++ b/drivers/ddr/altera/sequencer.h @@ -0,0 +1,227 @@ +/* + * Copyright Altera Corporation (C) 2012-2015 + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _SEQUENCER_H_ +#define _SEQUENCER_H_ + +#define RW_MGR_NUM_DM_PER_WRITE_GROUP (rwcfg->mem_data_mask_width \ + / rwcfg->mem_if_write_dqs_width) +#define RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP (rwcfg->true_mem_data_mask_width \ + / rwcfg->mem_if_write_dqs_width) + +#define RW_MGR_NUM_DQS_PER_WRITE_GROUP (rwcfg->mem_if_read_dqs_width \ + / rwcfg->mem_if_write_dqs_width) +#define NUM_RANKS_PER_SHADOW_REG (rwcfg->mem_number_of_ranks / NUM_SHADOW_REGS) + +#define RW_MGR_RUN_SINGLE_GROUP_OFFSET 0x0 +#define RW_MGR_RUN_ALL_GROUPS_OFFSET 0x0400 +#define RW_MGR_RESET_READ_DATAPATH_OFFSET 0x1000 +#define RW_MGR_SET_CS_AND_ODT_MASK_OFFSET 0x1400 +#define RW_MGR_INST_ROM_WRITE_OFFSET 0x1800 +#define RW_MGR_AC_ROM_WRITE_OFFSET 0x1C00 + +#define NUM_SHADOW_REGS 1 + +#define RW_MGR_RANK_NONE 0xFF +#define RW_MGR_RANK_ALL 0x00 + +#define RW_MGR_ODT_MODE_OFF 0 +#define RW_MGR_ODT_MODE_READ_WRITE 1 + +#define NUM_CALIB_REPEAT 1 + +#define NUM_READ_TESTS 7 +#define NUM_READ_PB_TESTS 7 +#define NUM_WRITE_TESTS 15 +#define NUM_WRITE_PB_TESTS 31 + +#define PASS_ALL_BITS 1 +#define PASS_ONE_BIT 0 + +/* calibration stages */ +#define CAL_STAGE_NIL 0 +#define CAL_STAGE_VFIFO 1 +#define CAL_STAGE_WLEVEL 2 +#define CAL_STAGE_LFIFO 3 +#define CAL_STAGE_WRITES 4 +#define CAL_STAGE_FULLTEST 5 +#define CAL_STAGE_REFRESH 6 +#define CAL_STAGE_CAL_SKIPPED 7 +#define CAL_STAGE_CAL_ABORTED 8 +#define CAL_STAGE_VFIFO_AFTER_WRITES 9 + +/* calibration substages */ +#define CAL_SUBSTAGE_NIL 0 +#define CAL_SUBSTAGE_GUARANTEED_READ 1 +#define CAL_SUBSTAGE_DQS_EN_PHASE 2 +#define CAL_SUBSTAGE_VFIFO_CENTER 3 +#define CAL_SUBSTAGE_WORKING_DELAY 1 +#define CAL_SUBSTAGE_LAST_WORKING_DELAY 2 +#define CAL_SUBSTAGE_WLEVEL_COPY 3 +#define CAL_SUBSTAGE_WRITES_CENTER 1 +#define CAL_SUBSTAGE_READ_LATENCY 1 +#define CAL_SUBSTAGE_REFRESH 1 + +#define SCC_MGR_GROUP_COUNTER_OFFSET 0x0000 +#define SCC_MGR_DQS_IN_DELAY_OFFSET 0x0100 +#define SCC_MGR_DQS_EN_PHASE_OFFSET 0x0200 +#define SCC_MGR_DQS_EN_DELAY_OFFSET 0x0300 +#define SCC_MGR_DQDQS_OUT_PHASE_OFFSET 0x0400 +#define SCC_MGR_OCT_OUT1_DELAY_OFFSET 0x0500 +#define SCC_MGR_IO_OUT1_DELAY_OFFSET 0x0700 +#define SCC_MGR_IO_IN_DELAY_OFFSET 0x0900 + +/* HHP-HPS-specific versions of some commands */ +#define SCC_MGR_DQS_EN_DELAY_GATE_OFFSET 0x0600 +#define SCC_MGR_IO_OE_DELAY_OFFSET 0x0800 +#define SCC_MGR_HHP_GLOBALS_OFFSET 0x0A00 +#define SCC_MGR_HHP_RFILE_OFFSET 0x0B00 +#define SCC_MGR_AFI_CAL_INIT_OFFSET 0x0D00 + +#define SDR_PHYGRP_SCCGRP_ADDRESS (SOCFPGA_SDR_ADDRESS | 0x0) +#define SDR_PHYGRP_PHYMGRGRP_ADDRESS (SOCFPGA_SDR_ADDRESS | 0x1000) +#define SDR_PHYGRP_RWMGRGRP_ADDRESS (SOCFPGA_SDR_ADDRESS | 0x2000) +#define SDR_PHYGRP_DATAMGRGRP_ADDRESS (SOCFPGA_SDR_ADDRESS | 0x4000) +#define SDR_PHYGRP_REGFILEGRP_ADDRESS (SOCFPGA_SDR_ADDRESS | 0x4800) + +#define PHY_MGR_CAL_RESET (0) +#define PHY_MGR_CAL_SUCCESS (1) +#define PHY_MGR_CAL_FAIL (2) + +#define CALIB_SKIP_DELAY_LOOPS (1 << 0) +#define CALIB_SKIP_ALL_BITS_CHK (1 << 1) +#define CALIB_SKIP_DELAY_SWEEPS (1 << 2) +#define CALIB_SKIP_VFIFO (1 << 3) +#define 
CALIB_SKIP_LFIFO (1 << 4)
+#define CALIB_SKIP_WLEVEL (1 << 5)
+#define CALIB_SKIP_WRITES (1 << 6)
+#define CALIB_SKIP_FULL_TEST (1 << 7)
+#define CALIB_SKIP_ALL (CALIB_SKIP_VFIFO | \
+ CALIB_SKIP_LFIFO | CALIB_SKIP_WLEVEL | \
+ CALIB_SKIP_WRITES | CALIB_SKIP_FULL_TEST)
+#define CALIB_IN_RTL_SIM (1 << 8)
+
+/* Scan chain manager command addresses */
+#define READ_SCC_OCT_OUT2_DELAY 0
+#define READ_SCC_DQ_OUT2_DELAY 0
+#define READ_SCC_DQS_IO_OUT2_DELAY 0
+#define READ_SCC_DM_IO_OUT2_DELAY 0
+
+/* HHP-HPS-specific values */
+#define SCC_MGR_HHP_EXTRAS_OFFSET 0
+#define SCC_MGR_HHP_DQSE_MAP_OFFSET 1
+
+/* PHY Debug mode flag constants */
+#define PHY_DEBUG_IN_DEBUG_MODE 0x00000001
+#define PHY_DEBUG_ENABLE_CAL_RPT 0x00000002
+#define PHY_DEBUG_ENABLE_MARGIN_RPT 0x00000004
+#define PHY_DEBUG_SWEEP_ALL_GROUPS 0x00000008
+#define PHY_DEBUG_DISABLE_GUARANTEED_READ 0x00000010
+#define PHY_DEBUG_ENABLE_NON_DESTRUCTIVE_CALIBRATION 0x00000020
+
+struct socfpga_sdr_rw_load_manager {
+ u32 load_cntr0;
+ u32 load_cntr1;
+ u32 load_cntr2;
+ u32 load_cntr3;
+};
+
+struct socfpga_sdr_rw_load_jump_manager {
+ u32 load_jump_add0;
+ u32 load_jump_add1;
+ u32 load_jump_add2;
+ u32 load_jump_add3;
+};
+
+struct socfpga_sdr_reg_file {
+ u32 signature;
+ u32 debug_data_addr;
+ u32 cur_stage;
+ u32 fom;
+ u32 failing_stage;
+ u32 debug1;
+ u32 debug2;
+ u32 dtaps_per_ptap;
+ u32 trk_sample_count;
+ u32 trk_longidle;
+ u32 delays;
+ u32 trk_rw_mgr_addr;
+ u32 trk_read_dqs_width;
+ u32 trk_rfsh;
+};
+
+/* parameter variable holder */
+struct param_type {
+ u32 read_correct_mask;
+ u32 read_correct_mask_vg;
+ u32 write_correct_mask;
+ u32 write_correct_mask_vg;
+};
+
+/* global variable holder */
+struct gbl_type {
+ u32 phy_debug_mode_flags;
+
+ /* current read latency */
+ u32 curr_read_lat;
+
+ /* error code */
+ u32 error_substage;
+ u32 error_stage;
+ u32 error_group;
+
+ /* figure-of-merit in, figure-of-merit out */
+ u32 fom_in;
+ u32 fom_out;
+
+ /*
+ * Number of RW Mgr NOP cycles between
+ * write command and write data.
+ */
+ u32 rw_wl_nop_cycles;
+};
+
+struct socfpga_sdr_scc_mgr {
+ u32 dqs_ena;
+ u32 dqs_io_ena;
+ u32 dq_ena;
+ u32 dm_ena;
+ u32 __padding1[4];
+ u32 update;
+ u32 __padding2[7];
+ u32 active_rank;
+};
+
+/* PHY manager configuration registers. */
+struct socfpga_phy_mgr_cfg {
+ u32 phy_rlat;
+ u32 reset_mem_stbl;
+ u32 mux_sel;
+ u32 cal_status;
+ u32 cal_debug_info;
+ u32 vfifo_rd_en_ovrd;
+ u32 afi_wlat;
+ u32 afi_rlat;
+};
+
+/* PHY manager command addresses. 
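+ *
+ * These appear to act as write-triggered commands rather than state:
+ * the sequencer stores a value to issue the operation. For example,
+ * run_mem_calibrate() in sequencer.c flushes the data FIFOs with
+ *
+ *	writel(0, &phy_mgr_cmd->fifo_reset);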
*/ +struct socfpga_phy_mgr_cmd { + u32 inc_vfifo_fr; + u32 inc_vfifo_hard_phy; + u32 fifo_reset; + u32 inc_vfifo_fr_hr; + u32 inc_vfifo_qr; +}; + +struct socfpga_data_mgr { + u32 __padding1; + u32 t_wl_add; + u32 mem_t_add; + u32 t_rl_add; +}; +#endif /* _SEQUENCER_H_ */ diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c index 63b3566e3e..4448250f5c 100644 --- a/drivers/fpga/socfpga.c +++ b/drivers/fpga/socfpga.c @@ -160,10 +160,13 @@ static void fpgamgr_program_write(const void *rbf_data, unsigned long rbf_size) " sub %1, #32\n" " subs %2, #1\n" " bne 1b\n" + " cmp %3, #0\n" + " beq 3f\n" "2: ldr %2, [%0], #4\n" " str %2, [%1]\n" " subs %3, #1\n" " bne 2b\n" + "3: nop\n" : "+r"(src), "+r"(dst), "+r"(loops32), "+r"(loops4) : : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "cc"); } diff --git a/drivers/net/designware.c b/drivers/net/designware.c index bcae842389..d9cb507695 100644 --- a/drivers/net/designware.c +++ b/drivers/net/designware.c @@ -608,10 +608,11 @@ static int designware_eth_ofdata_to_platdata(struct udevice *dev) static const struct udevice_id designware_eth_ids[] = { { .compatible = "allwinner,sun7i-a20-gmac" }, + { .compatible = "altr,socfpga-stmmac" }, { } }; -U_BOOT_DRIVER(eth_sandbox) = { +U_BOOT_DRIVER(eth_designware) = { .name = "eth_designware", .id = UCLASS_ETH, .of_match = designware_eth_ids, |