// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com> */ #define LOG_CATEGORY UCLASS_CLK #include <common.h> #include <dm.h> /* For DIV_ROUND_DOWN_ULL, defined in linux/kernel.h */ #include <div64.h> #include <log.h> #include <serial.h> #include <asm/io.h> #include <dt-bindings/clock/k210-sysctl.h> #include <kendryte/pll.h> #include <linux/bitfield.h> #include <linux/clk-provider.h> #include <linux/delay.h> #include <linux/err.h> #define CLK_K210_PLL "k210_clk_pll" #ifdef CONFIG_CLK_K210_SET_RATE static int k210_pll_enable(struct clk *clk); static int k210_pll_disable(struct clk *clk); /* * The PLL included with the Kendryte K210 appears to be a True Circuits, Inc. * General-Purpose PLL. The logical layout of the PLL with internal feedback is * approximately the following: * * +---------------+ * |reference clock| * +---------------+ * | * v * +--+ * |/r| * +--+ * | * v * +-------------+ * |divided clock| * +-------------+ * | * v * +--------------+ * |phase detector|<---+ * +--------------+ | * | | * v +--------------+ * +---+ |feedback clock| * |VCO| +--------------+ * +---+ ^ * | +--+ | * +--->|/f|---+ * | +--+ * v * +---+ * |/od| * +---+ * | * v * +------+ * |output| * +------+ * * The k210 PLLs have three factors: r, f, and od. Because of the feedback mode, * the effect of the division by f is to multiply the input frequency. The * equation for the output rate is * rate = (rate_in * f) / (r * od). * Moving knowns to one side of the equation, we get * rate / rate_in = f / (r * od) * Rearranging slightly, * abs_error = abs((rate / rate_in) - (f / (r * od))). * To get relative, error, we divide by the expected ratio * error = abs((rate / rate_in) - (f / (r * od))) / (rate / rate_in). * Simplifying, * error = abs(1 - f / (r * od)) / (rate / rate_in) * error = abs(1 - (f * rate_in) / (r * od * rate)) * Using the constants ratio = rate / rate_in and inv_ratio = rate_in / rate, * error = abs((f * inv_ratio) / (r * od) - 1) * This is the error used in evaluating parameters. * * r and od are four bits each, while f is six bits. Because r and od are * multiplied together, instead of the full 256 values possible if both bits * were used fully, there are only 97 distinct products. Combined with f, there * are 6208 theoretical settings for the PLL. However, most of these settings * can be ruled out immediately because they do not have the correct ratio. * * In addition to the constraint of approximating the desired ratio, parameters * must also keep internal pll frequencies within acceptable ranges. The divided * clock's minimum and maximum frequencies have a ratio of around 128. This * leaves fairly substantial room to work with, especially since the only * affected parameter is r. The VCO's minimum and maximum frequency have a ratio * of 5, which is considerably more restrictive. * * The r and od factors are stored in a table. This is to make it easy to find * the next-largest product. Some products have multiple factorizations, but * only when one factor has at least a 2.5x ratio to the factors of the other * factorization. This is because any smaller ratio would not make a difference * when ensuring the VCO's frequency is within spec. * * Throughout the calculation function, fixed point arithmetic is used. Because * the range of rate and rate_in may be up to 1.75 GHz, or around 2^30, 64-bit * 32.32 fixed-point numbers are used to represent ratios. In general, to * implement division, the numerator is first multiplied by 2^32. This gives a * result where the whole number part is in the upper 32 bits, and the fraction * is in the lower 32 bits. * * In general, rounding is done to the closest integer. This helps find the best * approximation for the ratio. Rounding in one direction (e.g down) could cause * the function to miss a better ratio with one of the parameters increased by * one. */ /* * The factors table was generated with the following python code: * * def p(x, y): * return (1.0*x/y > 2.5) or (1.0*y/x > 2.5) * * factors = {} * for i in range(1, 17): * for j in range(1, 17): * fs = factors.get(i*j) or [] * if fs == [] or all([ * (p(i, x) and p(i, y)) or (p(j, x) and p(j, y)) * for (x, y) in fs]): * fs.append((i, j)) * factors[i*j] = fs * * for k, l in sorted(factors.items()): * for v in l: * print("PACK(%s, %s)," % v) */ #define PACK(r, od) (((((r) - 1) & 0xF) << 4) | (((od) - 1) & 0xF)) #define UNPACK_R(val) ((((val) >> 4) & 0xF) + 1) #define UNPACK_OD(val) (((val) & 0xF) + 1) static const u8 factors[] = { PACK(1, 1), PACK(1, 2), PACK(1, 3), PACK(1, 4), PACK(1, 5), PACK(1, 6), PACK(1, 7), PACK(1, 8), PACK(1, 9), PACK(3, 3), PACK(1, 10), PACK(1, 11), PACK(1, 12), PACK(3, 4), PACK(1, 13), PACK(1, 14), PACK(1, 15), PACK(3, 5), PACK(1, 16), PACK(4, 4), PACK(2, 9), PACK(2, 10), PACK(3, 7), PACK(2, 11), PACK(2, 12), PACK(5, 5), PACK(2, 13), PACK(3, 9), PACK(2, 14), PACK(2, 15), PACK(2, 16), PACK(3, 11), PACK(5, 7), PACK(3, 12), PACK(3, 13), PACK(4, 10), PACK(3, 14), PACK(4, 11), PACK(3, 15), PACK(3, 16), PACK(7, 7), PACK(5, 10), PACK(4, 13), PACK(6, 9), PACK(5, 11), PACK(4, 14), PACK(4, 15), PACK(7, 9), PACK(4, 16), PACK(5, 13), PACK(6, 11), PACK(5, 14), PACK(6, 12), PACK(5, 15), PACK(7, 11), PACK(6, 13), PACK(5, 16), PACK(9, 9), PACK(6, 14), PACK(8, 11), PACK(6, 15), PACK(7, 13), PACK(6, 16), PACK(7, 14), PACK(9, 11), PACK(10, 10), PACK(8, 13), PACK(7, 15), PACK(9, 12), PACK(10, 11), PACK(7, 16), PACK(9, 13), PACK(8, 15), PACK(11, 11), PACK(9, 14), PACK(8, 16), PACK(10, 13), PACK(11, 12), PACK(9, 15), PACK(10, 14), PACK(11, 13), PACK(9, 16), PACK(10, 15), PACK(11, 14), PACK(12, 13), PACK(10, 16), PACK(11, 15), PACK(12, 14), PACK(13, 13), PACK(11, 16), PACK(12, 15), PACK(13, 14), PACK(12, 16), PACK(13, 15), PACK(14, 14), PACK(13, 16), PACK(14, 15), PACK(14, 16), PACK(15, 15), PACK(15, 16), PACK(16, 16), }; TEST_STATIC int k210_pll_calc_config(u32 rate, u32 rate_in, struct k210_pll_config *best) { int i; s64 error, best_error; u64 ratio, inv_ratio; /* fixed point 32.32 ratio of the rates */ u64 max_r; u64 r, f, od; /* * Can't go over 1.75 GHz or under 21.25 MHz due to limitations on the * VCO frequency. These are not the same limits as below because od can * reduce the output frequency by 16. */ if (rate > 1750000000 || rate < 21250000) return -EINVAL; /* Similar restrictions on the input rate */ if (rate_in > 1750000000 || rate_in < 13300000) return -EINVAL; ratio = DIV_ROUND_CLOSEST_ULL((u64)rate << 32, rate_in); inv_ratio = DIV_ROUND_CLOSEST_ULL((u64)rate_in << 32, rate); /* Can't increase by more than 64 or reduce by more than 256 */ if (rate > rate_in && ratio > (64ULL << 32)) return -EINVAL; else if (rate <= rate_in && inv_ratio > (256ULL << 32)) return -EINVAL; /* * The divided clock (rate_in / r) must stay between 1.75 GHz and 13.3 * MHz. There is no minimum, since the only way to get a higher input * clock than 26 MHz is to use a clock generated by a PLL. Because PLLs * cannot output frequencies greater than 1.75 GHz, the minimum would * never be greater than one. */ max_r = DIV_ROUND_DOWN_ULL(rate_in, 13300000); /* Variables get immediately incremented, so start at -1th iteration */ i = -1; f = 0; r = 0; od = 0; best_error = S64_MAX; error = best_error; /* do-while here so we always try at least one ratio */ do { /* * Whether we swapped r and od while enforcing frequency limits */ bool swapped = false; u64 last_od = od; u64 last_r = r; /* * Try the next largest value for f (or r and od) and * recalculate the other parameters based on that */ if (rate > rate_in) { /* * Skip factors of the same product if we already tried * out that product */ do { i++; r = UNPACK_R(factors[i]); od = UNPACK_OD(factors[i]); } while (i + 1 < ARRAY_SIZE(factors) && r * od == last_r * last_od); /* Round close */ f = (r * od * ratio + BIT(31)) >> 32; if (f > 64) f = 64; } else { u64 tmp = ++f * inv_ratio; bool round_up = !!(tmp & BIT(31)); u32 goal = (tmp >> 32) + round_up; u32 err, last_err; /* Get the next r/od pair in factors */ while (r * od < goal && i + 1 < ARRAY_SIZE(factors)) { i++; r = UNPACK_R(factors[i]); od = UNPACK_OD(factors[i]); } /* * This is a case of double rounding. If we rounded up * above, we need to round down (in cases of ties) here. * This prevents off-by-one errors resulting from * choosing X+2 over X when X.Y rounds up to X+1 and * there is no r * od = X+1. For the converse, when X.Y * is rounded down to X, we should choose X+1 over X-1. */ err = abs(r * od - goal); last_err = abs(last_r * last_od - goal); if (last_err < err || (round_up && last_err == err)) { i--; r = last_r; od = last_od; } } /* * Enforce limits on internal clock frequencies. If we * aren't in spec, try swapping r and od. If everything is * in-spec, calculate the relative error. */ while (true) { /* * Whether the intermediate frequencies are out-of-spec */ bool out_of_spec = false; if (r > max_r) { out_of_spec = true; } else { /* * There is no way to only divide once; we need * to examine the frequency with and without the * effect of od. */ u64 vco = DIV_ROUND_CLOSEST_ULL(rate_in * f, r); if (vco > 1750000000 || vco < 340000000) out_of_spec = true; } if (out_of_spec) { if (!swapped) { u64 tmp = r; r = od; od = tmp; swapped = true; continue; } else { /* * Try looking ahead to see if there are * additional factors for the same * product. */ if (i + 1 < ARRAY_SIZE(factors)) { u64 new_r, new_od; i++; new_r = UNPACK_R(factors[i]); new_od = UNPACK_OD(factors[i]); if (r * od == new_r * new_od) { r = new_r; od = new_od; swapped = false; continue; } i--; } break; } } error = DIV_ROUND_CLOSEST_ULL(f * inv_ratio, r * od); /* The lower 16 bits are spurious */ error = abs((error - BIT(32))) >> 16; if (error < best_error) { best->r = r; best->f = f; best->od = od; best_error = error; } break; } } while (f < 64 && i + 1 < ARRAY_SIZE(factors) && error != 0); if (best_error == S64_MAX) return -EINVAL; log_debug("best error %lld\n", best_error); return 0; } static ulong k210_pll_set_rate(struct clk *clk, ulong rate) { int err; long long rate_in = clk_get_parent_rate(clk); struct k210_pll_config config = {}; struct k210_pll *pll = to_k210_pll(clk); u32 reg; if (rate_in < 0) return rate_in; log_debug("Calculating parameters with rate=%lu and rate_in=%lld\n", rate, rate_in); err = k210_pll_calc_config(rate, rate_in, &config); if (err) return err; log_debug("Got r=%u f=%u od=%u\n", config.r, config.f, config.od); /* * Don't use clk_disable as it might not actually disable the pll due to * refcounting */ k210_pll_disable(clk); reg = readl(pll->reg); reg &= ~K210_PLL_CLKR & ~K210_PLL_CLKF & ~K210_PLL_CLKOD & ~K210_PLL_BWADJ; reg |= FIELD_PREP(K210_PLL_CLKR, config.r - 1) | FIELD_PREP(K210_PLL_CLKF, config.f - 1) | FIELD_PREP(K210_PLL_CLKOD, config.od - 1) | FIELD_PREP(K210_PLL_BWADJ, config.f - 1); writel(reg, pll->reg); err = k210_pll_enable(clk); if (err) return err; serial_setbrg(); return clk_get_rate(clk); } #endif /* CONFIG_CLK_K210_SET_RATE */ static ulong k210_pll_get_rate(struct clk *clk) { long long rate_in = clk_get_parent_rate(clk); struct k210_pll *pll = to_k210_pll(clk); u64 r, f, od; u32 reg = readl(pll->reg); if (rate_in < 0 || (reg & K210_PLL_BYPASS)) return rate_in; if (!(reg & K210_PLL_PWRD)) return 0; r = FIELD_GET(K210_PLL_CLKR, reg) + 1; f = FIELD_GET(K210_PLL_CLKF, reg) + 1; od = FIELD_GET(K210_PLL_CLKOD, reg) + 1; return DIV_ROUND_DOWN_ULL(((u64)rate_in) * f, r * od); } /* * Wait for the PLL to be locked. If the PLL is not locked, try clearing the * slip before retrying */ static void k210_pll_waitfor_lock(struct k210_pll *pll) { u32 mask = GENMASK(pll->width - 1, 0) << pll->shift; while (true) { u32 reg = readl(pll->lock); if ((reg & mask) == mask) break; reg |= BIT(pll->shift + K210_PLL_CLEAR_SLIP); writel(reg, pll->lock); } } /* Adapted from sysctl_pll_enable */ static int k210_pll_enable(struct clk *clk) { struct k210_pll *pll = to_k210_pll(clk); u32 reg = readl(pll->reg); if ((reg | K210_PLL_PWRD) && !(reg | K210_PLL_RESET)) return 0; reg |= K210_PLL_PWRD; writel(reg, pll->reg); /* Ensure reset is low before asserting it */ reg &= ~K210_PLL_RESET; writel(reg, pll->reg); reg |= K210_PLL_RESET; writel(reg, pll->reg); nop(); nop(); reg &= ~K210_PLL_RESET; writel(reg, pll->reg); k210_pll_waitfor_lock(pll); reg &= ~K210_PLL_BYPASS; writel(reg, pll->reg); return 0; } static int k210_pll_disable(struct clk *clk) { struct k210_pll *pll = to_k210_pll(clk); u32 reg = readl(pll->reg); /* * Bypassing before powering off is important so child clocks don't stop * working. This is especially important for pll0, the indirect parent * of the cpu clock. */ reg |= K210_PLL_BYPASS; writel(reg, pll->reg); reg &= ~K210_PLL_PWRD; writel(reg, pll->reg); return 0; } const struct clk_ops k210_pll_ops = { .get_rate = k210_pll_get_rate, #ifdef CONFIG_CLK_K210_SET_RATE .set_rate = k210_pll_set_rate, #endif .enable = k210_pll_enable, .disable = k210_pll_disable, }; struct clk *k210_register_pll_struct(const char *name, const char *parent_name, struct k210_pll *pll) { int ret; struct clk *clk = &pll->clk; ret = clk_register(clk, CLK_K210_PLL, name, parent_name); if (ret) return ERR_PTR(ret); return clk; } struct clk *k210_register_pll(const char *name, const char *parent_name, void __iomem *reg, void __iomem *lock, u8 shift, u8 width) { struct clk *clk; struct k210_pll *pll; pll = kzalloc(sizeof(*pll), GFP_KERNEL); if (!pll) return ERR_PTR(-ENOMEM); pll->reg = reg; pll->lock = lock; pll->shift = shift; pll->width = width; clk = k210_register_pll_struct(name, parent_name, pll); if (IS_ERR(clk)) kfree(pll); return clk; } U_BOOT_DRIVER(k210_pll) = { .name = CLK_K210_PLL, .id = UCLASS_CLK, .ops = &k210_pll_ops, };